From 1bc5aee63eb72b341f506ad058502cd0361f0d10 Mon Sep 17 00:00:00 2001
From: Ben Cheng
Date: Tue, 25 Mar 2014 22:37:19 -0700
Subject: Initial checkin of GCC 4.9.0 from trunk (r208799).

Change-Id: I48a3c08bb98542aa215912a75f03c0890e497dba
---
 gcc-4.9/gcc/config/README | 5 + gcc-4.9/gcc/config/aarch64/aarch64-arches.def | 29 + gcc-4.9/gcc/config/aarch64/aarch64-builtins.c | 1253 + gcc-4.9/gcc/config/aarch64/aarch64-cores.def | 42 + gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h | 33 + gcc-4.9/gcc/config/aarch64/aarch64-elf.h | 161 + gcc-4.9/gcc/config/aarch64/aarch64-linux.h | 47 + gcc-4.9/gcc/config/aarch64/aarch64-modes.def | 55 + .../config/aarch64/aarch64-option-extensions.def | 38 + gcc-4.9/gcc/config/aarch64/aarch64-opts.h | 64 + gcc-4.9/gcc/config/aarch64/aarch64-protos.h | 292 + .../gcc/config/aarch64/aarch64-simd-builtins.def | 395 + gcc-4.9/gcc/config/aarch64/aarch64-simd.md | 4363 ++ gcc-4.9/gcc/config/aarch64/aarch64-tune.md | 5 + gcc-4.9/gcc/config/aarch64/aarch64.c | 8544 ++++ gcc-4.9/gcc/config/aarch64/aarch64.h | 873 + gcc-4.9/gcc/config/aarch64/aarch64.md | 3642 ++ gcc-4.9/gcc/config/aarch64/aarch64.opt | 118 + gcc-4.9/gcc/config/aarch64/arm_neon.h | 25403 ++++++++++ gcc-4.9/gcc/config/aarch64/atomics.md | 382 + gcc-4.9/gcc/config/aarch64/biarchilp32.h | 29 + gcc-4.9/gcc/config/aarch64/biarchlp64.h | 29 + gcc-4.9/gcc/config/aarch64/constraints.md | 188 + gcc-4.9/gcc/config/aarch64/gentune.sh | 32 + gcc-4.9/gcc/config/aarch64/iterators.md | 997 + gcc-4.9/gcc/config/aarch64/predicates.md | 302 + gcc-4.9/gcc/config/aarch64/t-aarch64 | 45 + gcc-4.9/gcc/config/aarch64/t-aarch64-linux | 31 + gcc-4.9/gcc/config/alpha/alpha-modes.def | 27 + gcc-4.9/gcc/config/alpha/alpha-protos.h | 117 + gcc-4.9/gcc/config/alpha/alpha.c | 9898 ++++ gcc-4.9/gcc/config/alpha/alpha.h | 1074 + gcc-4.9/gcc/config/alpha/alpha.md | 6113 +++ gcc-4.9/gcc/config/alpha/alpha.opt | 130 + gcc-4.9/gcc/config/alpha/constraints.md | 120 + gcc-4.9/gcc/config/alpha/driver-alpha.c | 99 + gcc-4.9/gcc/config/alpha/elf.h | 168 + gcc-4.9/gcc/config/alpha/elf.opt | 29 + gcc-4.9/gcc/config/alpha/ev4.md | 161 + gcc-4.9/gcc/config/alpha/ev5.md | 194 + gcc-4.9/gcc/config/alpha/ev6.md | 181 + gcc-4.9/gcc/config/alpha/freebsd.h | 68 + gcc-4.9/gcc/config/alpha/linux-elf.h | 53 + gcc-4.9/gcc/config/alpha/linux.h | 102 + gcc-4.9/gcc/config/alpha/netbsd.h | 72 + gcc-4.9/gcc/config/alpha/openbsd.h | 45 + gcc-4.9/gcc/config/alpha/predicates.md | 653 + gcc-4.9/gcc/config/alpha/sync.md | 343 + gcc-4.9/gcc/config/alpha/t-linux | 1 + gcc-4.9/gcc/config/alpha/t-vms | 21 + gcc-4.9/gcc/config/alpha/vms.h | 306 + gcc-4.9/gcc/config/alpha/x-alpha | 3 + gcc-4.9/gcc/config/arc/arc-modes.def | 37 + gcc-4.9/gcc/config/arc/arc-opts.h | 28 + gcc-4.9/gcc/config/arc/arc-protos.h | 118 + gcc-4.9/gcc/config/arc/arc-simd.h | 186 + gcc-4.9/gcc/config/arc/arc.c | 9360 ++++ gcc-4.9/gcc/config/arc/arc.h | 1696 + gcc-4.9/gcc/config/arc/arc.md | 5165 ++ gcc-4.9/gcc/config/arc/arc.opt | 390 + gcc-4.9/gcc/config/arc/arc600.md | 63 + gcc-4.9/gcc/config/arc/arc700.md | 170 + gcc-4.9/gcc/config/arc/constraints.md | 399 + gcc-4.9/gcc/config/arc/fpx.md | 674 + gcc-4.9/gcc/config/arc/predicates.md | 811 + gcc-4.9/gcc/config/arc/simdext.md | 1292 + gcc-4.9/gcc/config/arc/t-arc-newlib | 38 + gcc-4.9/gcc/config/arc/t-arc-uClibc | 20 + gcc-4.9/gcc/config/arm/README-interworking | 749 + gcc-4.9/gcc/config/arm/aarch-common-protos.h | 134 + gcc-4.9/gcc/config/arm/aarch-common.c | 353 + gcc-4.9/gcc/config/arm/aarch-cost-tables.h | 325 + gcc-4.9/gcc/config/arm/aout.h | 303 + 
gcc-4.9/gcc/config/arm/arm-arches.def | 60 + gcc-4.9/gcc/config/arm/arm-c.c | 44 + gcc-4.9/gcc/config/arm/arm-cores.def | 159 + gcc-4.9/gcc/config/arm/arm-fixed.md | 429 + gcc-4.9/gcc/config/arm/arm-fpus.def | 46 + gcc-4.9/gcc/config/arm/arm-generic.md | 152 + gcc-4.9/gcc/config/arm/arm-ldmstm.ml | 345 + gcc-4.9/gcc/config/arm/arm-modes.def | 84 + gcc-4.9/gcc/config/arm/arm-opts.h | 75 + gcc-4.9/gcc/config/arm/arm-protos.h | 297 + gcc-4.9/gcc/config/arm/arm-tables.opt | 439 + gcc-4.9/gcc/config/arm/arm-tune.md | 34 + gcc-4.9/gcc/config/arm/arm.c | 31119 ++++++++++++ gcc-4.9/gcc/config/arm/arm.h | 2398 + gcc-4.9/gcc/config/arm/arm.md | 12928 +++++ gcc-4.9/gcc/config/arm/arm.opt | 277 + gcc-4.9/gcc/config/arm/arm1020e.md | 385 + gcc-4.9/gcc/config/arm/arm1026ejs.md | 250 + gcc-4.9/gcc/config/arm/arm1136jfs.md | 387 + gcc-4.9/gcc/config/arm/arm926ejs.md | 198 + gcc-4.9/gcc/config/arm/arm_acle.h | 100 + gcc-4.9/gcc/config/arm/arm_neon.h | 13429 ++++++ gcc-4.9/gcc/config/arm/arm_neon_builtins.def | 212 + gcc-4.9/gcc/config/arm/bpabi.h | 163 + gcc-4.9/gcc/config/arm/coff.h | 82 + gcc-4.9/gcc/config/arm/constraints.md | 438 + gcc-4.9/gcc/config/arm/cortex-a15-neon.md | 677 + gcc-4.9/gcc/config/arm/cortex-a15.md | 186 + gcc-4.9/gcc/config/arm/cortex-a5.md | 311 + gcc-4.9/gcc/config/arm/cortex-a53.md | 309 + gcc-4.9/gcc/config/arm/cortex-a7.md | 394 + gcc-4.9/gcc/config/arm/cortex-a8-neon.md | 1534 + gcc-4.9/gcc/config/arm/cortex-a8.md | 279 + gcc-4.9/gcc/config/arm/cortex-a9-neon.md | 1471 + gcc-4.9/gcc/config/arm/cortex-a9.md | 283 + gcc-4.9/gcc/config/arm/cortex-m4-fpu.md | 117 + gcc-4.9/gcc/config/arm/cortex-m4.md | 128 + gcc-4.9/gcc/config/arm/cortex-r4.md | 299 + gcc-4.9/gcc/config/arm/cortex-r4f.md | 161 + gcc-4.9/gcc/config/arm/crypto.def | 34 + gcc-4.9/gcc/config/arm/crypto.md | 86 + gcc-4.9/gcc/config/arm/driver-arm.c | 151 + gcc-4.9/gcc/config/arm/elf.h | 159 + gcc-4.9/gcc/config/arm/fa526.md | 173 + gcc-4.9/gcc/config/arm/fa606te.md | 182 + gcc-4.9/gcc/config/arm/fa626te.md | 177 + gcc-4.9/gcc/config/arm/fa726te.md | 223 + gcc-4.9/gcc/config/arm/fmp626.md | 191 + gcc-4.9/gcc/config/arm/genopt.sh | 95 + gcc-4.9/gcc/config/arm/gentune.sh | 29 + gcc-4.9/gcc/config/arm/iterators.md | 585 + gcc-4.9/gcc/config/arm/iwmmxt.md | 1775 + gcc-4.9/gcc/config/arm/iwmmxt2.md | 903 + gcc-4.9/gcc/config/arm/ldmstm.md | 1225 + gcc-4.9/gcc/config/arm/ldrdstrd.md | 260 + gcc-4.9/gcc/config/arm/linux-eabi.h | 122 + gcc-4.9/gcc/config/arm/linux-elf.h | 115 + gcc-4.9/gcc/config/arm/linux-gas.h | 55 + gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md | 189 + gcc-4.9/gcc/config/arm/marvell-pj4.md | 232 + gcc-4.9/gcc/config/arm/mmintrin.h | 1836 + gcc-4.9/gcc/config/arm/neon-docgen.ml | 424 + gcc-4.9/gcc/config/arm/neon-gen.ml | 520 + gcc-4.9/gcc/config/arm/neon-testgen.ml | 305 + gcc-4.9/gcc/config/arm/neon.md | 5808 +++ gcc-4.9/gcc/config/arm/neon.ml | 2355 + gcc-4.9/gcc/config/arm/netbsd-elf.h | 154 + gcc-4.9/gcc/config/arm/predicates.md | 677 + gcc-4.9/gcc/config/arm/rtems-eabi.h | 29 + gcc-4.9/gcc/config/arm/semi.h | 68 + gcc-4.9/gcc/config/arm/symbian.h | 101 + gcc-4.9/gcc/config/arm/sync.md | 472 + gcc-4.9/gcc/config/arm/t-aprofile | 178 + gcc-4.9/gcc/config/arm/t-arm | 100 + gcc-4.9/gcc/config/arm/t-arm-elf | 90 + gcc-4.9/gcc/config/arm/t-bpabi | 1 + gcc-4.9/gcc/config/arm/t-linux-androideabi | 10 + gcc-4.9/gcc/config/arm/t-linux-eabi | 31 + gcc-4.9/gcc/config/arm/t-rtems-eabi | 47 + gcc-4.9/gcc/config/arm/t-symbian | 26 + gcc-4.9/gcc/config/arm/t-vxworks | 24 + gcc-4.9/gcc/config/arm/thumb2.md | 1495 + 
gcc-4.9/gcc/config/arm/types.md | 1077 + gcc-4.9/gcc/config/arm/uclinux-eabi.h | 67 + gcc-4.9/gcc/config/arm/uclinux-elf.h | 84 + gcc-4.9/gcc/config/arm/unknown-elf.h | 96 + gcc-4.9/gcc/config/arm/unspecs.md | 305 + gcc-4.9/gcc/config/arm/vec-common.md | 136 + gcc-4.9/gcc/config/arm/vfp.md | 1330 + gcc-4.9/gcc/config/arm/vfp11.md | 93 + gcc-4.9/gcc/config/arm/vxworks.h | 109 + gcc-4.9/gcc/config/arm/vxworks.opt | 59 + gcc-4.9/gcc/config/arm/x-arm | 3 + gcc-4.9/gcc/config/avr/avr-arch.h | 156 + gcc-4.9/gcc/config/avr/avr-c.c | 402 + gcc-4.9/gcc/config/avr/avr-devices.c | 114 + gcc-4.9/gcc/config/avr/avr-dimode.md | 479 + gcc-4.9/gcc/config/avr/avr-fixed.md | 497 + gcc-4.9/gcc/config/avr/avr-log.c | 351 + gcc-4.9/gcc/config/avr/avr-mcus.def | 323 + gcc-4.9/gcc/config/avr/avr-modes.def | 33 + gcc-4.9/gcc/config/avr/avr-protos.h | 164 + gcc-4.9/gcc/config/avr/avr-stdint.h | 66 + gcc-4.9/gcc/config/avr/avr-tables.opt | 766 + gcc-4.9/gcc/config/avr/avr.c | 12522 +++++ gcc-4.9/gcc/config/avr/avr.h | 606 + gcc-4.9/gcc/config/avr/avr.md | 6358 +++ gcc-4.9/gcc/config/avr/avr.opt | 84 + gcc-4.9/gcc/config/avr/avrlibc.h | 30 + gcc-4.9/gcc/config/avr/builtins.def | 169 + gcc-4.9/gcc/config/avr/constraints.md | 238 + gcc-4.9/gcc/config/avr/driver-avr.c | 150 + gcc-4.9/gcc/config/avr/elf.h | 41 + gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c | 144 + gcc-4.9/gcc/config/avr/genmultilib.awk | 216 + gcc-4.9/gcc/config/avr/genopt.sh | 59 + gcc-4.9/gcc/config/avr/predicates.md | 275 + gcc-4.9/gcc/config/avr/rtems.h | 27 + gcc-4.9/gcc/config/avr/stdfix.h | 236 + gcc-4.9/gcc/config/avr/t-avr | 83 + gcc-4.9/gcc/config/avr/t-multilib | 269 + gcc-4.9/gcc/config/avr/t-rtems | 3 + gcc-4.9/gcc/config/bfin/bfin-modes.def | 28 + gcc-4.9/gcc/config/bfin/bfin-opts.h | 59 + gcc-4.9/gcc/config/bfin/bfin-protos.h | 117 + gcc-4.9/gcc/config/bfin/bfin.c | 5834 +++ gcc-4.9/gcc/config/bfin/bfin.h | 1156 + gcc-4.9/gcc/config/bfin/bfin.md | 4202 ++ gcc-4.9/gcc/config/bfin/bfin.opt | 118 + gcc-4.9/gcc/config/bfin/constraints.md | 225 + gcc-4.9/gcc/config/bfin/elf.h | 74 + gcc-4.9/gcc/config/bfin/linux.h | 52 + gcc-4.9/gcc/config/bfin/predicates.md | 249 + gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh | 81 + gcc-4.9/gcc/config/bfin/rtems.h | 28 + gcc-4.9/gcc/config/bfin/sync.md | 178 + gcc-4.9/gcc/config/bfin/t-bfin-elf | 49 + gcc-4.9/gcc/config/bfin/t-bfin-linux | 52 + gcc-4.9/gcc/config/bfin/t-bfin-uclinux | 48 + gcc-4.9/gcc/config/bfin/t-rtems | 6 + gcc-4.9/gcc/config/bfin/uclinux.h | 38 + gcc-4.9/gcc/config/c6x/c6x-isas.def | 37 + gcc-4.9/gcc/config/c6x/c6x-modes.def | 24 + gcc-4.9/gcc/config/c6x/c6x-mult.md | 844 + gcc-4.9/gcc/config/c6x/c6x-mult.md.in | 421 + gcc-4.9/gcc/config/c6x/c6x-opts.h | 35 + gcc-4.9/gcc/config/c6x/c6x-protos.h | 65 + gcc-4.9/gcc/config/c6x/c6x-sched.md | 934 + gcc-4.9/gcc/config/c6x/c6x-sched.md.in | 230 + gcc-4.9/gcc/config/c6x/c6x-tables.opt | 43 + gcc-4.9/gcc/config/c6x/c6x.c | 6846 +++ gcc-4.9/gcc/config/c6x/c6x.h | 618 + gcc-4.9/gcc/config/c6x/c6x.md | 3136 ++ gcc-4.9/gcc/config/c6x/c6x.opt | 67 + gcc-4.9/gcc/config/c6x/c6x_intrinsics.h | 194 + gcc-4.9/gcc/config/c6x/constraints.md | 174 + gcc-4.9/gcc/config/c6x/elf-common.h | 37 + gcc-4.9/gcc/config/c6x/elf.h | 35 + gcc-4.9/gcc/config/c6x/genmult.sh | 33 + gcc-4.9/gcc/config/c6x/genopt.sh | 59 + gcc-4.9/gcc/config/c6x/gensched.sh | 44 + gcc-4.9/gcc/config/c6x/predicates.md | 226 + gcc-4.9/gcc/config/c6x/sync.md | 270 + gcc-4.9/gcc/config/c6x/t-c6x | 42 + gcc-4.9/gcc/config/c6x/t-c6x-elf | 30 + gcc-4.9/gcc/config/c6x/t-c6x-uclinux | 3 + 
gcc-4.9/gcc/config/c6x/uclinux-elf.h | 63 + gcc-4.9/gcc/config/cr16/constraints.md | 81 + gcc-4.9/gcc/config/cr16/cr16-protos.h | 99 + gcc-4.9/gcc/config/cr16/cr16.c | 2194 + gcc-4.9/gcc/config/cr16/cr16.h | 586 + gcc-4.9/gcc/config/cr16/cr16.md | 1084 + gcc-4.9/gcc/config/cr16/cr16.opt | 51 + gcc-4.9/gcc/config/cr16/predicates.md | 225 + gcc-4.9/gcc/config/cr16/t-cr16 | 25 + gcc-4.9/gcc/config/cris/constraints.md | 164 + gcc-4.9/gcc/config/cris/cris-protos.h | 67 + gcc-4.9/gcc/config/cris/cris.c | 4359 ++ gcc-4.9/gcc/config/cris/cris.h | 1081 + gcc-4.9/gcc/config/cris/cris.md | 5157 ++ gcc-4.9/gcc/config/cris/cris.opt | 202 + gcc-4.9/gcc/config/cris/elf.opt | 25 + gcc-4.9/gcc/config/cris/linux.h | 150 + gcc-4.9/gcc/config/cris/linux.opt | 33 + gcc-4.9/gcc/config/cris/predicates.md | 178 + gcc-4.9/gcc/config/cris/sync.md | 314 + gcc-4.9/gcc/config/cris/t-cris | 29 + gcc-4.9/gcc/config/cris/t-elfmulti | 31 + gcc-4.9/gcc/config/cris/t-linux | 5 + gcc-4.9/gcc/config/darwin-c.c | 775 + gcc-4.9/gcc/config/darwin-driver.c | 224 + gcc-4.9/gcc/config/darwin-f.c | 60 + gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def | 113 + gcc-4.9/gcc/config/darwin-protos.h | 127 + gcc-4.9/gcc/config/darwin-sections.def | 195 + gcc-4.9/gcc/config/darwin.c | 3663 ++ gcc-4.9/gcc/config/darwin.h | 921 + gcc-4.9/gcc/config/darwin.opt | 393 + gcc-4.9/gcc/config/darwin10.h | 34 + gcc-4.9/gcc/config/darwin9.h | 65 + gcc-4.9/gcc/config/dbx.h | 27 + gcc-4.9/gcc/config/dbxcoff.h | 62 + gcc-4.9/gcc/config/dbxelf.h | 68 + gcc-4.9/gcc/config/default-c.c | 33 + gcc-4.9/gcc/config/elfos.h | 438 + gcc-4.9/gcc/config/epiphany/constraints.md | 130 + gcc-4.9/gcc/config/epiphany/epiphany-modes.def | 40 + gcc-4.9/gcc/config/epiphany/epiphany-protos.h | 64 + gcc-4.9/gcc/config/epiphany/epiphany-sched.md | 145 + gcc-4.9/gcc/config/epiphany/epiphany.c | 2938 ++ gcc-4.9/gcc/config/epiphany/epiphany.h | 945 + gcc-4.9/gcc/config/epiphany/epiphany.md | 2812 ++ gcc-4.9/gcc/config/epiphany/epiphany.opt | 148 + gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h | 27 + gcc-4.9/gcc/config/epiphany/mode-switch-use.c | 109 + gcc-4.9/gcc/config/epiphany/predicates.md | 368 + gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c | 200 + gcc-4.9/gcc/config/epiphany/t-epiphany | 38 + gcc-4.9/gcc/config/flat.h | 22 + gcc-4.9/gcc/config/fr30/constraints.md | 71 + gcc-4.9/gcc/config/fr30/fr30-protos.h | 32 + gcc-4.9/gcc/config/fr30/fr30.c | 1062 + gcc-4.9/gcc/config/fr30/fr30.h | 845 + gcc-4.9/gcc/config/fr30/fr30.md | 1267 + gcc-4.9/gcc/config/fr30/fr30.opt | 27 + gcc-4.9/gcc/config/fr30/predicates.md | 123 + gcc-4.9/gcc/config/freebsd-nthr.h | 21 + gcc-4.9/gcc/config/freebsd-spec.h | 138 + gcc-4.9/gcc/config/freebsd-stdint.h | 56 + gcc-4.9/gcc/config/freebsd.h | 93 + gcc-4.9/gcc/config/freebsd.opt | 64 + gcc-4.9/gcc/config/frv/constraints.md | 174 + gcc-4.9/gcc/config/frv/frv-asm.h | 48 + gcc-4.9/gcc/config/frv/frv-modes.def | 34 + gcc-4.9/gcc/config/frv/frv-opts.h | 37 + gcc-4.9/gcc/config/frv/frv-protos.h | 178 + gcc-4.9/gcc/config/frv/frv.c | 9655 ++++ gcc-4.9/gcc/config/frv/frv.h | 2133 + gcc-4.9/gcc/config/frv/frv.md | 8021 ++++ gcc-4.9/gcc/config/frv/frv.opt | 237 + gcc-4.9/gcc/config/frv/linux.h | 74 + gcc-4.9/gcc/config/frv/predicates.md | 1543 + gcc-4.9/gcc/config/frv/t-frv | 36 + gcc-4.9/gcc/config/frv/t-linux | 24 + gcc-4.9/gcc/config/fused-madd.opt | 24 + gcc-4.9/gcc/config/g.opt | 29 + gcc-4.9/gcc/config/glibc-c.c | 37 + gcc-4.9/gcc/config/glibc-stdint.h | 55 + gcc-4.9/gcc/config/gnu-user.h | 128 + gcc-4.9/gcc/config/gnu-user.opt | 38 + 
gcc-4.9/gcc/config/gnu.h | 41 + gcc-4.9/gcc/config/h8300/constraints.md | 214 + gcc-4.9/gcc/config/h8300/elf.h | 44 + gcc-4.9/gcc/config/h8300/genmova.sh | 198 + gcc-4.9/gcc/config/h8300/h8300-protos.h | 118 + gcc-4.9/gcc/config/h8300/h8300.c | 6102 +++ gcc-4.9/gcc/config/h8300/h8300.h | 789 + gcc-4.9/gcc/config/h8300/h8300.md | 6322 +++ gcc-4.9/gcc/config/h8300/h8300.opt | 71 + gcc-4.9/gcc/config/h8300/mova.md | 858 + gcc-4.9/gcc/config/h8300/predicates.md | 504 + gcc-4.9/gcc/config/h8300/rtems.h | 29 + gcc-4.9/gcc/config/h8300/t-h8300 | 29 + gcc-4.9/gcc/config/h8300/t-rtems | 7 + gcc-4.9/gcc/config/host-darwin.c | 77 + gcc-4.9/gcc/config/host-darwin.h | 27 + gcc-4.9/gcc/config/host-hpux.c | 129 + gcc-4.9/gcc/config/host-linux.c | 228 + gcc-4.9/gcc/config/host-openbsd.c | 85 + gcc-4.9/gcc/config/host-solaris.c | 125 + gcc-4.9/gcc/config/hpux-stdint.h | 34 + gcc-4.9/gcc/config/hpux11.opt | 32 + gcc-4.9/gcc/config/i386/adxintrin.h | 49 + gcc-4.9/gcc/config/i386/ammintrin.h | 93 + gcc-4.9/gcc/config/i386/athlon.md | 1186 + gcc-4.9/gcc/config/i386/atom.md | 794 + gcc-4.9/gcc/config/i386/att.h | 91 + gcc-4.9/gcc/config/i386/avx2intrin.h | 1889 + gcc-4.9/gcc/config/i386/avx512cdintrin.h | 184 + gcc-4.9/gcc/config/i386/avx512erintrin.h | 394 + gcc-4.9/gcc/config/i386/avx512fintrin.h | 12832 +++++ gcc-4.9/gcc/config/i386/avx512pfintrin.h | 212 + gcc-4.9/gcc/config/i386/avxintrin.h | 1463 + gcc-4.9/gcc/config/i386/avxmath.h | 28 + gcc-4.9/gcc/config/i386/bdver1.md | 800 + gcc-4.9/gcc/config/i386/bdver3.md | 748 + gcc-4.9/gcc/config/i386/biarch64.h | 29 + gcc-4.9/gcc/config/i386/biarchx32.h | 28 + gcc-4.9/gcc/config/i386/bmi2intrin.h | 109 + gcc-4.9/gcc/config/i386/bmiintrin.h | 138 + gcc-4.9/gcc/config/i386/bmmintrin.h | 29 + gcc-4.9/gcc/config/i386/bsd.h | 99 + gcc-4.9/gcc/config/i386/btver2.md | 1391 + gcc-4.9/gcc/config/i386/constraints.md | 246 + gcc-4.9/gcc/config/i386/core2.md | 691 + gcc-4.9/gcc/config/i386/cpuid.h | 277 + gcc-4.9/gcc/config/i386/cross-stdarg.h | 72 + gcc-4.9/gcc/config/i386/crtdll.h | 42 + gcc-4.9/gcc/config/i386/cygming.h | 487 + gcc-4.9/gcc/config/i386/cygming.opt | 60 + gcc-4.9/gcc/config/i386/cygwin-stdint.h | 94 + gcc-4.9/gcc/config/i386/cygwin-w64.h | 83 + gcc-4.9/gcc/config/i386/cygwin.h | 146 + gcc-4.9/gcc/config/i386/darwin.h | 313 + gcc-4.9/gcc/config/i386/darwin64.h | 32 + gcc-4.9/gcc/config/i386/djgpp-stdint.h | 62 + gcc-4.9/gcc/config/i386/djgpp.h | 178 + gcc-4.9/gcc/config/i386/djgpp.opt | 28 + gcc-4.9/gcc/config/i386/driver-i386.c | 913 + gcc-4.9/gcc/config/i386/emmintrin.h | 1541 + gcc-4.9/gcc/config/i386/f16cintrin.h | 98 + gcc-4.9/gcc/config/i386/fma4intrin.h | 241 + gcc-4.9/gcc/config/i386/fmaintrin.h | 302 + gcc-4.9/gcc/config/i386/freebsd.h | 149 + gcc-4.9/gcc/config/i386/freebsd64.h | 44 + gcc-4.9/gcc/config/i386/fxsrintrin.h | 73 + gcc-4.9/gcc/config/i386/gas.h | 124 + gcc-4.9/gcc/config/i386/geode.md | 151 + gcc-4.9/gcc/config/i386/gmm_malloc.h | 74 + gcc-4.9/gcc/config/i386/gnu-user-common.h | 72 + gcc-4.9/gcc/config/i386/gnu-user.h | 164 + gcc-4.9/gcc/config/i386/gnu-user64.h | 99 + gcc-4.9/gcc/config/i386/gnu.h | 47 + gcc-4.9/gcc/config/i386/gstabs.h | 7 + gcc-4.9/gcc/config/i386/host-cygwin.c | 78 + gcc-4.9/gcc/config/i386/host-i386-darwin.c | 30 + gcc-4.9/gcc/config/i386/host-mingw32.c | 198 + gcc-4.9/gcc/config/i386/i386-builtin-types.awk | 280 + gcc-4.9/gcc/config/i386/i386-builtin-types.def | 808 + gcc-4.9/gcc/config/i386/i386-c.c | 546 + gcc-4.9/gcc/config/i386/i386-interix.h | 346 + gcc-4.9/gcc/config/i386/i386-modes.def | 99 + 
gcc-4.9/gcc/config/i386/i386-opts.h | 96 + gcc-4.9/gcc/config/i386/i386-protos.h | 332 + gcc-4.9/gcc/config/i386/i386.c | 47138 +++++++++++++++++++ gcc-4.9/gcc/config/i386/i386.h | 2552 + gcc-4.9/gcc/config/i386/i386.md | 18044 +++++++ gcc-4.9/gcc/config/i386/i386.opt | 796 + gcc-4.9/gcc/config/i386/i386elf.h | 103 + gcc-4.9/gcc/config/i386/ia32intrin.h | 293 + gcc-4.9/gcc/config/i386/immintrin.h | 177 + gcc-4.9/gcc/config/i386/interix.opt | 34 + gcc-4.9/gcc/config/i386/k6.md | 266 + gcc-4.9/gcc/config/i386/kfreebsd-gnu.h | 22 + gcc-4.9/gcc/config/i386/kfreebsd-gnu64.h | 27 + gcc-4.9/gcc/config/i386/knetbsd-gnu.h | 21 + gcc-4.9/gcc/config/i386/kopensolaris-gnu.h | 21 + gcc-4.9/gcc/config/i386/linux-common.h | 55 + gcc-4.9/gcc/config/i386/linux.h | 23 + gcc-4.9/gcc/config/i386/linux64.h | 32 + gcc-4.9/gcc/config/i386/lwpintrin.h | 105 + gcc-4.9/gcc/config/i386/lynx.h | 87 + gcc-4.9/gcc/config/i386/lzcntintrin.h | 75 + gcc-4.9/gcc/config/i386/mingw-pthread.h | 21 + gcc-4.9/gcc/config/i386/mingw-stdint.h | 50 + gcc-4.9/gcc/config/i386/mingw-w64.h | 86 + gcc-4.9/gcc/config/i386/mingw-w64.opt | 25 + gcc-4.9/gcc/config/i386/mingw.opt | 35 + gcc-4.9/gcc/config/i386/mingw32.h | 248 + gcc-4.9/gcc/config/i386/mm3dnow.h | 218 + gcc-4.9/gcc/config/i386/mmintrin.h | 942 + gcc-4.9/gcc/config/i386/mmx.md | 1613 + gcc-4.9/gcc/config/i386/msformat-c.c | 195 + gcc-4.9/gcc/config/i386/netbsd-elf.h | 121 + gcc-4.9/gcc/config/i386/netbsd64.h | 69 + gcc-4.9/gcc/config/i386/nmmintrin.h | 33 + gcc-4.9/gcc/config/i386/nto.h | 105 + gcc-4.9/gcc/config/i386/nto.opt | 32 + gcc-4.9/gcc/config/i386/openbsd.h | 101 + gcc-4.9/gcc/config/i386/openbsdelf.h | 119 + gcc-4.9/gcc/config/i386/pentium.md | 306 + gcc-4.9/gcc/config/i386/pmm_malloc.h | 57 + gcc-4.9/gcc/config/i386/pmmintrin.h | 132 + gcc-4.9/gcc/config/i386/popcntintrin.h | 53 + gcc-4.9/gcc/config/i386/ppro.md | 758 + gcc-4.9/gcc/config/i386/predicates.md | 1424 + gcc-4.9/gcc/config/i386/prfchwintrin.h | 37 + gcc-4.9/gcc/config/i386/rdos.h | 39 + gcc-4.9/gcc/config/i386/rdos64.h | 24 + gcc-4.9/gcc/config/i386/rdseedintrin.h | 66 + gcc-4.9/gcc/config/i386/rtemself.h | 40 + gcc-4.9/gcc/config/i386/rtmintrin.h | 84 + gcc-4.9/gcc/config/i386/shaintrin.h | 98 + gcc-4.9/gcc/config/i386/slm.md | 758 + gcc-4.9/gcc/config/i386/smmintrin.h | 862 + gcc-4.9/gcc/config/i386/sol2-9.h | 23 + gcc-4.9/gcc/config/i386/sol2-bi.h | 109 + gcc-4.9/gcc/config/i386/sol2.h | 189 + gcc-4.9/gcc/config/i386/sse.md | 15507 ++++++ gcc-4.9/gcc/config/i386/ssemath.h | 28 + gcc-4.9/gcc/config/i386/stringop.def | 37 + gcc-4.9/gcc/config/i386/stringop.opt | 31 + gcc-4.9/gcc/config/i386/subst.md | 198 + gcc-4.9/gcc/config/i386/sync.md | 606 + gcc-4.9/gcc/config/i386/sysv4.h | 72 + gcc-4.9/gcc/config/i386/t-cygming | 48 + gcc-4.9/gcc/config/i386/t-cygwin-w64 | 3 + gcc-4.9/gcc/config/i386/t-darwin | 2 + gcc-4.9/gcc/config/i386/t-darwin64 | 2 + gcc-4.9/gcc/config/i386/t-gmm_malloc | 6 + gcc-4.9/gcc/config/i386/t-gnu | 1 + gcc-4.9/gcc/config/i386/t-i386 | 31 + gcc-4.9/gcc/config/i386/t-interix | 30 + gcc-4.9/gcc/config/i386/t-kfreebsd | 5 + gcc-4.9/gcc/config/i386/t-linux | 1 + gcc-4.9/gcc/config/i386/t-linux64 | 38 + gcc-4.9/gcc/config/i386/t-mingw-w32 | 3 + gcc-4.9/gcc/config/i386/t-mingw-w64 | 3 + gcc-4.9/gcc/config/i386/t-openbsd | 4 + gcc-4.9/gcc/config/i386/t-pmm_malloc | 6 + gcc-4.9/gcc/config/i386/t-rtems | 26 + gcc-4.9/gcc/config/i386/t-sol2-64 | 21 + gcc-4.9/gcc/config/i386/t-vxworks | 8 + gcc-4.9/gcc/config/i386/t-vxworksae | 5 + gcc-4.9/gcc/config/i386/tbmintrin.h | 180 + 
gcc-4.9/gcc/config/i386/tmmintrin.h | 249 + gcc-4.9/gcc/config/i386/unix.h | 80 + gcc-4.9/gcc/config/i386/vx-common.h | 33 + gcc-4.9/gcc/config/i386/vxworks.h | 73 + gcc-4.9/gcc/config/i386/vxworksae.h | 35 + gcc-4.9/gcc/config/i386/winnt-cxx.c | 184 + gcc-4.9/gcc/config/i386/winnt-stubs.c | 51 + gcc-4.9/gcc/config/i386/winnt.c | 1304 + gcc-4.9/gcc/config/i386/wmmintrin.h | 132 + gcc-4.9/gcc/config/i386/x-cygwin | 4 + gcc-4.9/gcc/config/i386/x-darwin | 3 + gcc-4.9/gcc/config/i386/x-i386 | 3 + gcc-4.9/gcc/config/i386/x-mingw32 | 31 + gcc-4.9/gcc/config/i386/x86-64.h | 108 + gcc-4.9/gcc/config/i386/x86-tune.def | 525 + gcc-4.9/gcc/config/i386/x86intrin.h | 78 + gcc-4.9/gcc/config/i386/xm-cygwin.h | 21 + gcc-4.9/gcc/config/i386/xm-djgpp.h | 83 + gcc-4.9/gcc/config/i386/xm-mingw32.h | 40 + gcc-4.9/gcc/config/i386/xmmintrin.h | 1265 + gcc-4.9/gcc/config/i386/xopintrin.h | 844 + gcc-4.9/gcc/config/i386/xsaveintrin.h | 72 + gcc-4.9/gcc/config/i386/xsaveoptintrin.h | 58 + gcc-4.9/gcc/config/i386/xtestintrin.h | 51 + gcc-4.9/gcc/config/ia64/constraints.md | 154 + gcc-4.9/gcc/config/ia64/div.md | 1221 + gcc-4.9/gcc/config/ia64/elf.h | 68 + gcc-4.9/gcc/config/ia64/freebsd.h | 52 + gcc-4.9/gcc/config/ia64/hpux-unix2003.h | 8 + gcc-4.9/gcc/config/ia64/hpux.h | 234 + gcc-4.9/gcc/config/ia64/ia64-c.c | 191 + gcc-4.9/gcc/config/ia64/ia64-modes.def | 86 + gcc-4.9/gcc/config/ia64/ia64-opts.h | 34 + gcc-4.9/gcc/config/ia64/ia64-protos.h | 104 + gcc-4.9/gcc/config/ia64/ia64.c | 11762 +++++ gcc-4.9/gcc/config/ia64/ia64.h | 1724 + gcc-4.9/gcc/config/ia64/ia64.md | 5242 +++ gcc-4.9/gcc/config/ia64/ia64.opt | 198 + gcc-4.9/gcc/config/ia64/ia64intrin.h | 2 + gcc-4.9/gcc/config/ia64/ilp32.opt | 7 + gcc-4.9/gcc/config/ia64/itanium2.md | 1867 + gcc-4.9/gcc/config/ia64/linux.h | 90 + gcc-4.9/gcc/config/ia64/predicates.md | 636 + gcc-4.9/gcc/config/ia64/sync.md | 330 + gcc-4.9/gcc/config/ia64/sysv4.h | 144 + gcc-4.9/gcc/config/ia64/t-hpux | 28 + gcc-4.9/gcc/config/ia64/t-ia64 | 28 + gcc-4.9/gcc/config/ia64/t-linux | 1 + gcc-4.9/gcc/config/ia64/vect.md | 1569 + gcc-4.9/gcc/config/ia64/vms.h | 156 + gcc-4.9/gcc/config/ia64/vms.opt | 29 + gcc-4.9/gcc/config/initfini-array.h | 40 + gcc-4.9/gcc/config/iq2000/abi | 239 + gcc-4.9/gcc/config/iq2000/constraints.md | 79 + gcc-4.9/gcc/config/iq2000/iq2000-opts.h | 32 + gcc-4.9/gcc/config/iq2000/iq2000-protos.h | 48 + gcc-4.9/gcc/config/iq2000/iq2000.c | 3474 ++ gcc-4.9/gcc/config/iq2000/iq2000.h | 871 + gcc-4.9/gcc/config/iq2000/iq2000.md | 2179 + gcc-4.9/gcc/config/iq2000/iq2000.opt | 74 + gcc-4.9/gcc/config/iq2000/predicates.md | 240 + gcc-4.9/gcc/config/kfreebsd-gnu.h | 35 + gcc-4.9/gcc/config/knetbsd-gnu.h | 35 + gcc-4.9/gcc/config/kopensolaris-gnu.h | 34 + gcc-4.9/gcc/config/linux-android.h | 59 + gcc-4.9/gcc/config/linux-android.opt | 30 + gcc-4.9/gcc/config/linux-protos.h | 22 + gcc-4.9/gcc/config/linux.c | 46 + gcc-4.9/gcc/config/linux.h | 129 + gcc-4.9/gcc/config/linux.opt | 32 + gcc-4.9/gcc/config/lm32/constraints.md | 57 + gcc-4.9/gcc/config/lm32/lm32-protos.h | 38 + gcc-4.9/gcc/config/lm32/lm32.c | 1227 + gcc-4.9/gcc/config/lm32/lm32.h | 545 + gcc-4.9/gcc/config/lm32/lm32.md | 1015 + gcc-4.9/gcc/config/lm32/lm32.opt | 40 + gcc-4.9/gcc/config/lm32/predicates.md | 72 + gcc-4.9/gcc/config/lm32/rtems.h | 32 + gcc-4.9/gcc/config/lm32/t-lm32 | 2 + gcc-4.9/gcc/config/lm32/t-rtems | 21 + gcc-4.9/gcc/config/lm32/uclinux-elf.h | 78 + gcc-4.9/gcc/config/lynx.h | 176 + gcc-4.9/gcc/config/lynx.opt | 31 + gcc-4.9/gcc/config/m32c/addsub.md | 259 + gcc-4.9/gcc/config/m32c/bitops.md | 
421 + gcc-4.9/gcc/config/m32c/blkmov.md | 241 + gcc-4.9/gcc/config/m32c/cond.md | 309 + gcc-4.9/gcc/config/m32c/constraints.md | 225 + gcc-4.9/gcc/config/m32c/jump.md | 134 + gcc-4.9/gcc/config/m32c/m32c-modes.def | 28 + gcc-4.9/gcc/config/m32c/m32c-pragma.c | 129 + gcc-4.9/gcc/config/m32c/m32c-protos.h | 84 + gcc-4.9/gcc/config/m32c/m32c.abi | 131 + gcc-4.9/gcc/config/m32c/m32c.c | 4544 ++ gcc-4.9/gcc/config/m32c/m32c.h | 647 + gcc-4.9/gcc/config/m32c/m32c.md | 79 + gcc-4.9/gcc/config/m32c/m32c.opt | 43 + gcc-4.9/gcc/config/m32c/minmax.md | 57 + gcc-4.9/gcc/config/m32c/mov.md | 490 + gcc-4.9/gcc/config/m32c/muldiv.md | 287 + gcc-4.9/gcc/config/m32c/predicates.md | 294 + gcc-4.9/gcc/config/m32c/prologue.md | 201 + gcc-4.9/gcc/config/m32c/rtems.h | 33 + gcc-4.9/gcc/config/m32c/shift.md | 351 + gcc-4.9/gcc/config/m32c/t-m32c | 42 + gcc-4.9/gcc/config/m32r/constraints.md | 147 + gcc-4.9/gcc/config/m32r/linux.h | 91 + gcc-4.9/gcc/config/m32r/little.h | 20 + gcc-4.9/gcc/config/m32r/m32r-opts.h | 108 + gcc-4.9/gcc/config/m32r/m32r-protos.h | 62 + gcc-4.9/gcc/config/m32r/m32r.c | 2921 ++ gcc-4.9/gcc/config/m32r/m32r.h | 1051 + gcc-4.9/gcc/config/m32r/m32r.md | 2276 + gcc-4.9/gcc/config/m32r/m32r.opt | 117 + gcc-4.9/gcc/config/m32r/predicates.md | 440 + gcc-4.9/gcc/config/m32r/rtems.h | 33 + gcc-4.9/gcc/config/m32r/t-linux | 20 + gcc-4.9/gcc/config/m32r/t-m32r | 31 + gcc-4.9/gcc/config/m68k/cf.md | 2250 + gcc-4.9/gcc/config/m68k/constraints.md | 165 + gcc-4.9/gcc/config/m68k/genopt.sh | 92 + gcc-4.9/gcc/config/m68k/ieee.opt | 24 + gcc-4.9/gcc/config/m68k/linux.h | 241 + gcc-4.9/gcc/config/m68k/m68020-elf.h | 30 + gcc-4.9/gcc/config/m68k/m68k-devices.def | 195 + gcc-4.9/gcc/config/m68k/m68k-isas.def | 41 + gcc-4.9/gcc/config/m68k/m68k-microarchs.def | 44 + gcc-4.9/gcc/config/m68k/m68k-modes.def | 25 + gcc-4.9/gcc/config/m68k/m68k-none.h | 19 + gcc-4.9/gcc/config/m68k/m68k-opts.h | 44 + gcc-4.9/gcc/config/m68k/m68k-protos.h | 101 + gcc-4.9/gcc/config/m68k/m68k-tables.opt | 445 + gcc-4.9/gcc/config/m68k/m68k.c | 6530 +++ gcc-4.9/gcc/config/m68k/m68k.h | 980 + gcc-4.9/gcc/config/m68k/m68k.md | 7585 +++ gcc-4.9/gcc/config/m68k/m68k.opt | 195 + gcc-4.9/gcc/config/m68k/m68kelf.h | 148 + gcc-4.9/gcc/config/m68k/m68kemb.h | 52 + gcc-4.9/gcc/config/m68k/math-68881.h | 529 + gcc-4.9/gcc/config/m68k/netbsd-elf.h | 297 + gcc-4.9/gcc/config/m68k/openbsd.h | 90 + gcc-4.9/gcc/config/m68k/predicates.md | 246 + gcc-4.9/gcc/config/m68k/print-sysroot-suffix.sh | 81 + gcc-4.9/gcc/config/m68k/rtemself.h | 33 + gcc-4.9/gcc/config/m68k/sync.md | 80 + gcc-4.9/gcc/config/m68k/t-cf | 7 + gcc-4.9/gcc/config/m68k/t-linux | 31 + gcc-4.9/gcc/config/m68k/t-m68k | 4 + gcc-4.9/gcc/config/m68k/t-m68kbare | 4 + gcc-4.9/gcc/config/m68k/t-mlibs | 94 + gcc-4.9/gcc/config/m68k/t-openbsd | 4 + gcc-4.9/gcc/config/m68k/t-opts | 5 + gcc-4.9/gcc/config/m68k/t-rtems | 9 + gcc-4.9/gcc/config/m68k/t-uclinux | 33 + gcc-4.9/gcc/config/m68k/uclinux.h | 69 + gcc-4.9/gcc/config/m68k/uclinux.opt | 35 + gcc-4.9/gcc/config/mcore/constraints.md | 111 + gcc-4.9/gcc/config/mcore/mcore-elf.h | 125 + gcc-4.9/gcc/config/mcore/mcore-protos.h | 68 + gcc-4.9/gcc/config/mcore/mcore.c | 3184 ++ gcc-4.9/gcc/config/mcore/mcore.h | 839 + gcc-4.9/gcc/config/mcore/mcore.md | 3057 ++ gcc-4.9/gcc/config/mcore/mcore.opt | 75 + gcc-4.9/gcc/config/mcore/predicates.md | 338 + gcc-4.9/gcc/config/mcore/t-mcore | 29 + gcc-4.9/gcc/config/mep/constraints.md | 162 + gcc-4.9/gcc/config/mep/default.h | 10 + gcc-4.9/gcc/config/mep/intrinsics.h | 620 + 
gcc-4.9/gcc/config/mep/intrinsics.md | 21568 +++++++++ gcc-4.9/gcc/config/mep/ivc2-template.h | 9 + gcc-4.9/gcc/config/mep/mep-c5.cpu | 277 + gcc-4.9/gcc/config/mep/mep-core.cpu | 3080 ++ gcc-4.9/gcc/config/mep/mep-default.cpu | 25 + gcc-4.9/gcc/config/mep/mep-ext-cop.cpu | 23 + gcc-4.9/gcc/config/mep/mep-intrin.h | 8933 ++++ gcc-4.9/gcc/config/mep/mep-ivc2.cpu | 9775 ++++ gcc-4.9/gcc/config/mep/mep-pragma.c | 404 + gcc-4.9/gcc/config/mep/mep-protos.h | 128 + gcc-4.9/gcc/config/mep/mep.c | 7303 +++ gcc-4.9/gcc/config/mep/mep.cpu | 21 + gcc-4.9/gcc/config/mep/mep.h | 793 + gcc-4.9/gcc/config/mep/mep.md | 2256 + gcc-4.9/gcc/config/mep/mep.opt | 164 + gcc-4.9/gcc/config/mep/predicates.md | 184 + gcc-4.9/gcc/config/mep/t-mep | 68 + gcc-4.9/gcc/config/microblaze/constraints.md | 77 + gcc-4.9/gcc/config/microblaze/linux.h | 45 + gcc-4.9/gcc/config/microblaze/microblaze-c.c | 105 + gcc-4.9/gcc/config/microblaze/microblaze-protos.h | 62 + gcc-4.9/gcc/config/microblaze/microblaze.c | 3594 ++ gcc-4.9/gcc/config/microblaze/microblaze.h | 937 + gcc-4.9/gcc/config/microblaze/microblaze.md | 2264 + gcc-4.9/gcc/config/microblaze/microblaze.opt | 127 + gcc-4.9/gcc/config/microblaze/predicates.md | 129 + gcc-4.9/gcc/config/microblaze/rtems.h | 25 + gcc-4.9/gcc/config/microblaze/sync.md | 43 + gcc-4.9/gcc/config/microblaze/t-microblaze | 12 + gcc-4.9/gcc/config/microblaze/t-microblaze-linux | 3 + gcc-4.9/gcc/config/microblaze/t-rtems | 1 + gcc-4.9/gcc/config/mips/10000.md | 251 + gcc-4.9/gcc/config/mips/20kc.md | 284 + gcc-4.9/gcc/config/mips/24k.md | 545 + gcc-4.9/gcc/config/mips/3000.md | 71 + gcc-4.9/gcc/config/mips/4000.md | 32 + gcc-4.9/gcc/config/mips/4100.md | 51 + gcc-4.9/gcc/config/mips/4130.md | 153 + gcc-4.9/gcc/config/mips/4300.md | 85 + gcc-4.9/gcc/config/mips/4600.md | 130 + gcc-4.9/gcc/config/mips/4k.md | 153 + gcc-4.9/gcc/config/mips/5000.md | 80 + gcc-4.9/gcc/config/mips/5400.md | 184 + gcc-4.9/gcc/config/mips/5500.md | 227 + gcc-4.9/gcc/config/mips/5k.md | 229 + gcc-4.9/gcc/config/mips/6000.md | 56 + gcc-4.9/gcc/config/mips/7000.md | 214 + gcc-4.9/gcc/config/mips/74k.md | 501 + gcc-4.9/gcc/config/mips/9000.md | 151 + gcc-4.9/gcc/config/mips/constraints.md | 363 + gcc-4.9/gcc/config/mips/driver-native.c | 89 + gcc-4.9/gcc/config/mips/elf.h | 50 + gcc-4.9/gcc/config/mips/elfoabi.h | 39 + gcc-4.9/gcc/config/mips/elforion.h | 20 + gcc-4.9/gcc/config/mips/generic.md | 121 + gcc-4.9/gcc/config/mips/genopt.sh | 123 + gcc-4.9/gcc/config/mips/gnu-user.h | 139 + gcc-4.9/gcc/config/mips/gnu-user64.h | 52 + gcc-4.9/gcc/config/mips/linux-common.h | 64 + gcc-4.9/gcc/config/mips/linux.h | 25 + gcc-4.9/gcc/config/mips/linux64.h | 44 + gcc-4.9/gcc/config/mips/loongson.h | 690 + gcc-4.9/gcc/config/mips/loongson.md | 939 + gcc-4.9/gcc/config/mips/loongson2ef.md | 252 + gcc-4.9/gcc/config/mips/loongson3a.md | 137 + gcc-4.9/gcc/config/mips/micromips.md | 138 + gcc-4.9/gcc/config/mips/mips-cpus.def | 154 + gcc-4.9/gcc/config/mips/mips-dsp.md | 1205 + gcc-4.9/gcc/config/mips/mips-dspr2.md | 632 + gcc-4.9/gcc/config/mips/mips-fixed.md | 156 + gcc-4.9/gcc/config/mips/mips-ftypes.def | 129 + gcc-4.9/gcc/config/mips/mips-modes.def | 48 + gcc-4.9/gcc/config/mips/mips-opts.h | 50 + gcc-4.9/gcc/config/mips/mips-protos.h | 362 + gcc-4.9/gcc/config/mips/mips-ps-3d.md | 764 + gcc-4.9/gcc/config/mips/mips-tables.opt | 644 + gcc-4.9/gcc/config/mips/mips.c | 19139 ++++++++ gcc-4.9/gcc/config/mips/mips.h | 3005 ++ gcc-4.9/gcc/config/mips/mips.md | 7190 +++ gcc-4.9/gcc/config/mips/mips.opt | 404 + 
gcc-4.9/gcc/config/mips/mti-elf.h | 42 + gcc-4.9/gcc/config/mips/mti-linux.h | 46 + gcc-4.9/gcc/config/mips/n32-elf.h | 35 + gcc-4.9/gcc/config/mips/netbsd.h | 179 + gcc-4.9/gcc/config/mips/octeon.md | 136 + gcc-4.9/gcc/config/mips/predicates.md | 494 + gcc-4.9/gcc/config/mips/r3900.h | 39 + gcc-4.9/gcc/config/mips/rtems.h | 34 + gcc-4.9/gcc/config/mips/sb1.md | 573 + gcc-4.9/gcc/config/mips/sde.h | 103 + gcc-4.9/gcc/config/mips/sde.opt | 28 + gcc-4.9/gcc/config/mips/sdemtk.h | 102 + gcc-4.9/gcc/config/mips/sr71k.md | 337 + gcc-4.9/gcc/config/mips/st.h | 30 + gcc-4.9/gcc/config/mips/sync.md | 716 + gcc-4.9/gcc/config/mips/t-elf | 23 + gcc-4.9/gcc/config/mips/t-irix6 | 4 + gcc-4.9/gcc/config/mips/t-isa3264 | 33 + gcc-4.9/gcc/config/mips/t-linux64 | 26 + gcc-4.9/gcc/config/mips/t-mips | 22 + gcc-4.9/gcc/config/mips/t-mti-elf | 50 + gcc-4.9/gcc/config/mips/t-mti-linux | 50 + gcc-4.9/gcc/config/mips/t-r3900 | 23 + gcc-4.9/gcc/config/mips/t-rtems | 34 + gcc-4.9/gcc/config/mips/t-sb1 | 62 + gcc-4.9/gcc/config/mips/t-sde | 37 + gcc-4.9/gcc/config/mips/t-sdemtk | 40 + gcc-4.9/gcc/config/mips/t-sr71k | 21 + gcc-4.9/gcc/config/mips/t-st | 30 + gcc-4.9/gcc/config/mips/t-vr | 106 + gcc-4.9/gcc/config/mips/t-vxworks | 35 + gcc-4.9/gcc/config/mips/vr.h | 58 + gcc-4.9/gcc/config/mips/vxworks.h | 78 + gcc-4.9/gcc/config/mips/x-native | 3 + gcc-4.9/gcc/config/mips/xlp.md | 213 + gcc-4.9/gcc/config/mips/xlr.md | 94 + gcc-4.9/gcc/config/mmix/constraints.md | 116 + gcc-4.9/gcc/config/mmix/mmix-modes.def | 49 + gcc-4.9/gcc/config/mmix/mmix-protos.h | 85 + gcc-4.9/gcc/config/mmix/mmix.c | 2799 ++ gcc-4.9/gcc/config/mmix/mmix.h | 831 + gcc-4.9/gcc/config/mmix/mmix.md | 1243 + gcc-4.9/gcc/config/mmix/mmix.opt | 99 + gcc-4.9/gcc/config/mmix/predicates.md | 174 + gcc-4.9/gcc/config/mmix/t-mmix | 20 + gcc-4.9/gcc/config/mn10300/constraints.md | 107 + gcc-4.9/gcc/config/mn10300/linux.h | 87 + gcc-4.9/gcc/config/mn10300/mn10300-modes.def | 23 + gcc-4.9/gcc/config/mn10300/mn10300-opts.h | 31 + gcc-4.9/gcc/config/mn10300/mn10300-protos.h | 48 + gcc-4.9/gcc/config/mn10300/mn10300.c | 3426 ++ gcc-4.9/gcc/config/mn10300/mn10300.h | 732 + gcc-4.9/gcc/config/mn10300/mn10300.md | 2217 + gcc-4.9/gcc/config/mn10300/mn10300.opt | 67 + gcc-4.9/gcc/config/mn10300/predicates.md | 73 + gcc-4.9/gcc/config/mn10300/t-mn10300 | 20 + gcc-4.9/gcc/config/moxie/constraints.md | 56 + gcc-4.9/gcc/config/moxie/moxie-protos.h | 24 + gcc-4.9/gcc/config/moxie/moxie.c | 614 + gcc-4.9/gcc/config/moxie/moxie.h | 491 + gcc-4.9/gcc/config/moxie/moxie.md | 450 + gcc-4.9/gcc/config/moxie/moxie.opt | 31 + gcc-4.9/gcc/config/moxie/predicates.md | 55 + gcc-4.9/gcc/config/moxie/rtems.h | 35 + gcc-4.9/gcc/config/moxie/t-moxie | 23 + gcc-4.9/gcc/config/moxie/uclinux.h | 42 + gcc-4.9/gcc/config/msp430/README.txt | 7 + gcc-4.9/gcc/config/msp430/constraints.md | 85 + gcc-4.9/gcc/config/msp430/msp430-c.c | 36 + gcc-4.9/gcc/config/msp430/msp430-modes.def | 3 + gcc-4.9/gcc/config/msp430/msp430-protos.h | 49 + gcc-4.9/gcc/config/msp430/msp430.c | 2338 + gcc-4.9/gcc/config/msp430/msp430.h | 411 + gcc-4.9/gcc/config/msp430/msp430.md | 1370 + gcc-4.9/gcc/config/msp430/msp430.opt | 38 + gcc-4.9/gcc/config/msp430/predicates.md | 80 + gcc-4.9/gcc/config/msp430/t-msp430 | 257 + gcc-4.9/gcc/config/nds32/constants.md | 46 + gcc-4.9/gcc/config/nds32/constraints.md | 254 + gcc-4.9/gcc/config/nds32/iterators.md | 55 + gcc-4.9/gcc/config/nds32/nds32-doubleword.md | 251 + gcc-4.9/gcc/config/nds32/nds32-intrinsic.md | 97 + gcc-4.9/gcc/config/nds32/nds32-modes.def | 21 + 
gcc-4.9/gcc/config/nds32/nds32-multiple.md | 410 + gcc-4.9/gcc/config/nds32/nds32-opts.h | 35 + gcc-4.9/gcc/config/nds32/nds32-peephole2.md | 25 + gcc-4.9/gcc/config/nds32/nds32-protos.h | 128 + gcc-4.9/gcc/config/nds32/nds32.c | 5724 +++ gcc-4.9/gcc/config/nds32/nds32.h | 993 + gcc-4.9/gcc/config/nds32/nds32.md | 2221 + gcc-4.9/gcc/config/nds32/nds32.opt | 102 + gcc-4.9/gcc/config/nds32/nds32_intrinsic.h | 37 + gcc-4.9/gcc/config/nds32/pipelines.md | 29 + gcc-4.9/gcc/config/nds32/predicates.md | 92 + gcc-4.9/gcc/config/nds32/t-mlibs | 38 + gcc-4.9/gcc/config/netbsd-elf.h | 86 + gcc-4.9/gcc/config/netbsd-elf.opt | 32 + gcc-4.9/gcc/config/netbsd.h | 177 + gcc-4.9/gcc/config/netbsd.opt | 35 + gcc-4.9/gcc/config/newlib-stdint.h | 64 + gcc-4.9/gcc/config/nios2/constraints.md | 89 + gcc-4.9/gcc/config/nios2/elf.h | 52 + gcc-4.9/gcc/config/nios2/elf.opt | 38 + gcc-4.9/gcc/config/nios2/linux.h | 38 + gcc-4.9/gcc/config/nios2/nios2-opts.h | 69 + gcc-4.9/gcc/config/nios2/nios2-protos.h | 61 + gcc-4.9/gcc/config/nios2/nios2.c | 3312 ++ gcc-4.9/gcc/config/nios2/nios2.h | 499 + gcc-4.9/gcc/config/nios2/nios2.md | 1030 + gcc-4.9/gcc/config/nios2/nios2.opt | 531 + gcc-4.9/gcc/config/nios2/predicates.md | 85 + gcc-4.9/gcc/config/nios2/t-nios2 | 27 + gcc-4.9/gcc/config/openbsd-libpthread.h | 22 + gcc-4.9/gcc/config/openbsd-oldgas.h | 22 + gcc-4.9/gcc/config/openbsd-stdint.h | 34 + gcc-4.9/gcc/config/openbsd.h | 289 + gcc-4.9/gcc/config/openbsd.opt | 35 + gcc-4.9/gcc/config/pa/constraints.md | 135 + gcc-4.9/gcc/config/pa/elf.h | 92 + gcc-4.9/gcc/config/pa/pa-64.h | 100 + gcc-4.9/gcc/config/pa/pa-hpux.h | 119 + gcc-4.9/gcc/config/pa/pa-hpux.opt | 41 + gcc-4.9/gcc/config/pa/pa-hpux10.h | 143 + gcc-4.9/gcc/config/pa/pa-hpux10.opt | 22 + gcc-4.9/gcc/config/pa/pa-hpux1010.h | 27 + gcc-4.9/gcc/config/pa/pa-hpux1010.opt | 23 + gcc-4.9/gcc/config/pa/pa-hpux11.h | 189 + gcc-4.9/gcc/config/pa/pa-hpux1111.h | 27 + gcc-4.9/gcc/config/pa/pa-hpux1111.opt | 23 + gcc-4.9/gcc/config/pa/pa-hpux1131.h | 29 + gcc-4.9/gcc/config/pa/pa-hpux1131.opt | 23 + gcc-4.9/gcc/config/pa/pa-linux.h | 135 + gcc-4.9/gcc/config/pa/pa-modes.def | 32 + gcc-4.9/gcc/config/pa/pa-openbsd.h | 154 + gcc-4.9/gcc/config/pa/pa-opts.h | 35 + gcc-4.9/gcc/config/pa/pa-protos.h | 121 + gcc-4.9/gcc/config/pa/pa.c | 10581 +++++ gcc-4.9/gcc/config/pa/pa.h | 1310 + gcc-4.9/gcc/config/pa/pa.md | 9855 ++++ gcc-4.9/gcc/config/pa/pa.opt | 140 + gcc-4.9/gcc/config/pa/pa32-linux.h | 64 + gcc-4.9/gcc/config/pa/pa32-openbsd.h | 22 + gcc-4.9/gcc/config/pa/pa32-regs.h | 359 + gcc-4.9/gcc/config/pa/pa64-hpux.h | 454 + gcc-4.9/gcc/config/pa/pa64-hpux.opt | 27 + gcc-4.9/gcc/config/pa/pa64-linux.h | 64 + gcc-4.9/gcc/config/pa/pa64-regs.h | 280 + gcc-4.9/gcc/config/pa/pa64-start.h | 8 + gcc-4.9/gcc/config/pa/predicates.md | 657 + gcc-4.9/gcc/config/pa/som.h | 345 + gcc-4.9/gcc/config/pa/t-dce-thr | 2 + gcc-4.9/gcc/config/pa/t-linux | 1 + gcc-4.9/gcc/config/pdp11/constraints.md | 81 + gcc-4.9/gcc/config/pdp11/pdp11-modes.def | 26 + gcc-4.9/gcc/config/pdp11/pdp11-protos.h | 49 + gcc-4.9/gcc/config/pdp11/pdp11.c | 1903 + gcc-4.9/gcc/config/pdp11/pdp11.h | 672 + gcc-4.9/gcc/config/pdp11/pdp11.md | 1436 + gcc-4.9/gcc/config/pdp11/pdp11.opt | 87 + gcc-4.9/gcc/config/pdp11/predicates.md | 54 + gcc-4.9/gcc/config/pdp11/t-pdp11 | 27 + gcc-4.9/gcc/config/picochip/constraints.md | 64 + gcc-4.9/gcc/config/picochip/dfa_space.md | 43 + gcc-4.9/gcc/config/picochip/dfa_speed.md | 123 + gcc-4.9/gcc/config/picochip/picochip-protos.h | 128 + gcc-4.9/gcc/config/picochip/picochip.c | 4703 ++ 
gcc-4.9/gcc/config/picochip/picochip.h | 661 + gcc-4.9/gcc/config/picochip/picochip.md | 2623 ++ gcc-4.9/gcc/config/picochip/picochip.opt | 46 + gcc-4.9/gcc/config/picochip/predicates.md | 72 + gcc-4.9/gcc/config/picochip/t-picochip | 24 + gcc-4.9/gcc/config/print-sysroot-suffix.sh | 145 + gcc-4.9/gcc/config/rl78/constraints.md | 386 + gcc-4.9/gcc/config/rl78/predicates.md | 71 + gcc-4.9/gcc/config/rl78/rl78-c.c | 34 + gcc-4.9/gcc/config/rl78/rl78-expand.md | 306 + gcc-4.9/gcc/config/rl78/rl78-opts.h | 30 + gcc-4.9/gcc/config/rl78/rl78-protos.h | 47 + gcc-4.9/gcc/config/rl78/rl78-real.md | 561 + gcc-4.9/gcc/config/rl78/rl78-virt.md | 416 + gcc-4.9/gcc/config/rl78/rl78.c | 3748 ++ gcc-4.9/gcc/config/rl78/rl78.h | 473 + gcc-4.9/gcc/config/rl78/rl78.md | 443 + gcc-4.9/gcc/config/rl78/rl78.opt | 55 + gcc-4.9/gcc/config/rl78/t-rl78 | 27 + gcc-4.9/gcc/config/rpath.opt | 28 + gcc-4.9/gcc/config/rs6000/40x.md | 120 + gcc-4.9/gcc/config/rs6000/440.md | 133 + gcc-4.9/gcc/config/rs6000/476.h | 32 + gcc-4.9/gcc/config/rs6000/476.md | 141 + gcc-4.9/gcc/config/rs6000/476.opt | 24 + gcc-4.9/gcc/config/rs6000/601.md | 136 + gcc-4.9/gcc/config/rs6000/603.md | 143 + gcc-4.9/gcc/config/rs6000/6xx.md | 275 + gcc-4.9/gcc/config/rs6000/7450.md | 185 + gcc-4.9/gcc/config/rs6000/750cl.h | 30 + gcc-4.9/gcc/config/rs6000/7xx.md | 184 + gcc-4.9/gcc/config/rs6000/8540.md | 250 + gcc-4.9/gcc/config/rs6000/a2.md | 134 + gcc-4.9/gcc/config/rs6000/aix-stdint.h | 51 + gcc-4.9/gcc/config/rs6000/aix.h | 225 + gcc-4.9/gcc/config/rs6000/aix43.h | 164 + gcc-4.9/gcc/config/rs6000/aix51.h | 168 + gcc-4.9/gcc/config/rs6000/aix52.h | 181 + gcc-4.9/gcc/config/rs6000/aix53.h | 181 + gcc-4.9/gcc/config/rs6000/aix61.h | 214 + gcc-4.9/gcc/config/rs6000/aix64.opt | 55 + gcc-4.9/gcc/config/rs6000/altivec.h | 536 + gcc-4.9/gcc/config/rs6000/altivec.md | 3324 ++ gcc-4.9/gcc/config/rs6000/biarch64.h | 26 + gcc-4.9/gcc/config/rs6000/cell.md | 399 + gcc-4.9/gcc/config/rs6000/constraints.md | 242 + gcc-4.9/gcc/config/rs6000/crypto.md | 101 + gcc-4.9/gcc/config/rs6000/darwin.h | 426 + gcc-4.9/gcc/config/rs6000/darwin.md | 480 + gcc-4.9/gcc/config/rs6000/darwin.opt | 42 + gcc-4.9/gcc/config/rs6000/darwin64.h | 32 + gcc-4.9/gcc/config/rs6000/darwin7.h | 32 + gcc-4.9/gcc/config/rs6000/darwin8.h | 31 + gcc-4.9/gcc/config/rs6000/default64.h | 27 + gcc-4.9/gcc/config/rs6000/dfp.md | 324 + gcc-4.9/gcc/config/rs6000/driver-rs6000.c | 528 + gcc-4.9/gcc/config/rs6000/e300c2c3.md | 189 + gcc-4.9/gcc/config/rs6000/e500.h | 54 + gcc-4.9/gcc/config/rs6000/e500mc.md | 200 + gcc-4.9/gcc/config/rs6000/e500mc64.md | 191 + gcc-4.9/gcc/config/rs6000/e5500.md | 176 + gcc-4.9/gcc/config/rs6000/e6500.md | 213 + gcc-4.9/gcc/config/rs6000/eabi.h | 41 + gcc-4.9/gcc/config/rs6000/eabialtivec.h | 27 + gcc-4.9/gcc/config/rs6000/eabisim.h | 51 + gcc-4.9/gcc/config/rs6000/eabispe.h | 26 + gcc-4.9/gcc/config/rs6000/freebsd.h | 78 + gcc-4.9/gcc/config/rs6000/freebsd64.h | 435 + gcc-4.9/gcc/config/rs6000/genopt.sh | 64 + gcc-4.9/gcc/config/rs6000/host-darwin.c | 153 + gcc-4.9/gcc/config/rs6000/host-ppc64-darwin.c | 30 + gcc-4.9/gcc/config/rs6000/htm.md | 366 + gcc-4.9/gcc/config/rs6000/htmintrin.h | 131 + gcc-4.9/gcc/config/rs6000/htmxlintrin.h | 208 + gcc-4.9/gcc/config/rs6000/linux.h | 153 + gcc-4.9/gcc/config/rs6000/linux64.h | 583 + gcc-4.9/gcc/config/rs6000/linux64.opt | 42 + gcc-4.9/gcc/config/rs6000/linuxaltivec.h | 32 + gcc-4.9/gcc/config/rs6000/linuxspe.h | 32 + gcc-4.9/gcc/config/rs6000/lynx.h | 119 + gcc-4.9/gcc/config/rs6000/milli.exp | 7 + 
gcc-4.9/gcc/config/rs6000/mpc.md | 111 + gcc-4.9/gcc/config/rs6000/netbsd.h | 90 + gcc-4.9/gcc/config/rs6000/option-defaults.h | 64 + gcc-4.9/gcc/config/rs6000/paired.h | 75 + gcc-4.9/gcc/config/rs6000/paired.md | 488 + gcc-4.9/gcc/config/rs6000/power4.md | 408 + gcc-4.9/gcc/config/rs6000/power5.md | 308 + gcc-4.9/gcc/config/rs6000/power6.md | 578 + gcc-4.9/gcc/config/rs6000/power7.md | 333 + gcc-4.9/gcc/config/rs6000/power8.md | 373 + gcc-4.9/gcc/config/rs6000/ppc-asm.h | 381 + gcc-4.9/gcc/config/rs6000/ppu_intrinsics.h | 727 + gcc-4.9/gcc/config/rs6000/predicates.md | 1828 + gcc-4.9/gcc/config/rs6000/rs6000-builtin.def | 1845 + gcc-4.9/gcc/config/rs6000/rs6000-c.c | 4557 ++ gcc-4.9/gcc/config/rs6000/rs6000-cpus.def | 191 + gcc-4.9/gcc/config/rs6000/rs6000-linux.c | 38 + gcc-4.9/gcc/config/rs6000/rs6000-modes.def | 49 + gcc-4.9/gcc/config/rs6000/rs6000-opts.h | 160 + gcc-4.9/gcc/config/rs6000/rs6000-protos.h | 224 + gcc-4.9/gcc/config/rs6000/rs6000-tables.opt | 190 + gcc-4.9/gcc/config/rs6000/rs6000.c | 32834 +++++++++++++ gcc-4.9/gcc/config/rs6000/rs6000.h | 2676 ++ gcc-4.9/gcc/config/rs6000/rs6000.md | 15700 ++++++ gcc-4.9/gcc/config/rs6000/rs6000.opt | 590 + gcc-4.9/gcc/config/rs6000/rs64.md | 154 + gcc-4.9/gcc/config/rs6000/rtems.h | 59 + gcc-4.9/gcc/config/rs6000/secureplt.h | 20 + gcc-4.9/gcc/config/rs6000/si2vmx.h | 2048 + gcc-4.9/gcc/config/rs6000/singlefp.h | 40 + gcc-4.9/gcc/config/rs6000/spe.h | 1107 + gcc-4.9/gcc/config/rs6000/spe.md | 3223 ++ gcc-4.9/gcc/config/rs6000/spu2vmx.h | 2415 + gcc-4.9/gcc/config/rs6000/sync.md | 411 + gcc-4.9/gcc/config/rs6000/sysv4.h | 951 + gcc-4.9/gcc/config/rs6000/sysv4.opt | 157 + gcc-4.9/gcc/config/rs6000/sysv4le.h | 37 + gcc-4.9/gcc/config/rs6000/t-aix43 | 39 + gcc-4.9/gcc/config/rs6000/t-aix52 | 26 + gcc-4.9/gcc/config/rs6000/t-darwin64 | 2 + gcc-4.9/gcc/config/rs6000/t-darwin8 | 3 + gcc-4.9/gcc/config/rs6000/t-fprules | 26 + gcc-4.9/gcc/config/rs6000/t-freebsd64 | 31 + gcc-4.9/gcc/config/rs6000/t-linux | 13 + gcc-4.9/gcc/config/rs6000/t-linux64 | 36 + gcc-4.9/gcc/config/rs6000/t-linux64bele | 7 + gcc-4.9/gcc/config/rs6000/t-linux64le | 3 + gcc-4.9/gcc/config/rs6000/t-linux64lebe | 7 + gcc-4.9/gcc/config/rs6000/t-lynx | 29 + gcc-4.9/gcc/config/rs6000/t-netbsd | 36 + gcc-4.9/gcc/config/rs6000/t-ppccomm | 23 + gcc-4.9/gcc/config/rs6000/t-ppcendian | 30 + gcc-4.9/gcc/config/rs6000/t-ppcgas | 32 + gcc-4.9/gcc/config/rs6000/t-ppcos | 8 + gcc-4.9/gcc/config/rs6000/t-rs6000 | 66 + gcc-4.9/gcc/config/rs6000/t-rtems | 88 + gcc-4.9/gcc/config/rs6000/t-spe | 73 + gcc-4.9/gcc/config/rs6000/t-vxworks | 25 + gcc-4.9/gcc/config/rs6000/t-vxworksae | 5 + gcc-4.9/gcc/config/rs6000/t-xilinx | 28 + gcc-4.9/gcc/config/rs6000/titan.md | 169 + gcc-4.9/gcc/config/rs6000/vec_types.h | 52 + gcc-4.9/gcc/config/rs6000/vector.md | 1217 + gcc-4.9/gcc/config/rs6000/vsx.md | 2015 + gcc-4.9/gcc/config/rs6000/vxworks.h | 133 + gcc-4.9/gcc/config/rs6000/x-aix | 6 + gcc-4.9/gcc/config/rs6000/x-darwin | 3 + gcc-4.9/gcc/config/rs6000/x-darwin64 | 3 + gcc-4.9/gcc/config/rs6000/x-linux-relax | 2 + gcc-4.9/gcc/config/rs6000/x-rs6000 | 3 + gcc-4.9/gcc/config/rs6000/xcoff.h | 362 + gcc-4.9/gcc/config/rs6000/xfpu.h | 26 + gcc-4.9/gcc/config/rs6000/xfpu.md | 140 + gcc-4.9/gcc/config/rs6000/xilinx.h | 47 + gcc-4.9/gcc/config/rs6000/xilinx.opt | 32 + gcc-4.9/gcc/config/rtems.h | 45 + gcc-4.9/gcc/config/rtems.opt | 35 + gcc-4.9/gcc/config/rx/constraints.md | 108 + gcc-4.9/gcc/config/rx/predicates.md | 307 + gcc-4.9/gcc/config/rx/rx-modes.def | 25 + gcc-4.9/gcc/config/rx/rx-opts.h | 31 + 
gcc-4.9/gcc/config/rx/rx-protos.h | 46 + gcc-4.9/gcc/config/rx/rx.c | 3443 ++ gcc-4.9/gcc/config/rx/rx.h | 665 + gcc-4.9/gcc/config/rx/rx.md | 2641 ++ gcc-4.9/gcc/config/rx/rx.opt | 141 + gcc-4.9/gcc/config/rx/t-rx | 34 + gcc-4.9/gcc/config/s390/2064.md | 135 + gcc-4.9/gcc/config/s390/2084.md | 309 + gcc-4.9/gcc/config/s390/2097.md | 764 + gcc-4.9/gcc/config/s390/2817.md | 314 + gcc-4.9/gcc/config/s390/2827.md | 624 + gcc-4.9/gcc/config/s390/constraints.md | 501 + gcc-4.9/gcc/config/s390/htmintrin.h | 57 + gcc-4.9/gcc/config/s390/htmxlintrin.h | 189 + gcc-4.9/gcc/config/s390/linux.h | 93 + gcc-4.9/gcc/config/s390/predicates.md | 470 + gcc-4.9/gcc/config/s390/s390-modes.def | 183 + gcc-4.9/gcc/config/s390/s390-opts.h | 41 + gcc-4.9/gcc/config/s390/s390-protos.h | 117 + gcc-4.9/gcc/config/s390/s390.c | 12237 +++++ gcc-4.9/gcc/config/s390/s390.h | 931 + gcc-4.9/gcc/config/s390/s390.md | 10220 ++++ gcc-4.9/gcc/config/s390/s390.opt | 167 + gcc-4.9/gcc/config/s390/s390intrin.h | 33 + gcc-4.9/gcc/config/s390/s390x.h | 27 + gcc-4.9/gcc/config/s390/t-linux64 | 11 + gcc-4.9/gcc/config/s390/tpf.h | 118 + gcc-4.9/gcc/config/s390/tpf.md | 33 + gcc-4.9/gcc/config/s390/tpf.opt | 27 + gcc-4.9/gcc/config/score/constraints.md | 93 + gcc-4.9/gcc/config/score/elf.h | 97 + gcc-4.9/gcc/config/score/predicates.md | 152 + gcc-4.9/gcc/config/score/score-conv.h | 78 + gcc-4.9/gcc/config/score/score-generic.md | 44 + gcc-4.9/gcc/config/score/score-modes.def | 24 + gcc-4.9/gcc/config/score/score-protos.h | 83 + gcc-4.9/gcc/config/score/score.c | 1939 + gcc-4.9/gcc/config/score/score.h | 867 + gcc-4.9/gcc/config/score/score.md | 1879 + gcc-4.9/gcc/config/score/score.opt | 57 + gcc-4.9/gcc/config/sh/constraints.md | 324 + gcc-4.9/gcc/config/sh/divcost-analysis | 88 + gcc-4.9/gcc/config/sh/divtab-sh4-300.c | 77 + gcc-4.9/gcc/config/sh/divtab-sh4.c | 85 + gcc-4.9/gcc/config/sh/divtab.c | 203 + gcc-4.9/gcc/config/sh/elf.h | 88 + gcc-4.9/gcc/config/sh/embed-elf.h | 36 + gcc-4.9/gcc/config/sh/iterators.md | 46 + gcc-4.9/gcc/config/sh/linux.h | 152 + gcc-4.9/gcc/config/sh/little.h | 21 + gcc-4.9/gcc/config/sh/netbsd-elf.h | 106 + gcc-4.9/gcc/config/sh/newlib.h | 29 + gcc-4.9/gcc/config/sh/predicates.md | 1201 + gcc-4.9/gcc/config/sh/rtems.h | 26 + gcc-4.9/gcc/config/sh/rtemself.h | 26 + gcc-4.9/gcc/config/sh/sh-c.c | 148 + gcc-4.9/gcc/config/sh/sh-mem.cc | 610 + gcc-4.9/gcc/config/sh/sh-modes.def | 34 + gcc-4.9/gcc/config/sh/sh-protos.h | 235 + gcc-4.9/gcc/config/sh/sh.c | 13504 ++++++ gcc-4.9/gcc/config/sh/sh.h | 2311 + gcc-4.9/gcc/config/sh/sh.md | 15960 +++++++ gcc-4.9/gcc/config/sh/sh.opt | 362 + gcc-4.9/gcc/config/sh/sh1.md | 85 + gcc-4.9/gcc/config/sh/sh4-300.md | 281 + gcc-4.9/gcc/config/sh/sh4.md | 454 + gcc-4.9/gcc/config/sh/sh4a.md | 234 + gcc-4.9/gcc/config/sh/sh64.h | 22 + gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc | 473 + gcc-4.9/gcc/config/sh/sh_treg_combine.cc | 1509 + gcc-4.9/gcc/config/sh/shmedia.h | 30 + gcc-4.9/gcc/config/sh/shmedia.md | 94 + gcc-4.9/gcc/config/sh/sshmedia.h | 78 + gcc-4.9/gcc/config/sh/superh.h | 104 + gcc-4.9/gcc/config/sh/superh.opt | 10 + gcc-4.9/gcc/config/sh/sync.md | 1388 + gcc-4.9/gcc/config/sh/t-linux | 2 + gcc-4.9/gcc/config/sh/t-netbsd-sh5-64 | 1 + gcc-4.9/gcc/config/sh/t-rtems | 7 + gcc-4.9/gcc/config/sh/t-sh | 101 + gcc-4.9/gcc/config/sh/t-sh64 | 22 + gcc-4.9/gcc/config/sh/t-vxworks | 6 + gcc-4.9/gcc/config/sh/ushmedia.h | 1091 + gcc-4.9/gcc/config/sh/vxworks.h | 66 + gcc-4.9/gcc/config/sol2-10.h | 24 + gcc-4.9/gcc/config/sol2-bi.h | 135 + gcc-4.9/gcc/config/sol2-c.c | 274 
+ gcc-4.9/gcc/config/sol2-cxx.c | 65 + gcc-4.9/gcc/config/sol2-protos.h | 33 + gcc-4.9/gcc/config/sol2-stubs.c | 33 + gcc-4.9/gcc/config/sol2.c | 300 + gcc-4.9/gcc/config/sol2.h | 300 + gcc-4.9/gcc/config/sol2.opt | 41 + gcc-4.9/gcc/config/sparc/biarch64.h | 23 + gcc-4.9/gcc/config/sparc/constraints.md | 201 + gcc-4.9/gcc/config/sparc/cypress.md | 50 + gcc-4.9/gcc/config/sparc/default-64.h | 22 + gcc-4.9/gcc/config/sparc/driver-sparc.c | 172 + gcc-4.9/gcc/config/sparc/freebsd.h | 149 + gcc-4.9/gcc/config/sparc/hypersparc.md | 82 + gcc-4.9/gcc/config/sparc/leon.md | 64 + gcc-4.9/gcc/config/sparc/linux.h | 169 + gcc-4.9/gcc/config/sparc/linux64.h | 284 + gcc-4.9/gcc/config/sparc/long-double-switch.opt | 27 + gcc-4.9/gcc/config/sparc/netbsd-elf.h | 226 + gcc-4.9/gcc/config/sparc/niagara.md | 118 + gcc-4.9/gcc/config/sparc/niagara2.md | 120 + gcc-4.9/gcc/config/sparc/niagara4.md | 114 + gcc-4.9/gcc/config/sparc/openbsd1-64.h | 23 + gcc-4.9/gcc/config/sparc/openbsd64.h | 78 + gcc-4.9/gcc/config/sparc/predicates.md | 535 + gcc-4.9/gcc/config/sparc/rtemself.h | 33 + gcc-4.9/gcc/config/sparc/sol2.h | 411 + gcc-4.9/gcc/config/sparc/sp-elf.h | 66 + gcc-4.9/gcc/config/sparc/sp64-elf.h | 76 + gcc-4.9/gcc/config/sparc/sparc-c.c | 62 + gcc-4.9/gcc/config/sparc/sparc-modes.def | 50 + gcc-4.9/gcc/config/sparc/sparc-opts.h | 60 + gcc-4.9/gcc/config/sparc/sparc-protos.h | 116 + gcc-4.9/gcc/config/sparc/sparc.c | 12704 +++++ gcc-4.9/gcc/config/sparc/sparc.h | 1782 + gcc-4.9/gcc/config/sparc/sparc.md | 9024 ++++ gcc-4.9/gcc/config/sparc/sparc.opt | 256 + gcc-4.9/gcc/config/sparc/sparclet.md | 43 + gcc-4.9/gcc/config/sparc/supersparc.md | 92 + gcc-4.9/gcc/config/sparc/sync.md | 286 + gcc-4.9/gcc/config/sparc/sysv4.h | 119 + gcc-4.9/gcc/config/sparc/t-elf | 21 + gcc-4.9/gcc/config/sparc/t-leon | 24 + gcc-4.9/gcc/config/sparc/t-leon3 | 22 + gcc-4.9/gcc/config/sparc/t-linux | 1 + gcc-4.9/gcc/config/sparc/t-linux64 | 29 + gcc-4.9/gcc/config/sparc/t-netbsd64 | 5 + gcc-4.9/gcc/config/sparc/t-rtems | 22 + gcc-4.9/gcc/config/sparc/t-rtems-64 | 22 + gcc-4.9/gcc/config/sparc/t-sol2-64 | 4 + gcc-4.9/gcc/config/sparc/t-sparc | 23 + gcc-4.9/gcc/config/sparc/t-vxworks | 5 + gcc-4.9/gcc/config/sparc/tso.h | 23 + gcc-4.9/gcc/config/sparc/ultra1_2.md | 301 + gcc-4.9/gcc/config/sparc/ultra3.md | 194 + gcc-4.9/gcc/config/sparc/visintrin.h | 709 + gcc-4.9/gcc/config/sparc/vxworks.h | 57 + gcc-4.9/gcc/config/sparc/x-sparc | 4 + gcc-4.9/gcc/config/spu/constraints.md | 179 + gcc-4.9/gcc/config/spu/predicates.md | 122 + gcc-4.9/gcc/config/spu/spu-builtins.def | 781 + gcc-4.9/gcc/config/spu/spu-builtins.md | 864 + gcc-4.9/gcc/config/spu/spu-c.c | 235 + gcc-4.9/gcc/config/spu/spu-elf.h | 75 + gcc-4.9/gcc/config/spu/spu-modes.def | 29 + gcc-4.9/gcc/config/spu/spu-protos.h | 93 + gcc-4.9/gcc/config/spu/spu.c | 7349 +++ gcc-4.9/gcc/config/spu/spu.h | 551 + gcc-4.9/gcc/config/spu/spu.md | 5093 ++ gcc-4.9/gcc/config/spu/spu.opt | 105 + gcc-4.9/gcc/config/spu/spu_cache.h | 39 + gcc-4.9/gcc/config/spu/spu_internals.h | 421 + gcc-4.9/gcc/config/spu/spu_intrinsics.h | 83 + gcc-4.9/gcc/config/spu/spu_mfcio.h | 342 + gcc-4.9/gcc/config/spu/t-spu-elf | 34 + gcc-4.9/gcc/config/spu/vec_types.h | 36 + gcc-4.9/gcc/config/spu/vmx2spu.h | 3985 ++ gcc-4.9/gcc/config/stormy16/constraints.md | 119 + gcc-4.9/gcc/config/stormy16/predicates.md | 178 + gcc-4.9/gcc/config/stormy16/stormy-abi | 174 + gcc-4.9/gcc/config/stormy16/stormy16-protos.h | 69 + gcc-4.9/gcc/config/stormy16/stormy16.c | 2703 ++ gcc-4.9/gcc/config/stormy16/stormy16.h | 504 + 
gcc-4.9/gcc/config/stormy16/stormy16.md | 1250 + gcc-4.9/gcc/config/stormy16/stormy16.opt | 24 + gcc-4.9/gcc/config/t-darwin | 36 + gcc-4.9/gcc/config/t-glibc | 21 + gcc-4.9/gcc/config/t-libunwind | 21 + gcc-4.9/gcc/config/t-linux | 21 + gcc-4.9/gcc/config/t-lynx | 24 + gcc-4.9/gcc/config/t-openbsd | 2 + gcc-4.9/gcc/config/t-pnt16-warn | 27 + gcc-4.9/gcc/config/t-rtems | 2 + gcc-4.9/gcc/config/t-slibgcc | 2 + gcc-4.9/gcc/config/t-sol2 | 37 + gcc-4.9/gcc/config/t-sysroot-suffix | 7 + gcc-4.9/gcc/config/t-vxworks | 24 + gcc-4.9/gcc/config/t-winnt | 22 + gcc-4.9/gcc/config/tilegx/constraints.md | 122 + gcc-4.9/gcc/config/tilegx/feedback.h | 14 + gcc-4.9/gcc/config/tilegx/linux.h | 72 + gcc-4.9/gcc/config/tilegx/mul-tables.c | 27243 +++++++++++ gcc-4.9/gcc/config/tilegx/predicates.md | 298 + gcc-4.9/gcc/config/tilegx/sync.md | 227 + gcc-4.9/gcc/config/tilegx/t-tilegx | 21 + gcc-4.9/gcc/config/tilegx/tilegx-builtins.h | 325 + gcc-4.9/gcc/config/tilegx/tilegx-c.c | 57 + gcc-4.9/gcc/config/tilegx/tilegx-generic.md | 115 + gcc-4.9/gcc/config/tilegx/tilegx-modes.def | 37 + gcc-4.9/gcc/config/tilegx/tilegx-multiply.h | 78 + gcc-4.9/gcc/config/tilegx/tilegx-opts.h | 33 + gcc-4.9/gcc/config/tilegx/tilegx-protos.h | 75 + gcc-4.9/gcc/config/tilegx/tilegx.c | 5707 +++ gcc-4.9/gcc/config/tilegx/tilegx.h | 556 + gcc-4.9/gcc/config/tilegx/tilegx.md | 5630 +++ gcc-4.9/gcc/config/tilegx/tilegx.opt | 63 + gcc-4.9/gcc/config/tilepro/constraints.md | 101 + gcc-4.9/gcc/config/tilepro/feedback.h | 14 + gcc-4.9/gcc/config/tilepro/gen-mul-tables.cc | 1361 + gcc-4.9/gcc/config/tilepro/linux.h | 64 + gcc-4.9/gcc/config/tilepro/mul-tables.c | 17831 +++++++ gcc-4.9/gcc/config/tilepro/predicates.md | 260 + gcc-4.9/gcc/config/tilepro/t-tilepro | 15 + gcc-4.9/gcc/config/tilepro/tilepro-builtins.h | 216 + gcc-4.9/gcc/config/tilepro/tilepro-c.c | 56 + gcc-4.9/gcc/config/tilepro/tilepro-generic.md | 107 + gcc-4.9/gcc/config/tilepro/tilepro-modes.def | 34 + gcc-4.9/gcc/config/tilepro/tilepro-multiply.h | 82 + gcc-4.9/gcc/config/tilepro/tilepro-protos.h | 76 + gcc-4.9/gcc/config/tilepro/tilepro.c | 5097 ++ gcc-4.9/gcc/config/tilepro/tilepro.h | 481 + gcc-4.9/gcc/config/tilepro/tilepro.md | 3810 ++ gcc-4.9/gcc/config/tilepro/tilepro.opt | 36 + gcc-4.9/gcc/config/tm-dwarf2.h | 4 + gcc-4.9/gcc/config/usegas.h | 20 + gcc-4.9/gcc/config/usegld.h | 1 + gcc-4.9/gcc/config/v850/constraints.md | 108 + gcc-4.9/gcc/config/v850/predicates.md | 584 + gcc-4.9/gcc/config/v850/rtems.h | 36 + gcc-4.9/gcc/config/v850/t-rtems | 7 + gcc-4.9/gcc/config/v850/t-v850 | 35 + gcc-4.9/gcc/config/v850/v850-c.c | 274 + gcc-4.9/gcc/config/v850/v850-modes.def | 27 + gcc-4.9/gcc/config/v850/v850-opts.h | 33 + gcc-4.9/gcc/config/v850/v850-protos.h | 69 + gcc-4.9/gcc/config/v850/v850.c | 3281 ++ gcc-4.9/gcc/config/v850/v850.h | 979 + gcc-4.9/gcc/config/v850/v850.md | 3115 ++ gcc-4.9/gcc/config/v850/v850.opt | 159 + gcc-4.9/gcc/config/vax/builtins.md | 192 + gcc-4.9/gcc/config/vax/constraints.md | 119 + gcc-4.9/gcc/config/vax/elf.h | 112 + gcc-4.9/gcc/config/vax/elf.opt | 29 + gcc-4.9/gcc/config/vax/linux.h | 51 + gcc-4.9/gcc/config/vax/netbsd-elf.h | 68 + gcc-4.9/gcc/config/vax/openbsd.h | 50 + gcc-4.9/gcc/config/vax/openbsd1.h | 22 + gcc-4.9/gcc/config/vax/predicates.md | 111 + gcc-4.9/gcc/config/vax/vax-modes.def | 22 + gcc-4.9/gcc/config/vax/vax-protos.h | 39 + gcc-4.9/gcc/config/vax/vax.c | 2177 + gcc-4.9/gcc/config/vax/vax.h | 708 + gcc-4.9/gcc/config/vax/vax.md | 1662 + gcc-4.9/gcc/config/vax/vax.opt | 51 + gcc-4.9/gcc/config/vms/make-crtlmap.awk | 55 + 
gcc-4.9/gcc/config/vms/t-vms | 40 + gcc-4.9/gcc/config/vms/t-vmsnative | 34 + gcc-4.9/gcc/config/vms/vms-ar.c | 348 + gcc-4.9/gcc/config/vms/vms-c.c | 488 + gcc-4.9/gcc/config/vms/vms-crtlmap.map | 930 + gcc-4.9/gcc/config/vms/vms-f.c | 31 + gcc-4.9/gcc/config/vms/vms-ld.c | 968 + gcc-4.9/gcc/config/vms/vms-opts.h | 30 + gcc-4.9/gcc/config/vms/vms-protos.h | 35 + gcc-4.9/gcc/config/vms/vms-stdint.h | 50 + gcc-4.9/gcc/config/vms/vms.c | 330 + gcc-4.9/gcc/config/vms/vms.h | 92 + gcc-4.9/gcc/config/vms/vms.opt | 63 + gcc-4.9/gcc/config/vms/x-vms | 26 + gcc-4.9/gcc/config/vms/xm-vms.h | 63 + gcc-4.9/gcc/config/vx-common.h | 94 + gcc-4.9/gcc/config/vxworks-dummy.h | 40 + gcc-4.9/gcc/config/vxworks.c | 154 + gcc-4.9/gcc/config/vxworks.h | 140 + gcc-4.9/gcc/config/vxworks.opt | 46 + gcc-4.9/gcc/config/vxworksae.h | 70 + gcc-4.9/gcc/config/winnt-c.c | 38 + gcc-4.9/gcc/config/x-cflags-O1 | 5 + gcc-4.9/gcc/config/x-darwin | 3 + gcc-4.9/gcc/config/x-hpux | 4 + gcc-4.9/gcc/config/x-linux | 3 + gcc-4.9/gcc/config/x-openbsd | 4 + gcc-4.9/gcc/config/x-solaris | 3 + gcc-4.9/gcc/config/xtensa/constraints.md | 139 + gcc-4.9/gcc/config/xtensa/elf.h | 101 + gcc-4.9/gcc/config/xtensa/elf.opt | 29 + gcc-4.9/gcc/config/xtensa/linux.h | 65 + gcc-4.9/gcc/config/xtensa/predicates.md | 175 + gcc-4.9/gcc/config/xtensa/t-xtensa | 19 + gcc-4.9/gcc/config/xtensa/xtensa-protos.h | 73 + gcc-4.9/gcc/config/xtensa/xtensa.c | 3712 ++ gcc-4.9/gcc/config/xtensa/xtensa.h | 819 + gcc-4.9/gcc/config/xtensa/xtensa.md | 1913 + gcc-4.9/gcc/config/xtensa/xtensa.opt | 43 + 1354 files changed, 1042837 insertions(+) create mode 100644 gcc-4.9/gcc/config/README create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-arches.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-builtins.c create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-cores.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-elf.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-linux.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-modes.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-opts.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-protos.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-simd.md create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64-tune.md create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64.c create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64.h create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64.md create mode 100644 gcc-4.9/gcc/config/aarch64/aarch64.opt create mode 100644 gcc-4.9/gcc/config/aarch64/arm_neon.h create mode 100644 gcc-4.9/gcc/config/aarch64/atomics.md create mode 100644 gcc-4.9/gcc/config/aarch64/biarchilp32.h create mode 100644 gcc-4.9/gcc/config/aarch64/biarchlp64.h create mode 100644 gcc-4.9/gcc/config/aarch64/constraints.md create mode 100644 gcc-4.9/gcc/config/aarch64/gentune.sh create mode 100644 gcc-4.9/gcc/config/aarch64/iterators.md create mode 100644 gcc-4.9/gcc/config/aarch64/predicates.md create mode 100644 gcc-4.9/gcc/config/aarch64/t-aarch64 create mode 100644 gcc-4.9/gcc/config/aarch64/t-aarch64-linux create mode 100644 gcc-4.9/gcc/config/alpha/alpha-modes.def create mode 100644 gcc-4.9/gcc/config/alpha/alpha-protos.h create mode 100644 gcc-4.9/gcc/config/alpha/alpha.c create mode 100644 gcc-4.9/gcc/config/alpha/alpha.h create mode 100644 
gcc-4.9/gcc/config/alpha/alpha.md create mode 100644 gcc-4.9/gcc/config/alpha/alpha.opt create mode 100644 gcc-4.9/gcc/config/alpha/constraints.md create mode 100644 gcc-4.9/gcc/config/alpha/driver-alpha.c create mode 100644 gcc-4.9/gcc/config/alpha/elf.h create mode 100644 gcc-4.9/gcc/config/alpha/elf.opt create mode 100644 gcc-4.9/gcc/config/alpha/ev4.md create mode 100644 gcc-4.9/gcc/config/alpha/ev5.md create mode 100644 gcc-4.9/gcc/config/alpha/ev6.md create mode 100644 gcc-4.9/gcc/config/alpha/freebsd.h create mode 100644 gcc-4.9/gcc/config/alpha/linux-elf.h create mode 100644 gcc-4.9/gcc/config/alpha/linux.h create mode 100644 gcc-4.9/gcc/config/alpha/netbsd.h create mode 100644 gcc-4.9/gcc/config/alpha/openbsd.h create mode 100644 gcc-4.9/gcc/config/alpha/predicates.md create mode 100644 gcc-4.9/gcc/config/alpha/sync.md create mode 100644 gcc-4.9/gcc/config/alpha/t-linux create mode 100644 gcc-4.9/gcc/config/alpha/t-vms create mode 100644 gcc-4.9/gcc/config/alpha/vms.h create mode 100644 gcc-4.9/gcc/config/alpha/x-alpha create mode 100644 gcc-4.9/gcc/config/arc/arc-modes.def create mode 100644 gcc-4.9/gcc/config/arc/arc-opts.h create mode 100644 gcc-4.9/gcc/config/arc/arc-protos.h create mode 100644 gcc-4.9/gcc/config/arc/arc-simd.h create mode 100644 gcc-4.9/gcc/config/arc/arc.c create mode 100644 gcc-4.9/gcc/config/arc/arc.h create mode 100644 gcc-4.9/gcc/config/arc/arc.md create mode 100644 gcc-4.9/gcc/config/arc/arc.opt create mode 100644 gcc-4.9/gcc/config/arc/arc600.md create mode 100644 gcc-4.9/gcc/config/arc/arc700.md create mode 100644 gcc-4.9/gcc/config/arc/constraints.md create mode 100644 gcc-4.9/gcc/config/arc/fpx.md create mode 100644 gcc-4.9/gcc/config/arc/predicates.md create mode 100644 gcc-4.9/gcc/config/arc/simdext.md create mode 100644 gcc-4.9/gcc/config/arc/t-arc-newlib create mode 100644 gcc-4.9/gcc/config/arc/t-arc-uClibc create mode 100644 gcc-4.9/gcc/config/arm/README-interworking create mode 100644 gcc-4.9/gcc/config/arm/aarch-common-protos.h create mode 100644 gcc-4.9/gcc/config/arm/aarch-common.c create mode 100644 gcc-4.9/gcc/config/arm/aarch-cost-tables.h create mode 100644 gcc-4.9/gcc/config/arm/aout.h create mode 100644 gcc-4.9/gcc/config/arm/arm-arches.def create mode 100644 gcc-4.9/gcc/config/arm/arm-c.c create mode 100644 gcc-4.9/gcc/config/arm/arm-cores.def create mode 100644 gcc-4.9/gcc/config/arm/arm-fixed.md create mode 100644 gcc-4.9/gcc/config/arm/arm-fpus.def create mode 100644 gcc-4.9/gcc/config/arm/arm-generic.md create mode 100644 gcc-4.9/gcc/config/arm/arm-ldmstm.ml create mode 100644 gcc-4.9/gcc/config/arm/arm-modes.def create mode 100644 gcc-4.9/gcc/config/arm/arm-opts.h create mode 100644 gcc-4.9/gcc/config/arm/arm-protos.h create mode 100644 gcc-4.9/gcc/config/arm/arm-tables.opt create mode 100644 gcc-4.9/gcc/config/arm/arm-tune.md create mode 100644 gcc-4.9/gcc/config/arm/arm.c create mode 100644 gcc-4.9/gcc/config/arm/arm.h create mode 100644 gcc-4.9/gcc/config/arm/arm.md create mode 100644 gcc-4.9/gcc/config/arm/arm.opt create mode 100644 gcc-4.9/gcc/config/arm/arm1020e.md create mode 100644 gcc-4.9/gcc/config/arm/arm1026ejs.md create mode 100644 gcc-4.9/gcc/config/arm/arm1136jfs.md create mode 100644 gcc-4.9/gcc/config/arm/arm926ejs.md create mode 100644 gcc-4.9/gcc/config/arm/arm_acle.h create mode 100644 gcc-4.9/gcc/config/arm/arm_neon.h create mode 100644 gcc-4.9/gcc/config/arm/arm_neon_builtins.def create mode 100644 gcc-4.9/gcc/config/arm/bpabi.h create mode 100644 gcc-4.9/gcc/config/arm/coff.h create mode 100644 
gcc-4.9/gcc/config/arm/constraints.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a15-neon.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a15.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a5.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a53.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a7.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a8-neon.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a8.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a9-neon.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-a9.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-m4-fpu.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-m4.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-r4.md create mode 100644 gcc-4.9/gcc/config/arm/cortex-r4f.md create mode 100644 gcc-4.9/gcc/config/arm/crypto.def create mode 100644 gcc-4.9/gcc/config/arm/crypto.md create mode 100644 gcc-4.9/gcc/config/arm/driver-arm.c create mode 100644 gcc-4.9/gcc/config/arm/elf.h create mode 100644 gcc-4.9/gcc/config/arm/fa526.md create mode 100644 gcc-4.9/gcc/config/arm/fa606te.md create mode 100644 gcc-4.9/gcc/config/arm/fa626te.md create mode 100644 gcc-4.9/gcc/config/arm/fa726te.md create mode 100644 gcc-4.9/gcc/config/arm/fmp626.md create mode 100755 gcc-4.9/gcc/config/arm/genopt.sh create mode 100755 gcc-4.9/gcc/config/arm/gentune.sh create mode 100644 gcc-4.9/gcc/config/arm/iterators.md create mode 100644 gcc-4.9/gcc/config/arm/iwmmxt.md create mode 100644 gcc-4.9/gcc/config/arm/iwmmxt2.md create mode 100644 gcc-4.9/gcc/config/arm/ldmstm.md create mode 100644 gcc-4.9/gcc/config/arm/ldrdstrd.md create mode 100644 gcc-4.9/gcc/config/arm/linux-eabi.h create mode 100644 gcc-4.9/gcc/config/arm/linux-elf.h create mode 100644 gcc-4.9/gcc/config/arm/linux-gas.h create mode 100644 gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md create mode 100644 gcc-4.9/gcc/config/arm/marvell-pj4.md create mode 100644 gcc-4.9/gcc/config/arm/mmintrin.h create mode 100644 gcc-4.9/gcc/config/arm/neon-docgen.ml create mode 100644 gcc-4.9/gcc/config/arm/neon-gen.ml create mode 100644 gcc-4.9/gcc/config/arm/neon-testgen.ml create mode 100644 gcc-4.9/gcc/config/arm/neon.md create mode 100644 gcc-4.9/gcc/config/arm/neon.ml create mode 100644 gcc-4.9/gcc/config/arm/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/arm/predicates.md create mode 100644 gcc-4.9/gcc/config/arm/rtems-eabi.h create mode 100644 gcc-4.9/gcc/config/arm/semi.h create mode 100644 gcc-4.9/gcc/config/arm/symbian.h create mode 100644 gcc-4.9/gcc/config/arm/sync.md create mode 100644 gcc-4.9/gcc/config/arm/t-aprofile create mode 100644 gcc-4.9/gcc/config/arm/t-arm create mode 100644 gcc-4.9/gcc/config/arm/t-arm-elf create mode 100644 gcc-4.9/gcc/config/arm/t-bpabi create mode 100644 gcc-4.9/gcc/config/arm/t-linux-androideabi create mode 100644 gcc-4.9/gcc/config/arm/t-linux-eabi create mode 100644 gcc-4.9/gcc/config/arm/t-rtems-eabi create mode 100644 gcc-4.9/gcc/config/arm/t-symbian create mode 100644 gcc-4.9/gcc/config/arm/t-vxworks create mode 100644 gcc-4.9/gcc/config/arm/thumb2.md create mode 100644 gcc-4.9/gcc/config/arm/types.md create mode 100644 gcc-4.9/gcc/config/arm/uclinux-eabi.h create mode 100644 gcc-4.9/gcc/config/arm/uclinux-elf.h create mode 100644 gcc-4.9/gcc/config/arm/unknown-elf.h create mode 100644 gcc-4.9/gcc/config/arm/unspecs.md create mode 100644 gcc-4.9/gcc/config/arm/vec-common.md create mode 100644 gcc-4.9/gcc/config/arm/vfp.md create mode 100644 gcc-4.9/gcc/config/arm/vfp11.md create mode 100644 gcc-4.9/gcc/config/arm/vxworks.h 
create mode 100644 gcc-4.9/gcc/config/arm/vxworks.opt create mode 100644 gcc-4.9/gcc/config/arm/x-arm create mode 100644 gcc-4.9/gcc/config/avr/avr-arch.h create mode 100644 gcc-4.9/gcc/config/avr/avr-c.c create mode 100644 gcc-4.9/gcc/config/avr/avr-devices.c create mode 100644 gcc-4.9/gcc/config/avr/avr-dimode.md create mode 100644 gcc-4.9/gcc/config/avr/avr-fixed.md create mode 100644 gcc-4.9/gcc/config/avr/avr-log.c create mode 100644 gcc-4.9/gcc/config/avr/avr-mcus.def create mode 100644 gcc-4.9/gcc/config/avr/avr-modes.def create mode 100644 gcc-4.9/gcc/config/avr/avr-protos.h create mode 100644 gcc-4.9/gcc/config/avr/avr-stdint.h create mode 100644 gcc-4.9/gcc/config/avr/avr-tables.opt create mode 100644 gcc-4.9/gcc/config/avr/avr.c create mode 100644 gcc-4.9/gcc/config/avr/avr.h create mode 100644 gcc-4.9/gcc/config/avr/avr.md create mode 100644 gcc-4.9/gcc/config/avr/avr.opt create mode 100644 gcc-4.9/gcc/config/avr/avrlibc.h create mode 100644 gcc-4.9/gcc/config/avr/builtins.def create mode 100644 gcc-4.9/gcc/config/avr/constraints.md create mode 100644 gcc-4.9/gcc/config/avr/driver-avr.c create mode 100644 gcc-4.9/gcc/config/avr/elf.h create mode 100644 gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c create mode 100644 gcc-4.9/gcc/config/avr/genmultilib.awk create mode 100755 gcc-4.9/gcc/config/avr/genopt.sh create mode 100644 gcc-4.9/gcc/config/avr/predicates.md create mode 100644 gcc-4.9/gcc/config/avr/rtems.h create mode 100644 gcc-4.9/gcc/config/avr/stdfix.h create mode 100644 gcc-4.9/gcc/config/avr/t-avr create mode 100644 gcc-4.9/gcc/config/avr/t-multilib create mode 100644 gcc-4.9/gcc/config/avr/t-rtems create mode 100644 gcc-4.9/gcc/config/bfin/bfin-modes.def create mode 100644 gcc-4.9/gcc/config/bfin/bfin-opts.h create mode 100644 gcc-4.9/gcc/config/bfin/bfin-protos.h create mode 100644 gcc-4.9/gcc/config/bfin/bfin.c create mode 100644 gcc-4.9/gcc/config/bfin/bfin.h create mode 100644 gcc-4.9/gcc/config/bfin/bfin.md create mode 100644 gcc-4.9/gcc/config/bfin/bfin.opt create mode 100644 gcc-4.9/gcc/config/bfin/constraints.md create mode 100644 gcc-4.9/gcc/config/bfin/elf.h create mode 100644 gcc-4.9/gcc/config/bfin/linux.h create mode 100644 gcc-4.9/gcc/config/bfin/predicates.md create mode 100644 gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh create mode 100644 gcc-4.9/gcc/config/bfin/rtems.h create mode 100644 gcc-4.9/gcc/config/bfin/sync.md create mode 100644 gcc-4.9/gcc/config/bfin/t-bfin-elf create mode 100644 gcc-4.9/gcc/config/bfin/t-bfin-linux create mode 100644 gcc-4.9/gcc/config/bfin/t-bfin-uclinux create mode 100644 gcc-4.9/gcc/config/bfin/t-rtems create mode 100644 gcc-4.9/gcc/config/bfin/uclinux.h create mode 100644 gcc-4.9/gcc/config/c6x/c6x-isas.def create mode 100644 gcc-4.9/gcc/config/c6x/c6x-modes.def create mode 100644 gcc-4.9/gcc/config/c6x/c6x-mult.md create mode 100644 gcc-4.9/gcc/config/c6x/c6x-mult.md.in create mode 100644 gcc-4.9/gcc/config/c6x/c6x-opts.h create mode 100644 gcc-4.9/gcc/config/c6x/c6x-protos.h create mode 100644 gcc-4.9/gcc/config/c6x/c6x-sched.md create mode 100644 gcc-4.9/gcc/config/c6x/c6x-sched.md.in create mode 100644 gcc-4.9/gcc/config/c6x/c6x-tables.opt create mode 100644 gcc-4.9/gcc/config/c6x/c6x.c create mode 100644 gcc-4.9/gcc/config/c6x/c6x.h create mode 100644 gcc-4.9/gcc/config/c6x/c6x.md create mode 100644 gcc-4.9/gcc/config/c6x/c6x.opt create mode 100644 gcc-4.9/gcc/config/c6x/c6x_intrinsics.h create mode 100644 gcc-4.9/gcc/config/c6x/constraints.md create mode 100644 gcc-4.9/gcc/config/c6x/elf-common.h create mode 
100644 gcc-4.9/gcc/config/c6x/elf.h create mode 100644 gcc-4.9/gcc/config/c6x/genmult.sh create mode 100644 gcc-4.9/gcc/config/c6x/genopt.sh create mode 100644 gcc-4.9/gcc/config/c6x/gensched.sh create mode 100644 gcc-4.9/gcc/config/c6x/predicates.md create mode 100644 gcc-4.9/gcc/config/c6x/sync.md create mode 100644 gcc-4.9/gcc/config/c6x/t-c6x create mode 100644 gcc-4.9/gcc/config/c6x/t-c6x-elf create mode 100644 gcc-4.9/gcc/config/c6x/t-c6x-uclinux create mode 100644 gcc-4.9/gcc/config/c6x/uclinux-elf.h create mode 100644 gcc-4.9/gcc/config/cr16/constraints.md create mode 100644 gcc-4.9/gcc/config/cr16/cr16-protos.h create mode 100644 gcc-4.9/gcc/config/cr16/cr16.c create mode 100644 gcc-4.9/gcc/config/cr16/cr16.h create mode 100644 gcc-4.9/gcc/config/cr16/cr16.md create mode 100644 gcc-4.9/gcc/config/cr16/cr16.opt create mode 100644 gcc-4.9/gcc/config/cr16/predicates.md create mode 100644 gcc-4.9/gcc/config/cr16/t-cr16 create mode 100644 gcc-4.9/gcc/config/cris/constraints.md create mode 100644 gcc-4.9/gcc/config/cris/cris-protos.h create mode 100644 gcc-4.9/gcc/config/cris/cris.c create mode 100644 gcc-4.9/gcc/config/cris/cris.h create mode 100644 gcc-4.9/gcc/config/cris/cris.md create mode 100644 gcc-4.9/gcc/config/cris/cris.opt create mode 100644 gcc-4.9/gcc/config/cris/elf.opt create mode 100644 gcc-4.9/gcc/config/cris/linux.h create mode 100644 gcc-4.9/gcc/config/cris/linux.opt create mode 100644 gcc-4.9/gcc/config/cris/predicates.md create mode 100644 gcc-4.9/gcc/config/cris/sync.md create mode 100644 gcc-4.9/gcc/config/cris/t-cris create mode 100644 gcc-4.9/gcc/config/cris/t-elfmulti create mode 100644 gcc-4.9/gcc/config/cris/t-linux create mode 100644 gcc-4.9/gcc/config/darwin-c.c create mode 100644 gcc-4.9/gcc/config/darwin-driver.c create mode 100644 gcc-4.9/gcc/config/darwin-f.c create mode 100644 gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def create mode 100644 gcc-4.9/gcc/config/darwin-protos.h create mode 100644 gcc-4.9/gcc/config/darwin-sections.def create mode 100644 gcc-4.9/gcc/config/darwin.c create mode 100644 gcc-4.9/gcc/config/darwin.h create mode 100644 gcc-4.9/gcc/config/darwin.opt create mode 100644 gcc-4.9/gcc/config/darwin10.h create mode 100644 gcc-4.9/gcc/config/darwin9.h create mode 100644 gcc-4.9/gcc/config/dbx.h create mode 100644 gcc-4.9/gcc/config/dbxcoff.h create mode 100644 gcc-4.9/gcc/config/dbxelf.h create mode 100644 gcc-4.9/gcc/config/default-c.c create mode 100644 gcc-4.9/gcc/config/elfos.h create mode 100644 gcc-4.9/gcc/config/epiphany/constraints.md create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany-modes.def create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany-protos.h create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany-sched.md create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany.c create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany.h create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany.md create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany.opt create mode 100644 gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h create mode 100644 gcc-4.9/gcc/config/epiphany/mode-switch-use.c create mode 100644 gcc-4.9/gcc/config/epiphany/predicates.md create mode 100644 gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c create mode 100644 gcc-4.9/gcc/config/epiphany/t-epiphany create mode 100644 gcc-4.9/gcc/config/flat.h create mode 100644 gcc-4.9/gcc/config/fr30/constraints.md create mode 100644 gcc-4.9/gcc/config/fr30/fr30-protos.h create mode 100644 gcc-4.9/gcc/config/fr30/fr30.c create mode 100644 gcc-4.9/gcc/config/fr30/fr30.h 
create mode 100644 gcc-4.9/gcc/config/fr30/fr30.md create mode 100644 gcc-4.9/gcc/config/fr30/fr30.opt create mode 100644 gcc-4.9/gcc/config/fr30/predicates.md create mode 100644 gcc-4.9/gcc/config/freebsd-nthr.h create mode 100644 gcc-4.9/gcc/config/freebsd-spec.h create mode 100644 gcc-4.9/gcc/config/freebsd-stdint.h create mode 100644 gcc-4.9/gcc/config/freebsd.h create mode 100644 gcc-4.9/gcc/config/freebsd.opt create mode 100644 gcc-4.9/gcc/config/frv/constraints.md create mode 100644 gcc-4.9/gcc/config/frv/frv-asm.h create mode 100644 gcc-4.9/gcc/config/frv/frv-modes.def create mode 100644 gcc-4.9/gcc/config/frv/frv-opts.h create mode 100644 gcc-4.9/gcc/config/frv/frv-protos.h create mode 100644 gcc-4.9/gcc/config/frv/frv.c create mode 100644 gcc-4.9/gcc/config/frv/frv.h create mode 100644 gcc-4.9/gcc/config/frv/frv.md create mode 100644 gcc-4.9/gcc/config/frv/frv.opt create mode 100644 gcc-4.9/gcc/config/frv/linux.h create mode 100644 gcc-4.9/gcc/config/frv/predicates.md create mode 100644 gcc-4.9/gcc/config/frv/t-frv create mode 100644 gcc-4.9/gcc/config/frv/t-linux create mode 100644 gcc-4.9/gcc/config/fused-madd.opt create mode 100644 gcc-4.9/gcc/config/g.opt create mode 100644 gcc-4.9/gcc/config/glibc-c.c create mode 100644 gcc-4.9/gcc/config/glibc-stdint.h create mode 100644 gcc-4.9/gcc/config/gnu-user.h create mode 100644 gcc-4.9/gcc/config/gnu-user.opt create mode 100644 gcc-4.9/gcc/config/gnu.h create mode 100644 gcc-4.9/gcc/config/h8300/constraints.md create mode 100644 gcc-4.9/gcc/config/h8300/elf.h create mode 100644 gcc-4.9/gcc/config/h8300/genmova.sh create mode 100644 gcc-4.9/gcc/config/h8300/h8300-protos.h create mode 100644 gcc-4.9/gcc/config/h8300/h8300.c create mode 100644 gcc-4.9/gcc/config/h8300/h8300.h create mode 100644 gcc-4.9/gcc/config/h8300/h8300.md create mode 100644 gcc-4.9/gcc/config/h8300/h8300.opt create mode 100644 gcc-4.9/gcc/config/h8300/mova.md create mode 100644 gcc-4.9/gcc/config/h8300/predicates.md create mode 100644 gcc-4.9/gcc/config/h8300/rtems.h create mode 100644 gcc-4.9/gcc/config/h8300/t-h8300 create mode 100644 gcc-4.9/gcc/config/h8300/t-rtems create mode 100644 gcc-4.9/gcc/config/host-darwin.c create mode 100644 gcc-4.9/gcc/config/host-darwin.h create mode 100644 gcc-4.9/gcc/config/host-hpux.c create mode 100644 gcc-4.9/gcc/config/host-linux.c create mode 100644 gcc-4.9/gcc/config/host-openbsd.c create mode 100644 gcc-4.9/gcc/config/host-solaris.c create mode 100644 gcc-4.9/gcc/config/hpux-stdint.h create mode 100644 gcc-4.9/gcc/config/hpux11.opt create mode 100644 gcc-4.9/gcc/config/i386/adxintrin.h create mode 100644 gcc-4.9/gcc/config/i386/ammintrin.h create mode 100644 gcc-4.9/gcc/config/i386/athlon.md create mode 100644 gcc-4.9/gcc/config/i386/atom.md create mode 100644 gcc-4.9/gcc/config/i386/att.h create mode 100644 gcc-4.9/gcc/config/i386/avx2intrin.h create mode 100644 gcc-4.9/gcc/config/i386/avx512cdintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avx512erintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avx512fintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avx512pfintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avxintrin.h create mode 100644 gcc-4.9/gcc/config/i386/avxmath.h create mode 100644 gcc-4.9/gcc/config/i386/bdver1.md create mode 100644 gcc-4.9/gcc/config/i386/bdver3.md create mode 100644 gcc-4.9/gcc/config/i386/biarch64.h create mode 100644 gcc-4.9/gcc/config/i386/biarchx32.h create mode 100644 gcc-4.9/gcc/config/i386/bmi2intrin.h create mode 100644 gcc-4.9/gcc/config/i386/bmiintrin.h create mode 
100644 gcc-4.9/gcc/config/i386/bmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/bsd.h create mode 100644 gcc-4.9/gcc/config/i386/btver2.md create mode 100644 gcc-4.9/gcc/config/i386/constraints.md create mode 100644 gcc-4.9/gcc/config/i386/core2.md create mode 100644 gcc-4.9/gcc/config/i386/cpuid.h create mode 100644 gcc-4.9/gcc/config/i386/cross-stdarg.h create mode 100644 gcc-4.9/gcc/config/i386/crtdll.h create mode 100644 gcc-4.9/gcc/config/i386/cygming.h create mode 100644 gcc-4.9/gcc/config/i386/cygming.opt create mode 100644 gcc-4.9/gcc/config/i386/cygwin-stdint.h create mode 100644 gcc-4.9/gcc/config/i386/cygwin-w64.h create mode 100644 gcc-4.9/gcc/config/i386/cygwin.h create mode 100644 gcc-4.9/gcc/config/i386/darwin.h create mode 100644 gcc-4.9/gcc/config/i386/darwin64.h create mode 100644 gcc-4.9/gcc/config/i386/djgpp-stdint.h create mode 100644 gcc-4.9/gcc/config/i386/djgpp.h create mode 100644 gcc-4.9/gcc/config/i386/djgpp.opt create mode 100644 gcc-4.9/gcc/config/i386/driver-i386.c create mode 100644 gcc-4.9/gcc/config/i386/emmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/f16cintrin.h create mode 100644 gcc-4.9/gcc/config/i386/fma4intrin.h create mode 100644 gcc-4.9/gcc/config/i386/fmaintrin.h create mode 100644 gcc-4.9/gcc/config/i386/freebsd.h create mode 100644 gcc-4.9/gcc/config/i386/freebsd64.h create mode 100644 gcc-4.9/gcc/config/i386/fxsrintrin.h create mode 100644 gcc-4.9/gcc/config/i386/gas.h create mode 100644 gcc-4.9/gcc/config/i386/geode.md create mode 100644 gcc-4.9/gcc/config/i386/gmm_malloc.h create mode 100644 gcc-4.9/gcc/config/i386/gnu-user-common.h create mode 100644 gcc-4.9/gcc/config/i386/gnu-user.h create mode 100644 gcc-4.9/gcc/config/i386/gnu-user64.h create mode 100644 gcc-4.9/gcc/config/i386/gnu.h create mode 100644 gcc-4.9/gcc/config/i386/gstabs.h create mode 100644 gcc-4.9/gcc/config/i386/host-cygwin.c create mode 100644 gcc-4.9/gcc/config/i386/host-i386-darwin.c create mode 100644 gcc-4.9/gcc/config/i386/host-mingw32.c create mode 100644 gcc-4.9/gcc/config/i386/i386-builtin-types.awk create mode 100644 gcc-4.9/gcc/config/i386/i386-builtin-types.def create mode 100644 gcc-4.9/gcc/config/i386/i386-c.c create mode 100644 gcc-4.9/gcc/config/i386/i386-interix.h create mode 100644 gcc-4.9/gcc/config/i386/i386-modes.def create mode 100644 gcc-4.9/gcc/config/i386/i386-opts.h create mode 100644 gcc-4.9/gcc/config/i386/i386-protos.h create mode 100644 gcc-4.9/gcc/config/i386/i386.c create mode 100644 gcc-4.9/gcc/config/i386/i386.h create mode 100644 gcc-4.9/gcc/config/i386/i386.md create mode 100644 gcc-4.9/gcc/config/i386/i386.opt create mode 100644 gcc-4.9/gcc/config/i386/i386elf.h create mode 100644 gcc-4.9/gcc/config/i386/ia32intrin.h create mode 100644 gcc-4.9/gcc/config/i386/immintrin.h create mode 100644 gcc-4.9/gcc/config/i386/interix.opt create mode 100644 gcc-4.9/gcc/config/i386/k6.md create mode 100644 gcc-4.9/gcc/config/i386/kfreebsd-gnu.h create mode 100644 gcc-4.9/gcc/config/i386/kfreebsd-gnu64.h create mode 100644 gcc-4.9/gcc/config/i386/knetbsd-gnu.h create mode 100644 gcc-4.9/gcc/config/i386/kopensolaris-gnu.h create mode 100644 gcc-4.9/gcc/config/i386/linux-common.h create mode 100644 gcc-4.9/gcc/config/i386/linux.h create mode 100644 gcc-4.9/gcc/config/i386/linux64.h create mode 100644 gcc-4.9/gcc/config/i386/lwpintrin.h create mode 100644 gcc-4.9/gcc/config/i386/lynx.h create mode 100644 gcc-4.9/gcc/config/i386/lzcntintrin.h create mode 100644 gcc-4.9/gcc/config/i386/mingw-pthread.h create mode 100644 
gcc-4.9/gcc/config/i386/mingw-stdint.h create mode 100644 gcc-4.9/gcc/config/i386/mingw-w64.h create mode 100644 gcc-4.9/gcc/config/i386/mingw-w64.opt create mode 100644 gcc-4.9/gcc/config/i386/mingw.opt create mode 100644 gcc-4.9/gcc/config/i386/mingw32.h create mode 100644 gcc-4.9/gcc/config/i386/mm3dnow.h create mode 100644 gcc-4.9/gcc/config/i386/mmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/mmx.md create mode 100644 gcc-4.9/gcc/config/i386/msformat-c.c create mode 100644 gcc-4.9/gcc/config/i386/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/i386/netbsd64.h create mode 100644 gcc-4.9/gcc/config/i386/nmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/nto.h create mode 100644 gcc-4.9/gcc/config/i386/nto.opt create mode 100644 gcc-4.9/gcc/config/i386/openbsd.h create mode 100644 gcc-4.9/gcc/config/i386/openbsdelf.h create mode 100644 gcc-4.9/gcc/config/i386/pentium.md create mode 100644 gcc-4.9/gcc/config/i386/pmm_malloc.h create mode 100644 gcc-4.9/gcc/config/i386/pmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/popcntintrin.h create mode 100644 gcc-4.9/gcc/config/i386/ppro.md create mode 100644 gcc-4.9/gcc/config/i386/predicates.md create mode 100644 gcc-4.9/gcc/config/i386/prfchwintrin.h create mode 100644 gcc-4.9/gcc/config/i386/rdos.h create mode 100644 gcc-4.9/gcc/config/i386/rdos64.h create mode 100644 gcc-4.9/gcc/config/i386/rdseedintrin.h create mode 100644 gcc-4.9/gcc/config/i386/rtemself.h create mode 100644 gcc-4.9/gcc/config/i386/rtmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/shaintrin.h create mode 100644 gcc-4.9/gcc/config/i386/slm.md create mode 100644 gcc-4.9/gcc/config/i386/smmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/sol2-9.h create mode 100644 gcc-4.9/gcc/config/i386/sol2-bi.h create mode 100644 gcc-4.9/gcc/config/i386/sol2.h create mode 100644 gcc-4.9/gcc/config/i386/sse.md create mode 100644 gcc-4.9/gcc/config/i386/ssemath.h create mode 100644 gcc-4.9/gcc/config/i386/stringop.def create mode 100644 gcc-4.9/gcc/config/i386/stringop.opt create mode 100644 gcc-4.9/gcc/config/i386/subst.md create mode 100644 gcc-4.9/gcc/config/i386/sync.md create mode 100644 gcc-4.9/gcc/config/i386/sysv4.h create mode 100644 gcc-4.9/gcc/config/i386/t-cygming create mode 100644 gcc-4.9/gcc/config/i386/t-cygwin-w64 create mode 100644 gcc-4.9/gcc/config/i386/t-darwin create mode 100644 gcc-4.9/gcc/config/i386/t-darwin64 create mode 100644 gcc-4.9/gcc/config/i386/t-gmm_malloc create mode 100644 gcc-4.9/gcc/config/i386/t-gnu create mode 100644 gcc-4.9/gcc/config/i386/t-i386 create mode 100644 gcc-4.9/gcc/config/i386/t-interix create mode 100644 gcc-4.9/gcc/config/i386/t-kfreebsd create mode 100644 gcc-4.9/gcc/config/i386/t-linux create mode 100644 gcc-4.9/gcc/config/i386/t-linux64 create mode 100644 gcc-4.9/gcc/config/i386/t-mingw-w32 create mode 100644 gcc-4.9/gcc/config/i386/t-mingw-w64 create mode 100644 gcc-4.9/gcc/config/i386/t-openbsd create mode 100644 gcc-4.9/gcc/config/i386/t-pmm_malloc create mode 100644 gcc-4.9/gcc/config/i386/t-rtems create mode 100644 gcc-4.9/gcc/config/i386/t-sol2-64 create mode 100644 gcc-4.9/gcc/config/i386/t-vxworks create mode 100644 gcc-4.9/gcc/config/i386/t-vxworksae create mode 100644 gcc-4.9/gcc/config/i386/tbmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/tmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/unix.h create mode 100644 gcc-4.9/gcc/config/i386/vx-common.h create mode 100644 gcc-4.9/gcc/config/i386/vxworks.h create mode 100644 gcc-4.9/gcc/config/i386/vxworksae.h create mode 100644 
gcc-4.9/gcc/config/i386/winnt-cxx.c create mode 100644 gcc-4.9/gcc/config/i386/winnt-stubs.c create mode 100644 gcc-4.9/gcc/config/i386/winnt.c create mode 100644 gcc-4.9/gcc/config/i386/wmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/x-cygwin create mode 100644 gcc-4.9/gcc/config/i386/x-darwin create mode 100644 gcc-4.9/gcc/config/i386/x-i386 create mode 100644 gcc-4.9/gcc/config/i386/x-mingw32 create mode 100644 gcc-4.9/gcc/config/i386/x86-64.h create mode 100644 gcc-4.9/gcc/config/i386/x86-tune.def create mode 100644 gcc-4.9/gcc/config/i386/x86intrin.h create mode 100644 gcc-4.9/gcc/config/i386/xm-cygwin.h create mode 100644 gcc-4.9/gcc/config/i386/xm-djgpp.h create mode 100644 gcc-4.9/gcc/config/i386/xm-mingw32.h create mode 100644 gcc-4.9/gcc/config/i386/xmmintrin.h create mode 100644 gcc-4.9/gcc/config/i386/xopintrin.h create mode 100644 gcc-4.9/gcc/config/i386/xsaveintrin.h create mode 100644 gcc-4.9/gcc/config/i386/xsaveoptintrin.h create mode 100644 gcc-4.9/gcc/config/i386/xtestintrin.h create mode 100644 gcc-4.9/gcc/config/ia64/constraints.md create mode 100644 gcc-4.9/gcc/config/ia64/div.md create mode 100644 gcc-4.9/gcc/config/ia64/elf.h create mode 100644 gcc-4.9/gcc/config/ia64/freebsd.h create mode 100644 gcc-4.9/gcc/config/ia64/hpux-unix2003.h create mode 100644 gcc-4.9/gcc/config/ia64/hpux.h create mode 100644 gcc-4.9/gcc/config/ia64/ia64-c.c create mode 100644 gcc-4.9/gcc/config/ia64/ia64-modes.def create mode 100644 gcc-4.9/gcc/config/ia64/ia64-opts.h create mode 100644 gcc-4.9/gcc/config/ia64/ia64-protos.h create mode 100644 gcc-4.9/gcc/config/ia64/ia64.c create mode 100644 gcc-4.9/gcc/config/ia64/ia64.h create mode 100644 gcc-4.9/gcc/config/ia64/ia64.md create mode 100644 gcc-4.9/gcc/config/ia64/ia64.opt create mode 100644 gcc-4.9/gcc/config/ia64/ia64intrin.h create mode 100644 gcc-4.9/gcc/config/ia64/ilp32.opt create mode 100644 gcc-4.9/gcc/config/ia64/itanium2.md create mode 100644 gcc-4.9/gcc/config/ia64/linux.h create mode 100644 gcc-4.9/gcc/config/ia64/predicates.md create mode 100644 gcc-4.9/gcc/config/ia64/sync.md create mode 100644 gcc-4.9/gcc/config/ia64/sysv4.h create mode 100644 gcc-4.9/gcc/config/ia64/t-hpux create mode 100644 gcc-4.9/gcc/config/ia64/t-ia64 create mode 100644 gcc-4.9/gcc/config/ia64/t-linux create mode 100644 gcc-4.9/gcc/config/ia64/vect.md create mode 100644 gcc-4.9/gcc/config/ia64/vms.h create mode 100644 gcc-4.9/gcc/config/ia64/vms.opt create mode 100644 gcc-4.9/gcc/config/initfini-array.h create mode 100644 gcc-4.9/gcc/config/iq2000/abi create mode 100644 gcc-4.9/gcc/config/iq2000/constraints.md create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000-opts.h create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000-protos.h create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000.c create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000.h create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000.md create mode 100644 gcc-4.9/gcc/config/iq2000/iq2000.opt create mode 100644 gcc-4.9/gcc/config/iq2000/predicates.md create mode 100644 gcc-4.9/gcc/config/kfreebsd-gnu.h create mode 100644 gcc-4.9/gcc/config/knetbsd-gnu.h create mode 100644 gcc-4.9/gcc/config/kopensolaris-gnu.h create mode 100644 gcc-4.9/gcc/config/linux-android.h create mode 100644 gcc-4.9/gcc/config/linux-android.opt create mode 100644 gcc-4.9/gcc/config/linux-protos.h create mode 100644 gcc-4.9/gcc/config/linux.c create mode 100644 gcc-4.9/gcc/config/linux.h create mode 100644 gcc-4.9/gcc/config/linux.opt create mode 100644 gcc-4.9/gcc/config/lm32/constraints.md create mode 100644 
gcc-4.9/gcc/config/lm32/lm32-protos.h create mode 100644 gcc-4.9/gcc/config/lm32/lm32.c create mode 100644 gcc-4.9/gcc/config/lm32/lm32.h create mode 100644 gcc-4.9/gcc/config/lm32/lm32.md create mode 100644 gcc-4.9/gcc/config/lm32/lm32.opt create mode 100644 gcc-4.9/gcc/config/lm32/predicates.md create mode 100644 gcc-4.9/gcc/config/lm32/rtems.h create mode 100644 gcc-4.9/gcc/config/lm32/t-lm32 create mode 100644 gcc-4.9/gcc/config/lm32/t-rtems create mode 100644 gcc-4.9/gcc/config/lm32/uclinux-elf.h create mode 100644 gcc-4.9/gcc/config/lynx.h create mode 100644 gcc-4.9/gcc/config/lynx.opt create mode 100644 gcc-4.9/gcc/config/m32c/addsub.md create mode 100644 gcc-4.9/gcc/config/m32c/bitops.md create mode 100644 gcc-4.9/gcc/config/m32c/blkmov.md create mode 100644 gcc-4.9/gcc/config/m32c/cond.md create mode 100644 gcc-4.9/gcc/config/m32c/constraints.md create mode 100644 gcc-4.9/gcc/config/m32c/jump.md create mode 100644 gcc-4.9/gcc/config/m32c/m32c-modes.def create mode 100644 gcc-4.9/gcc/config/m32c/m32c-pragma.c create mode 100644 gcc-4.9/gcc/config/m32c/m32c-protos.h create mode 100644 gcc-4.9/gcc/config/m32c/m32c.abi create mode 100644 gcc-4.9/gcc/config/m32c/m32c.c create mode 100644 gcc-4.9/gcc/config/m32c/m32c.h create mode 100644 gcc-4.9/gcc/config/m32c/m32c.md create mode 100644 gcc-4.9/gcc/config/m32c/m32c.opt create mode 100644 gcc-4.9/gcc/config/m32c/minmax.md create mode 100644 gcc-4.9/gcc/config/m32c/mov.md create mode 100644 gcc-4.9/gcc/config/m32c/muldiv.md create mode 100644 gcc-4.9/gcc/config/m32c/predicates.md create mode 100644 gcc-4.9/gcc/config/m32c/prologue.md create mode 100644 gcc-4.9/gcc/config/m32c/rtems.h create mode 100644 gcc-4.9/gcc/config/m32c/shift.md create mode 100644 gcc-4.9/gcc/config/m32c/t-m32c create mode 100644 gcc-4.9/gcc/config/m32r/constraints.md create mode 100644 gcc-4.9/gcc/config/m32r/linux.h create mode 100644 gcc-4.9/gcc/config/m32r/little.h create mode 100644 gcc-4.9/gcc/config/m32r/m32r-opts.h create mode 100644 gcc-4.9/gcc/config/m32r/m32r-protos.h create mode 100644 gcc-4.9/gcc/config/m32r/m32r.c create mode 100644 gcc-4.9/gcc/config/m32r/m32r.h create mode 100644 gcc-4.9/gcc/config/m32r/m32r.md create mode 100644 gcc-4.9/gcc/config/m32r/m32r.opt create mode 100644 gcc-4.9/gcc/config/m32r/predicates.md create mode 100644 gcc-4.9/gcc/config/m32r/rtems.h create mode 100644 gcc-4.9/gcc/config/m32r/t-linux create mode 100644 gcc-4.9/gcc/config/m32r/t-m32r create mode 100644 gcc-4.9/gcc/config/m68k/cf.md create mode 100644 gcc-4.9/gcc/config/m68k/constraints.md create mode 100755 gcc-4.9/gcc/config/m68k/genopt.sh create mode 100644 gcc-4.9/gcc/config/m68k/ieee.opt create mode 100644 gcc-4.9/gcc/config/m68k/linux.h create mode 100644 gcc-4.9/gcc/config/m68k/m68020-elf.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k-devices.def create mode 100644 gcc-4.9/gcc/config/m68k/m68k-isas.def create mode 100644 gcc-4.9/gcc/config/m68k/m68k-microarchs.def create mode 100644 gcc-4.9/gcc/config/m68k/m68k-modes.def create mode 100644 gcc-4.9/gcc/config/m68k/m68k-none.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k-opts.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k-protos.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k-tables.opt create mode 100644 gcc-4.9/gcc/config/m68k/m68k.c create mode 100644 gcc-4.9/gcc/config/m68k/m68k.h create mode 100644 gcc-4.9/gcc/config/m68k/m68k.md create mode 100644 gcc-4.9/gcc/config/m68k/m68k.opt create mode 100644 gcc-4.9/gcc/config/m68k/m68kelf.h create mode 100644 gcc-4.9/gcc/config/m68k/m68kemb.h create mode 
100644 gcc-4.9/gcc/config/m68k/math-68881.h create mode 100644 gcc-4.9/gcc/config/m68k/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/m68k/openbsd.h create mode 100644 gcc-4.9/gcc/config/m68k/predicates.md create mode 100644 gcc-4.9/gcc/config/m68k/print-sysroot-suffix.sh create mode 100644 gcc-4.9/gcc/config/m68k/rtemself.h create mode 100644 gcc-4.9/gcc/config/m68k/sync.md create mode 100644 gcc-4.9/gcc/config/m68k/t-cf create mode 100644 gcc-4.9/gcc/config/m68k/t-linux create mode 100644 gcc-4.9/gcc/config/m68k/t-m68k create mode 100644 gcc-4.9/gcc/config/m68k/t-m68kbare create mode 100644 gcc-4.9/gcc/config/m68k/t-mlibs create mode 100644 gcc-4.9/gcc/config/m68k/t-openbsd create mode 100644 gcc-4.9/gcc/config/m68k/t-opts create mode 100644 gcc-4.9/gcc/config/m68k/t-rtems create mode 100644 gcc-4.9/gcc/config/m68k/t-uclinux create mode 100644 gcc-4.9/gcc/config/m68k/uclinux.h create mode 100644 gcc-4.9/gcc/config/m68k/uclinux.opt create mode 100644 gcc-4.9/gcc/config/mcore/constraints.md create mode 100644 gcc-4.9/gcc/config/mcore/mcore-elf.h create mode 100644 gcc-4.9/gcc/config/mcore/mcore-protos.h create mode 100644 gcc-4.9/gcc/config/mcore/mcore.c create mode 100644 gcc-4.9/gcc/config/mcore/mcore.h create mode 100644 gcc-4.9/gcc/config/mcore/mcore.md create mode 100644 gcc-4.9/gcc/config/mcore/mcore.opt create mode 100644 gcc-4.9/gcc/config/mcore/predicates.md create mode 100644 gcc-4.9/gcc/config/mcore/t-mcore create mode 100644 gcc-4.9/gcc/config/mep/constraints.md create mode 100644 gcc-4.9/gcc/config/mep/default.h create mode 100644 gcc-4.9/gcc/config/mep/intrinsics.h create mode 100644 gcc-4.9/gcc/config/mep/intrinsics.md create mode 100644 gcc-4.9/gcc/config/mep/ivc2-template.h create mode 100644 gcc-4.9/gcc/config/mep/mep-c5.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-core.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-default.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-ext-cop.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-intrin.h create mode 100644 gcc-4.9/gcc/config/mep/mep-ivc2.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep-pragma.c create mode 100644 gcc-4.9/gcc/config/mep/mep-protos.h create mode 100644 gcc-4.9/gcc/config/mep/mep.c create mode 100644 gcc-4.9/gcc/config/mep/mep.cpu create mode 100644 gcc-4.9/gcc/config/mep/mep.h create mode 100644 gcc-4.9/gcc/config/mep/mep.md create mode 100644 gcc-4.9/gcc/config/mep/mep.opt create mode 100644 gcc-4.9/gcc/config/mep/predicates.md create mode 100644 gcc-4.9/gcc/config/mep/t-mep create mode 100644 gcc-4.9/gcc/config/microblaze/constraints.md create mode 100644 gcc-4.9/gcc/config/microblaze/linux.h create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze-c.c create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze-protos.h create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze.c create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze.h create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze.md create mode 100644 gcc-4.9/gcc/config/microblaze/microblaze.opt create mode 100644 gcc-4.9/gcc/config/microblaze/predicates.md create mode 100644 gcc-4.9/gcc/config/microblaze/rtems.h create mode 100644 gcc-4.9/gcc/config/microblaze/sync.md create mode 100644 gcc-4.9/gcc/config/microblaze/t-microblaze create mode 100644 gcc-4.9/gcc/config/microblaze/t-microblaze-linux create mode 100644 gcc-4.9/gcc/config/microblaze/t-rtems create mode 100644 gcc-4.9/gcc/config/mips/10000.md create mode 100644 gcc-4.9/gcc/config/mips/20kc.md create mode 100644 gcc-4.9/gcc/config/mips/24k.md create mode 
100644 gcc-4.9/gcc/config/mips/3000.md create mode 100644 gcc-4.9/gcc/config/mips/4000.md create mode 100644 gcc-4.9/gcc/config/mips/4100.md create mode 100644 gcc-4.9/gcc/config/mips/4130.md create mode 100644 gcc-4.9/gcc/config/mips/4300.md create mode 100644 gcc-4.9/gcc/config/mips/4600.md create mode 100644 gcc-4.9/gcc/config/mips/4k.md create mode 100644 gcc-4.9/gcc/config/mips/5000.md create mode 100644 gcc-4.9/gcc/config/mips/5400.md create mode 100644 gcc-4.9/gcc/config/mips/5500.md create mode 100644 gcc-4.9/gcc/config/mips/5k.md create mode 100644 gcc-4.9/gcc/config/mips/6000.md create mode 100644 gcc-4.9/gcc/config/mips/7000.md create mode 100644 gcc-4.9/gcc/config/mips/74k.md create mode 100644 gcc-4.9/gcc/config/mips/9000.md create mode 100644 gcc-4.9/gcc/config/mips/constraints.md create mode 100644 gcc-4.9/gcc/config/mips/driver-native.c create mode 100644 gcc-4.9/gcc/config/mips/elf.h create mode 100644 gcc-4.9/gcc/config/mips/elfoabi.h create mode 100644 gcc-4.9/gcc/config/mips/elforion.h create mode 100644 gcc-4.9/gcc/config/mips/generic.md create mode 100755 gcc-4.9/gcc/config/mips/genopt.sh create mode 100644 gcc-4.9/gcc/config/mips/gnu-user.h create mode 100644 gcc-4.9/gcc/config/mips/gnu-user64.h create mode 100644 gcc-4.9/gcc/config/mips/linux-common.h create mode 100644 gcc-4.9/gcc/config/mips/linux.h create mode 100644 gcc-4.9/gcc/config/mips/linux64.h create mode 100644 gcc-4.9/gcc/config/mips/loongson.h create mode 100644 gcc-4.9/gcc/config/mips/loongson.md create mode 100644 gcc-4.9/gcc/config/mips/loongson2ef.md create mode 100644 gcc-4.9/gcc/config/mips/loongson3a.md create mode 100644 gcc-4.9/gcc/config/mips/micromips.md create mode 100644 gcc-4.9/gcc/config/mips/mips-cpus.def create mode 100644 gcc-4.9/gcc/config/mips/mips-dsp.md create mode 100644 gcc-4.9/gcc/config/mips/mips-dspr2.md create mode 100644 gcc-4.9/gcc/config/mips/mips-fixed.md create mode 100644 gcc-4.9/gcc/config/mips/mips-ftypes.def create mode 100644 gcc-4.9/gcc/config/mips/mips-modes.def create mode 100644 gcc-4.9/gcc/config/mips/mips-opts.h create mode 100644 gcc-4.9/gcc/config/mips/mips-protos.h create mode 100644 gcc-4.9/gcc/config/mips/mips-ps-3d.md create mode 100644 gcc-4.9/gcc/config/mips/mips-tables.opt create mode 100644 gcc-4.9/gcc/config/mips/mips.c create mode 100644 gcc-4.9/gcc/config/mips/mips.h create mode 100644 gcc-4.9/gcc/config/mips/mips.md create mode 100644 gcc-4.9/gcc/config/mips/mips.opt create mode 100644 gcc-4.9/gcc/config/mips/mti-elf.h create mode 100644 gcc-4.9/gcc/config/mips/mti-linux.h create mode 100644 gcc-4.9/gcc/config/mips/n32-elf.h create mode 100644 gcc-4.9/gcc/config/mips/netbsd.h create mode 100644 gcc-4.9/gcc/config/mips/octeon.md create mode 100644 gcc-4.9/gcc/config/mips/predicates.md create mode 100644 gcc-4.9/gcc/config/mips/r3900.h create mode 100644 gcc-4.9/gcc/config/mips/rtems.h create mode 100644 gcc-4.9/gcc/config/mips/sb1.md create mode 100644 gcc-4.9/gcc/config/mips/sde.h create mode 100644 gcc-4.9/gcc/config/mips/sde.opt create mode 100644 gcc-4.9/gcc/config/mips/sdemtk.h create mode 100644 gcc-4.9/gcc/config/mips/sr71k.md create mode 100644 gcc-4.9/gcc/config/mips/st.h create mode 100644 gcc-4.9/gcc/config/mips/sync.md create mode 100644 gcc-4.9/gcc/config/mips/t-elf create mode 100644 gcc-4.9/gcc/config/mips/t-irix6 create mode 100644 gcc-4.9/gcc/config/mips/t-isa3264 create mode 100644 gcc-4.9/gcc/config/mips/t-linux64 create mode 100644 gcc-4.9/gcc/config/mips/t-mips create mode 100644 gcc-4.9/gcc/config/mips/t-mti-elf create mode 
100644 gcc-4.9/gcc/config/mips/t-mti-linux create mode 100644 gcc-4.9/gcc/config/mips/t-r3900 create mode 100644 gcc-4.9/gcc/config/mips/t-rtems create mode 100644 gcc-4.9/gcc/config/mips/t-sb1 create mode 100644 gcc-4.9/gcc/config/mips/t-sde create mode 100644 gcc-4.9/gcc/config/mips/t-sdemtk create mode 100644 gcc-4.9/gcc/config/mips/t-sr71k create mode 100644 gcc-4.9/gcc/config/mips/t-st create mode 100644 gcc-4.9/gcc/config/mips/t-vr create mode 100644 gcc-4.9/gcc/config/mips/t-vxworks create mode 100644 gcc-4.9/gcc/config/mips/vr.h create mode 100644 gcc-4.9/gcc/config/mips/vxworks.h create mode 100644 gcc-4.9/gcc/config/mips/x-native create mode 100644 gcc-4.9/gcc/config/mips/xlp.md create mode 100644 gcc-4.9/gcc/config/mips/xlr.md create mode 100644 gcc-4.9/gcc/config/mmix/constraints.md create mode 100644 gcc-4.9/gcc/config/mmix/mmix-modes.def create mode 100644 gcc-4.9/gcc/config/mmix/mmix-protos.h create mode 100644 gcc-4.9/gcc/config/mmix/mmix.c create mode 100644 gcc-4.9/gcc/config/mmix/mmix.h create mode 100644 gcc-4.9/gcc/config/mmix/mmix.md create mode 100644 gcc-4.9/gcc/config/mmix/mmix.opt create mode 100644 gcc-4.9/gcc/config/mmix/predicates.md create mode 100644 gcc-4.9/gcc/config/mmix/t-mmix create mode 100644 gcc-4.9/gcc/config/mn10300/constraints.md create mode 100644 gcc-4.9/gcc/config/mn10300/linux.h create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300-modes.def create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300-opts.h create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300-protos.h create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300.c create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300.h create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300.md create mode 100644 gcc-4.9/gcc/config/mn10300/mn10300.opt create mode 100644 gcc-4.9/gcc/config/mn10300/predicates.md create mode 100644 gcc-4.9/gcc/config/mn10300/t-mn10300 create mode 100644 gcc-4.9/gcc/config/moxie/constraints.md create mode 100644 gcc-4.9/gcc/config/moxie/moxie-protos.h create mode 100644 gcc-4.9/gcc/config/moxie/moxie.c create mode 100644 gcc-4.9/gcc/config/moxie/moxie.h create mode 100644 gcc-4.9/gcc/config/moxie/moxie.md create mode 100644 gcc-4.9/gcc/config/moxie/moxie.opt create mode 100644 gcc-4.9/gcc/config/moxie/predicates.md create mode 100644 gcc-4.9/gcc/config/moxie/rtems.h create mode 100644 gcc-4.9/gcc/config/moxie/t-moxie create mode 100644 gcc-4.9/gcc/config/moxie/uclinux.h create mode 100644 gcc-4.9/gcc/config/msp430/README.txt create mode 100644 gcc-4.9/gcc/config/msp430/constraints.md create mode 100644 gcc-4.9/gcc/config/msp430/msp430-c.c create mode 100644 gcc-4.9/gcc/config/msp430/msp430-modes.def create mode 100644 gcc-4.9/gcc/config/msp430/msp430-protos.h create mode 100644 gcc-4.9/gcc/config/msp430/msp430.c create mode 100644 gcc-4.9/gcc/config/msp430/msp430.h create mode 100644 gcc-4.9/gcc/config/msp430/msp430.md create mode 100644 gcc-4.9/gcc/config/msp430/msp430.opt create mode 100644 gcc-4.9/gcc/config/msp430/predicates.md create mode 100644 gcc-4.9/gcc/config/msp430/t-msp430 create mode 100644 gcc-4.9/gcc/config/nds32/constants.md create mode 100644 gcc-4.9/gcc/config/nds32/constraints.md create mode 100644 gcc-4.9/gcc/config/nds32/iterators.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-doubleword.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-intrinsic.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-modes.def create mode 100644 gcc-4.9/gcc/config/nds32/nds32-multiple.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-opts.h create mode 100644 
gcc-4.9/gcc/config/nds32/nds32-peephole2.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32-protos.h create mode 100644 gcc-4.9/gcc/config/nds32/nds32.c create mode 100644 gcc-4.9/gcc/config/nds32/nds32.h create mode 100644 gcc-4.9/gcc/config/nds32/nds32.md create mode 100644 gcc-4.9/gcc/config/nds32/nds32.opt create mode 100644 gcc-4.9/gcc/config/nds32/nds32_intrinsic.h create mode 100644 gcc-4.9/gcc/config/nds32/pipelines.md create mode 100644 gcc-4.9/gcc/config/nds32/predicates.md create mode 100644 gcc-4.9/gcc/config/nds32/t-mlibs create mode 100644 gcc-4.9/gcc/config/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/netbsd-elf.opt create mode 100644 gcc-4.9/gcc/config/netbsd.h create mode 100644 gcc-4.9/gcc/config/netbsd.opt create mode 100644 gcc-4.9/gcc/config/newlib-stdint.h create mode 100644 gcc-4.9/gcc/config/nios2/constraints.md create mode 100644 gcc-4.9/gcc/config/nios2/elf.h create mode 100644 gcc-4.9/gcc/config/nios2/elf.opt create mode 100644 gcc-4.9/gcc/config/nios2/linux.h create mode 100644 gcc-4.9/gcc/config/nios2/nios2-opts.h create mode 100644 gcc-4.9/gcc/config/nios2/nios2-protos.h create mode 100644 gcc-4.9/gcc/config/nios2/nios2.c create mode 100644 gcc-4.9/gcc/config/nios2/nios2.h create mode 100644 gcc-4.9/gcc/config/nios2/nios2.md create mode 100644 gcc-4.9/gcc/config/nios2/nios2.opt create mode 100644 gcc-4.9/gcc/config/nios2/predicates.md create mode 100644 gcc-4.9/gcc/config/nios2/t-nios2 create mode 100644 gcc-4.9/gcc/config/openbsd-libpthread.h create mode 100644 gcc-4.9/gcc/config/openbsd-oldgas.h create mode 100644 gcc-4.9/gcc/config/openbsd-stdint.h create mode 100644 gcc-4.9/gcc/config/openbsd.h create mode 100644 gcc-4.9/gcc/config/openbsd.opt create mode 100644 gcc-4.9/gcc/config/pa/constraints.md create mode 100644 gcc-4.9/gcc/config/pa/elf.h create mode 100644 gcc-4.9/gcc/config/pa/pa-64.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux10.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux10.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1010.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1010.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux11.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1111.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1111.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1131.h create mode 100644 gcc-4.9/gcc/config/pa/pa-hpux1131.opt create mode 100644 gcc-4.9/gcc/config/pa/pa-linux.h create mode 100644 gcc-4.9/gcc/config/pa/pa-modes.def create mode 100644 gcc-4.9/gcc/config/pa/pa-openbsd.h create mode 100644 gcc-4.9/gcc/config/pa/pa-opts.h create mode 100644 gcc-4.9/gcc/config/pa/pa-protos.h create mode 100644 gcc-4.9/gcc/config/pa/pa.c create mode 100644 gcc-4.9/gcc/config/pa/pa.h create mode 100644 gcc-4.9/gcc/config/pa/pa.md create mode 100644 gcc-4.9/gcc/config/pa/pa.opt create mode 100644 gcc-4.9/gcc/config/pa/pa32-linux.h create mode 100644 gcc-4.9/gcc/config/pa/pa32-openbsd.h create mode 100644 gcc-4.9/gcc/config/pa/pa32-regs.h create mode 100644 gcc-4.9/gcc/config/pa/pa64-hpux.h create mode 100644 gcc-4.9/gcc/config/pa/pa64-hpux.opt create mode 100644 gcc-4.9/gcc/config/pa/pa64-linux.h create mode 100644 gcc-4.9/gcc/config/pa/pa64-regs.h create mode 100644 gcc-4.9/gcc/config/pa/pa64-start.h create mode 100644 gcc-4.9/gcc/config/pa/predicates.md create mode 100644 gcc-4.9/gcc/config/pa/som.h create mode 100644 gcc-4.9/gcc/config/pa/t-dce-thr create mode 100644 gcc-4.9/gcc/config/pa/t-linux 
create mode 100644 gcc-4.9/gcc/config/pdp11/constraints.md create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11-modes.def create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11-protos.h create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11.c create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11.h create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11.md create mode 100644 gcc-4.9/gcc/config/pdp11/pdp11.opt create mode 100644 gcc-4.9/gcc/config/pdp11/predicates.md create mode 100644 gcc-4.9/gcc/config/pdp11/t-pdp11 create mode 100644 gcc-4.9/gcc/config/picochip/constraints.md create mode 100644 gcc-4.9/gcc/config/picochip/dfa_space.md create mode 100644 gcc-4.9/gcc/config/picochip/dfa_speed.md create mode 100644 gcc-4.9/gcc/config/picochip/picochip-protos.h create mode 100644 gcc-4.9/gcc/config/picochip/picochip.c create mode 100644 gcc-4.9/gcc/config/picochip/picochip.h create mode 100644 gcc-4.9/gcc/config/picochip/picochip.md create mode 100644 gcc-4.9/gcc/config/picochip/picochip.opt create mode 100644 gcc-4.9/gcc/config/picochip/predicates.md create mode 100644 gcc-4.9/gcc/config/picochip/t-picochip create mode 100644 gcc-4.9/gcc/config/print-sysroot-suffix.sh create mode 100644 gcc-4.9/gcc/config/rl78/constraints.md create mode 100644 gcc-4.9/gcc/config/rl78/predicates.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78-c.c create mode 100644 gcc-4.9/gcc/config/rl78/rl78-expand.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78-opts.h create mode 100644 gcc-4.9/gcc/config/rl78/rl78-protos.h create mode 100644 gcc-4.9/gcc/config/rl78/rl78-real.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78-virt.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78.c create mode 100644 gcc-4.9/gcc/config/rl78/rl78.h create mode 100644 gcc-4.9/gcc/config/rl78/rl78.md create mode 100644 gcc-4.9/gcc/config/rl78/rl78.opt create mode 100644 gcc-4.9/gcc/config/rl78/t-rl78 create mode 100644 gcc-4.9/gcc/config/rpath.opt create mode 100644 gcc-4.9/gcc/config/rs6000/40x.md create mode 100644 gcc-4.9/gcc/config/rs6000/440.md create mode 100644 gcc-4.9/gcc/config/rs6000/476.h create mode 100644 gcc-4.9/gcc/config/rs6000/476.md create mode 100644 gcc-4.9/gcc/config/rs6000/476.opt create mode 100644 gcc-4.9/gcc/config/rs6000/601.md create mode 100644 gcc-4.9/gcc/config/rs6000/603.md create mode 100644 gcc-4.9/gcc/config/rs6000/6xx.md create mode 100644 gcc-4.9/gcc/config/rs6000/7450.md create mode 100644 gcc-4.9/gcc/config/rs6000/750cl.h create mode 100644 gcc-4.9/gcc/config/rs6000/7xx.md create mode 100644 gcc-4.9/gcc/config/rs6000/8540.md create mode 100644 gcc-4.9/gcc/config/rs6000/a2.md create mode 100644 gcc-4.9/gcc/config/rs6000/aix-stdint.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix43.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix51.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix52.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix53.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix61.h create mode 100644 gcc-4.9/gcc/config/rs6000/aix64.opt create mode 100644 gcc-4.9/gcc/config/rs6000/altivec.h create mode 100644 gcc-4.9/gcc/config/rs6000/altivec.md create mode 100644 gcc-4.9/gcc/config/rs6000/biarch64.h create mode 100644 gcc-4.9/gcc/config/rs6000/cell.md create mode 100644 gcc-4.9/gcc/config/rs6000/constraints.md create mode 100644 gcc-4.9/gcc/config/rs6000/crypto.md create mode 100644 gcc-4.9/gcc/config/rs6000/darwin.h create mode 100644 gcc-4.9/gcc/config/rs6000/darwin.md create mode 100644 gcc-4.9/gcc/config/rs6000/darwin.opt create mode 100644 
gcc-4.9/gcc/config/rs6000/darwin64.h create mode 100644 gcc-4.9/gcc/config/rs6000/darwin7.h create mode 100644 gcc-4.9/gcc/config/rs6000/darwin8.h create mode 100644 gcc-4.9/gcc/config/rs6000/default64.h create mode 100644 gcc-4.9/gcc/config/rs6000/dfp.md create mode 100644 gcc-4.9/gcc/config/rs6000/driver-rs6000.c create mode 100644 gcc-4.9/gcc/config/rs6000/e300c2c3.md create mode 100644 gcc-4.9/gcc/config/rs6000/e500.h create mode 100644 gcc-4.9/gcc/config/rs6000/e500mc.md create mode 100644 gcc-4.9/gcc/config/rs6000/e500mc64.md create mode 100644 gcc-4.9/gcc/config/rs6000/e5500.md create mode 100644 gcc-4.9/gcc/config/rs6000/e6500.md create mode 100644 gcc-4.9/gcc/config/rs6000/eabi.h create mode 100644 gcc-4.9/gcc/config/rs6000/eabialtivec.h create mode 100644 gcc-4.9/gcc/config/rs6000/eabisim.h create mode 100644 gcc-4.9/gcc/config/rs6000/eabispe.h create mode 100644 gcc-4.9/gcc/config/rs6000/freebsd.h create mode 100644 gcc-4.9/gcc/config/rs6000/freebsd64.h create mode 100755 gcc-4.9/gcc/config/rs6000/genopt.sh create mode 100644 gcc-4.9/gcc/config/rs6000/host-darwin.c create mode 100644 gcc-4.9/gcc/config/rs6000/host-ppc64-darwin.c create mode 100644 gcc-4.9/gcc/config/rs6000/htm.md create mode 100644 gcc-4.9/gcc/config/rs6000/htmintrin.h create mode 100644 gcc-4.9/gcc/config/rs6000/htmxlintrin.h create mode 100644 gcc-4.9/gcc/config/rs6000/linux.h create mode 100644 gcc-4.9/gcc/config/rs6000/linux64.h create mode 100644 gcc-4.9/gcc/config/rs6000/linux64.opt create mode 100644 gcc-4.9/gcc/config/rs6000/linuxaltivec.h create mode 100644 gcc-4.9/gcc/config/rs6000/linuxspe.h create mode 100644 gcc-4.9/gcc/config/rs6000/lynx.h create mode 100644 gcc-4.9/gcc/config/rs6000/milli.exp create mode 100644 gcc-4.9/gcc/config/rs6000/mpc.md create mode 100644 gcc-4.9/gcc/config/rs6000/netbsd.h create mode 100644 gcc-4.9/gcc/config/rs6000/option-defaults.h create mode 100644 gcc-4.9/gcc/config/rs6000/paired.h create mode 100644 gcc-4.9/gcc/config/rs6000/paired.md create mode 100644 gcc-4.9/gcc/config/rs6000/power4.md create mode 100644 gcc-4.9/gcc/config/rs6000/power5.md create mode 100644 gcc-4.9/gcc/config/rs6000/power6.md create mode 100644 gcc-4.9/gcc/config/rs6000/power7.md create mode 100644 gcc-4.9/gcc/config/rs6000/power8.md create mode 100644 gcc-4.9/gcc/config/rs6000/ppc-asm.h create mode 100644 gcc-4.9/gcc/config/rs6000/ppu_intrinsics.h create mode 100644 gcc-4.9/gcc/config/rs6000/predicates.md create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-builtin.def create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-c.c create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-cpus.def create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-linux.c create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-modes.def create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-opts.h create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-protos.h create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000-tables.opt create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000.c create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000.h create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000.md create mode 100644 gcc-4.9/gcc/config/rs6000/rs6000.opt create mode 100644 gcc-4.9/gcc/config/rs6000/rs64.md create mode 100644 gcc-4.9/gcc/config/rs6000/rtems.h create mode 100644 gcc-4.9/gcc/config/rs6000/secureplt.h create mode 100644 gcc-4.9/gcc/config/rs6000/si2vmx.h create mode 100644 gcc-4.9/gcc/config/rs6000/singlefp.h create mode 100644 gcc-4.9/gcc/config/rs6000/spe.h create mode 100644 gcc-4.9/gcc/config/rs6000/spe.md create mode 100644 
gcc-4.9/gcc/config/rs6000/spu2vmx.h create mode 100644 gcc-4.9/gcc/config/rs6000/sync.md create mode 100644 gcc-4.9/gcc/config/rs6000/sysv4.h create mode 100644 gcc-4.9/gcc/config/rs6000/sysv4.opt create mode 100644 gcc-4.9/gcc/config/rs6000/sysv4le.h create mode 100644 gcc-4.9/gcc/config/rs6000/t-aix43 create mode 100644 gcc-4.9/gcc/config/rs6000/t-aix52 create mode 100644 gcc-4.9/gcc/config/rs6000/t-darwin64 create mode 100644 gcc-4.9/gcc/config/rs6000/t-darwin8 create mode 100644 gcc-4.9/gcc/config/rs6000/t-fprules create mode 100644 gcc-4.9/gcc/config/rs6000/t-freebsd64 create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux64 create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux64bele create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux64le create mode 100644 gcc-4.9/gcc/config/rs6000/t-linux64lebe create mode 100644 gcc-4.9/gcc/config/rs6000/t-lynx create mode 100644 gcc-4.9/gcc/config/rs6000/t-netbsd create mode 100644 gcc-4.9/gcc/config/rs6000/t-ppccomm create mode 100644 gcc-4.9/gcc/config/rs6000/t-ppcendian create mode 100644 gcc-4.9/gcc/config/rs6000/t-ppcgas create mode 100644 gcc-4.9/gcc/config/rs6000/t-ppcos create mode 100644 gcc-4.9/gcc/config/rs6000/t-rs6000 create mode 100644 gcc-4.9/gcc/config/rs6000/t-rtems create mode 100644 gcc-4.9/gcc/config/rs6000/t-spe create mode 100644 gcc-4.9/gcc/config/rs6000/t-vxworks create mode 100644 gcc-4.9/gcc/config/rs6000/t-vxworksae create mode 100644 gcc-4.9/gcc/config/rs6000/t-xilinx create mode 100644 gcc-4.9/gcc/config/rs6000/titan.md create mode 100644 gcc-4.9/gcc/config/rs6000/vec_types.h create mode 100644 gcc-4.9/gcc/config/rs6000/vector.md create mode 100644 gcc-4.9/gcc/config/rs6000/vsx.md create mode 100644 gcc-4.9/gcc/config/rs6000/vxworks.h create mode 100644 gcc-4.9/gcc/config/rs6000/x-aix create mode 100644 gcc-4.9/gcc/config/rs6000/x-darwin create mode 100644 gcc-4.9/gcc/config/rs6000/x-darwin64 create mode 100644 gcc-4.9/gcc/config/rs6000/x-linux-relax create mode 100644 gcc-4.9/gcc/config/rs6000/x-rs6000 create mode 100644 gcc-4.9/gcc/config/rs6000/xcoff.h create mode 100644 gcc-4.9/gcc/config/rs6000/xfpu.h create mode 100644 gcc-4.9/gcc/config/rs6000/xfpu.md create mode 100644 gcc-4.9/gcc/config/rs6000/xilinx.h create mode 100644 gcc-4.9/gcc/config/rs6000/xilinx.opt create mode 100644 gcc-4.9/gcc/config/rtems.h create mode 100644 gcc-4.9/gcc/config/rtems.opt create mode 100644 gcc-4.9/gcc/config/rx/constraints.md create mode 100644 gcc-4.9/gcc/config/rx/predicates.md create mode 100644 gcc-4.9/gcc/config/rx/rx-modes.def create mode 100644 gcc-4.9/gcc/config/rx/rx-opts.h create mode 100644 gcc-4.9/gcc/config/rx/rx-protos.h create mode 100644 gcc-4.9/gcc/config/rx/rx.c create mode 100644 gcc-4.9/gcc/config/rx/rx.h create mode 100644 gcc-4.9/gcc/config/rx/rx.md create mode 100644 gcc-4.9/gcc/config/rx/rx.opt create mode 100644 gcc-4.9/gcc/config/rx/t-rx create mode 100644 gcc-4.9/gcc/config/s390/2064.md create mode 100644 gcc-4.9/gcc/config/s390/2084.md create mode 100644 gcc-4.9/gcc/config/s390/2097.md create mode 100644 gcc-4.9/gcc/config/s390/2817.md create mode 100644 gcc-4.9/gcc/config/s390/2827.md create mode 100644 gcc-4.9/gcc/config/s390/constraints.md create mode 100644 gcc-4.9/gcc/config/s390/htmintrin.h create mode 100644 gcc-4.9/gcc/config/s390/htmxlintrin.h create mode 100644 gcc-4.9/gcc/config/s390/linux.h create mode 100644 gcc-4.9/gcc/config/s390/predicates.md create mode 100644 gcc-4.9/gcc/config/s390/s390-modes.def create mode 100644 
gcc-4.9/gcc/config/s390/s390-opts.h create mode 100644 gcc-4.9/gcc/config/s390/s390-protos.h create mode 100644 gcc-4.9/gcc/config/s390/s390.c create mode 100644 gcc-4.9/gcc/config/s390/s390.h create mode 100644 gcc-4.9/gcc/config/s390/s390.md create mode 100644 gcc-4.9/gcc/config/s390/s390.opt create mode 100644 gcc-4.9/gcc/config/s390/s390intrin.h create mode 100644 gcc-4.9/gcc/config/s390/s390x.h create mode 100644 gcc-4.9/gcc/config/s390/t-linux64 create mode 100644 gcc-4.9/gcc/config/s390/tpf.h create mode 100644 gcc-4.9/gcc/config/s390/tpf.md create mode 100644 gcc-4.9/gcc/config/s390/tpf.opt create mode 100644 gcc-4.9/gcc/config/score/constraints.md create mode 100644 gcc-4.9/gcc/config/score/elf.h create mode 100644 gcc-4.9/gcc/config/score/predicates.md create mode 100644 gcc-4.9/gcc/config/score/score-conv.h create mode 100644 gcc-4.9/gcc/config/score/score-generic.md create mode 100644 gcc-4.9/gcc/config/score/score-modes.def create mode 100644 gcc-4.9/gcc/config/score/score-protos.h create mode 100644 gcc-4.9/gcc/config/score/score.c create mode 100644 gcc-4.9/gcc/config/score/score.h create mode 100644 gcc-4.9/gcc/config/score/score.md create mode 100644 gcc-4.9/gcc/config/score/score.opt create mode 100644 gcc-4.9/gcc/config/sh/constraints.md create mode 100644 gcc-4.9/gcc/config/sh/divcost-analysis create mode 100644 gcc-4.9/gcc/config/sh/divtab-sh4-300.c create mode 100644 gcc-4.9/gcc/config/sh/divtab-sh4.c create mode 100644 gcc-4.9/gcc/config/sh/divtab.c create mode 100644 gcc-4.9/gcc/config/sh/elf.h create mode 100644 gcc-4.9/gcc/config/sh/embed-elf.h create mode 100644 gcc-4.9/gcc/config/sh/iterators.md create mode 100644 gcc-4.9/gcc/config/sh/linux.h create mode 100644 gcc-4.9/gcc/config/sh/little.h create mode 100644 gcc-4.9/gcc/config/sh/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/sh/newlib.h create mode 100644 gcc-4.9/gcc/config/sh/predicates.md create mode 100644 gcc-4.9/gcc/config/sh/rtems.h create mode 100644 gcc-4.9/gcc/config/sh/rtemself.h create mode 100644 gcc-4.9/gcc/config/sh/sh-c.c create mode 100644 gcc-4.9/gcc/config/sh/sh-mem.cc create mode 100644 gcc-4.9/gcc/config/sh/sh-modes.def create mode 100644 gcc-4.9/gcc/config/sh/sh-protos.h create mode 100644 gcc-4.9/gcc/config/sh/sh.c create mode 100644 gcc-4.9/gcc/config/sh/sh.h create mode 100644 gcc-4.9/gcc/config/sh/sh.md create mode 100644 gcc-4.9/gcc/config/sh/sh.opt create mode 100644 gcc-4.9/gcc/config/sh/sh1.md create mode 100644 gcc-4.9/gcc/config/sh/sh4-300.md create mode 100644 gcc-4.9/gcc/config/sh/sh4.md create mode 100644 gcc-4.9/gcc/config/sh/sh4a.md create mode 100644 gcc-4.9/gcc/config/sh/sh64.h create mode 100644 gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc create mode 100644 gcc-4.9/gcc/config/sh/sh_treg_combine.cc create mode 100644 gcc-4.9/gcc/config/sh/shmedia.h create mode 100644 gcc-4.9/gcc/config/sh/shmedia.md create mode 100644 gcc-4.9/gcc/config/sh/sshmedia.h create mode 100644 gcc-4.9/gcc/config/sh/superh.h create mode 100644 gcc-4.9/gcc/config/sh/superh.opt create mode 100644 gcc-4.9/gcc/config/sh/sync.md create mode 100644 gcc-4.9/gcc/config/sh/t-linux create mode 100644 gcc-4.9/gcc/config/sh/t-netbsd-sh5-64 create mode 100644 gcc-4.9/gcc/config/sh/t-rtems create mode 100644 gcc-4.9/gcc/config/sh/t-sh create mode 100644 gcc-4.9/gcc/config/sh/t-sh64 create mode 100644 gcc-4.9/gcc/config/sh/t-vxworks create mode 100644 gcc-4.9/gcc/config/sh/ushmedia.h create mode 100644 gcc-4.9/gcc/config/sh/vxworks.h create mode 100644 gcc-4.9/gcc/config/sol2-10.h create mode 100644 
gcc-4.9/gcc/config/sol2-bi.h create mode 100644 gcc-4.9/gcc/config/sol2-c.c create mode 100644 gcc-4.9/gcc/config/sol2-cxx.c create mode 100644 gcc-4.9/gcc/config/sol2-protos.h create mode 100644 gcc-4.9/gcc/config/sol2-stubs.c create mode 100644 gcc-4.9/gcc/config/sol2.c create mode 100644 gcc-4.9/gcc/config/sol2.h create mode 100644 gcc-4.9/gcc/config/sol2.opt create mode 100644 gcc-4.9/gcc/config/sparc/biarch64.h create mode 100644 gcc-4.9/gcc/config/sparc/constraints.md create mode 100644 gcc-4.9/gcc/config/sparc/cypress.md create mode 100644 gcc-4.9/gcc/config/sparc/default-64.h create mode 100644 gcc-4.9/gcc/config/sparc/driver-sparc.c create mode 100644 gcc-4.9/gcc/config/sparc/freebsd.h create mode 100644 gcc-4.9/gcc/config/sparc/hypersparc.md create mode 100644 gcc-4.9/gcc/config/sparc/leon.md create mode 100644 gcc-4.9/gcc/config/sparc/linux.h create mode 100644 gcc-4.9/gcc/config/sparc/linux64.h create mode 100644 gcc-4.9/gcc/config/sparc/long-double-switch.opt create mode 100644 gcc-4.9/gcc/config/sparc/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/sparc/niagara.md create mode 100644 gcc-4.9/gcc/config/sparc/niagara2.md create mode 100644 gcc-4.9/gcc/config/sparc/niagara4.md create mode 100644 gcc-4.9/gcc/config/sparc/openbsd1-64.h create mode 100644 gcc-4.9/gcc/config/sparc/openbsd64.h create mode 100644 gcc-4.9/gcc/config/sparc/predicates.md create mode 100644 gcc-4.9/gcc/config/sparc/rtemself.h create mode 100644 gcc-4.9/gcc/config/sparc/sol2.h create mode 100644 gcc-4.9/gcc/config/sparc/sp-elf.h create mode 100644 gcc-4.9/gcc/config/sparc/sp64-elf.h create mode 100644 gcc-4.9/gcc/config/sparc/sparc-c.c create mode 100644 gcc-4.9/gcc/config/sparc/sparc-modes.def create mode 100644 gcc-4.9/gcc/config/sparc/sparc-opts.h create mode 100644 gcc-4.9/gcc/config/sparc/sparc-protos.h create mode 100644 gcc-4.9/gcc/config/sparc/sparc.c create mode 100644 gcc-4.9/gcc/config/sparc/sparc.h create mode 100644 gcc-4.9/gcc/config/sparc/sparc.md create mode 100644 gcc-4.9/gcc/config/sparc/sparc.opt create mode 100644 gcc-4.9/gcc/config/sparc/sparclet.md create mode 100644 gcc-4.9/gcc/config/sparc/supersparc.md create mode 100644 gcc-4.9/gcc/config/sparc/sync.md create mode 100644 gcc-4.9/gcc/config/sparc/sysv4.h create mode 100644 gcc-4.9/gcc/config/sparc/t-elf create mode 100644 gcc-4.9/gcc/config/sparc/t-leon create mode 100644 gcc-4.9/gcc/config/sparc/t-leon3 create mode 100644 gcc-4.9/gcc/config/sparc/t-linux create mode 100644 gcc-4.9/gcc/config/sparc/t-linux64 create mode 100644 gcc-4.9/gcc/config/sparc/t-netbsd64 create mode 100644 gcc-4.9/gcc/config/sparc/t-rtems create mode 100644 gcc-4.9/gcc/config/sparc/t-rtems-64 create mode 100644 gcc-4.9/gcc/config/sparc/t-sol2-64 create mode 100644 gcc-4.9/gcc/config/sparc/t-sparc create mode 100644 gcc-4.9/gcc/config/sparc/t-vxworks create mode 100644 gcc-4.9/gcc/config/sparc/tso.h create mode 100644 gcc-4.9/gcc/config/sparc/ultra1_2.md create mode 100644 gcc-4.9/gcc/config/sparc/ultra3.md create mode 100644 gcc-4.9/gcc/config/sparc/visintrin.h create mode 100644 gcc-4.9/gcc/config/sparc/vxworks.h create mode 100644 gcc-4.9/gcc/config/sparc/x-sparc create mode 100644 gcc-4.9/gcc/config/spu/constraints.md create mode 100644 gcc-4.9/gcc/config/spu/predicates.md create mode 100644 gcc-4.9/gcc/config/spu/spu-builtins.def create mode 100644 gcc-4.9/gcc/config/spu/spu-builtins.md create mode 100644 gcc-4.9/gcc/config/spu/spu-c.c create mode 100644 gcc-4.9/gcc/config/spu/spu-elf.h create mode 100644 gcc-4.9/gcc/config/spu/spu-modes.def create 
mode 100644 gcc-4.9/gcc/config/spu/spu-protos.h create mode 100644 gcc-4.9/gcc/config/spu/spu.c create mode 100644 gcc-4.9/gcc/config/spu/spu.h create mode 100644 gcc-4.9/gcc/config/spu/spu.md create mode 100644 gcc-4.9/gcc/config/spu/spu.opt create mode 100644 gcc-4.9/gcc/config/spu/spu_cache.h create mode 100644 gcc-4.9/gcc/config/spu/spu_internals.h create mode 100644 gcc-4.9/gcc/config/spu/spu_intrinsics.h create mode 100644 gcc-4.9/gcc/config/spu/spu_mfcio.h create mode 100644 gcc-4.9/gcc/config/spu/t-spu-elf create mode 100644 gcc-4.9/gcc/config/spu/vec_types.h create mode 100644 gcc-4.9/gcc/config/spu/vmx2spu.h create mode 100644 gcc-4.9/gcc/config/stormy16/constraints.md create mode 100644 gcc-4.9/gcc/config/stormy16/predicates.md create mode 100644 gcc-4.9/gcc/config/stormy16/stormy-abi create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16-protos.h create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16.c create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16.h create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16.md create mode 100644 gcc-4.9/gcc/config/stormy16/stormy16.opt create mode 100644 gcc-4.9/gcc/config/t-darwin create mode 100644 gcc-4.9/gcc/config/t-glibc create mode 100644 gcc-4.9/gcc/config/t-libunwind create mode 100644 gcc-4.9/gcc/config/t-linux create mode 100644 gcc-4.9/gcc/config/t-lynx create mode 100644 gcc-4.9/gcc/config/t-openbsd create mode 100644 gcc-4.9/gcc/config/t-pnt16-warn create mode 100644 gcc-4.9/gcc/config/t-rtems create mode 100644 gcc-4.9/gcc/config/t-slibgcc create mode 100644 gcc-4.9/gcc/config/t-sol2 create mode 100644 gcc-4.9/gcc/config/t-sysroot-suffix create mode 100644 gcc-4.9/gcc/config/t-vxworks create mode 100644 gcc-4.9/gcc/config/t-winnt create mode 100644 gcc-4.9/gcc/config/tilegx/constraints.md create mode 100644 gcc-4.9/gcc/config/tilegx/feedback.h create mode 100644 gcc-4.9/gcc/config/tilegx/linux.h create mode 100644 gcc-4.9/gcc/config/tilegx/mul-tables.c create mode 100644 gcc-4.9/gcc/config/tilegx/predicates.md create mode 100644 gcc-4.9/gcc/config/tilegx/sync.md create mode 100644 gcc-4.9/gcc/config/tilegx/t-tilegx create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-builtins.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-c.c create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-generic.md create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-modes.def create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-multiply.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-opts.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx-protos.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx.c create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx.h create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx.md create mode 100644 gcc-4.9/gcc/config/tilegx/tilegx.opt create mode 100644 gcc-4.9/gcc/config/tilepro/constraints.md create mode 100644 gcc-4.9/gcc/config/tilepro/feedback.h create mode 100644 gcc-4.9/gcc/config/tilepro/gen-mul-tables.cc create mode 100644 gcc-4.9/gcc/config/tilepro/linux.h create mode 100644 gcc-4.9/gcc/config/tilepro/mul-tables.c create mode 100644 gcc-4.9/gcc/config/tilepro/predicates.md create mode 100644 gcc-4.9/gcc/config/tilepro/t-tilepro create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-builtins.h create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-c.c create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-generic.md create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-modes.def create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro-multiply.h create mode 100644 
gcc-4.9/gcc/config/tilepro/tilepro-protos.h create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro.c create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro.h create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro.md create mode 100644 gcc-4.9/gcc/config/tilepro/tilepro.opt create mode 100644 gcc-4.9/gcc/config/tm-dwarf2.h create mode 100644 gcc-4.9/gcc/config/usegas.h create mode 100644 gcc-4.9/gcc/config/usegld.h create mode 100644 gcc-4.9/gcc/config/v850/constraints.md create mode 100644 gcc-4.9/gcc/config/v850/predicates.md create mode 100644 gcc-4.9/gcc/config/v850/rtems.h create mode 100644 gcc-4.9/gcc/config/v850/t-rtems create mode 100644 gcc-4.9/gcc/config/v850/t-v850 create mode 100644 gcc-4.9/gcc/config/v850/v850-c.c create mode 100644 gcc-4.9/gcc/config/v850/v850-modes.def create mode 100644 gcc-4.9/gcc/config/v850/v850-opts.h create mode 100644 gcc-4.9/gcc/config/v850/v850-protos.h create mode 100644 gcc-4.9/gcc/config/v850/v850.c create mode 100644 gcc-4.9/gcc/config/v850/v850.h create mode 100644 gcc-4.9/gcc/config/v850/v850.md create mode 100644 gcc-4.9/gcc/config/v850/v850.opt create mode 100644 gcc-4.9/gcc/config/vax/builtins.md create mode 100644 gcc-4.9/gcc/config/vax/constraints.md create mode 100644 gcc-4.9/gcc/config/vax/elf.h create mode 100644 gcc-4.9/gcc/config/vax/elf.opt create mode 100644 gcc-4.9/gcc/config/vax/linux.h create mode 100644 gcc-4.9/gcc/config/vax/netbsd-elf.h create mode 100644 gcc-4.9/gcc/config/vax/openbsd.h create mode 100644 gcc-4.9/gcc/config/vax/openbsd1.h create mode 100644 gcc-4.9/gcc/config/vax/predicates.md create mode 100644 gcc-4.9/gcc/config/vax/vax-modes.def create mode 100644 gcc-4.9/gcc/config/vax/vax-protos.h create mode 100644 gcc-4.9/gcc/config/vax/vax.c create mode 100644 gcc-4.9/gcc/config/vax/vax.h create mode 100644 gcc-4.9/gcc/config/vax/vax.md create mode 100644 gcc-4.9/gcc/config/vax/vax.opt create mode 100644 gcc-4.9/gcc/config/vms/make-crtlmap.awk create mode 100644 gcc-4.9/gcc/config/vms/t-vms create mode 100644 gcc-4.9/gcc/config/vms/t-vmsnative create mode 100644 gcc-4.9/gcc/config/vms/vms-ar.c create mode 100644 gcc-4.9/gcc/config/vms/vms-c.c create mode 100644 gcc-4.9/gcc/config/vms/vms-crtlmap.map create mode 100644 gcc-4.9/gcc/config/vms/vms-f.c create mode 100644 gcc-4.9/gcc/config/vms/vms-ld.c create mode 100644 gcc-4.9/gcc/config/vms/vms-opts.h create mode 100644 gcc-4.9/gcc/config/vms/vms-protos.h create mode 100644 gcc-4.9/gcc/config/vms/vms-stdint.h create mode 100644 gcc-4.9/gcc/config/vms/vms.c create mode 100644 gcc-4.9/gcc/config/vms/vms.h create mode 100644 gcc-4.9/gcc/config/vms/vms.opt create mode 100644 gcc-4.9/gcc/config/vms/x-vms create mode 100644 gcc-4.9/gcc/config/vms/xm-vms.h create mode 100644 gcc-4.9/gcc/config/vx-common.h create mode 100644 gcc-4.9/gcc/config/vxworks-dummy.h create mode 100644 gcc-4.9/gcc/config/vxworks.c create mode 100644 gcc-4.9/gcc/config/vxworks.h create mode 100644 gcc-4.9/gcc/config/vxworks.opt create mode 100644 gcc-4.9/gcc/config/vxworksae.h create mode 100644 gcc-4.9/gcc/config/winnt-c.c create mode 100644 gcc-4.9/gcc/config/x-cflags-O1 create mode 100644 gcc-4.9/gcc/config/x-darwin create mode 100644 gcc-4.9/gcc/config/x-hpux create mode 100644 gcc-4.9/gcc/config/x-linux create mode 100644 gcc-4.9/gcc/config/x-openbsd create mode 100644 gcc-4.9/gcc/config/x-solaris create mode 100644 gcc-4.9/gcc/config/xtensa/constraints.md create mode 100644 gcc-4.9/gcc/config/xtensa/elf.h create mode 100644 gcc-4.9/gcc/config/xtensa/elf.opt create mode 100644 
gcc-4.9/gcc/config/xtensa/linux.h create mode 100644 gcc-4.9/gcc/config/xtensa/predicates.md create mode 100644 gcc-4.9/gcc/config/xtensa/t-xtensa create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa-protos.h create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa.c create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa.h create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa.md create mode 100644 gcc-4.9/gcc/config/xtensa/xtensa.opt (limited to 'gcc-4.9/gcc/config') diff --git a/gcc-4.9/gcc/config/README b/gcc-4.9/gcc/config/README new file mode 100644 index 000000000..60328ec5b --- /dev/null +++ b/gcc-4.9/gcc/config/README @@ -0,0 +1,5 @@ +This directory contains machine-specific files for the GNU C compiler. +It has a subdirectory for each basic CPU type. +The only files in this directory itself +are some .h files that pertain to particular operating systems +and are used for more than one CPU type. diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-arches.def b/gcc-4.9/gcc/config/aarch64/aarch64-arches.def new file mode 100644 index 000000000..4b796d8c9 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-arches.def @@ -0,0 +1,29 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) + + The NAME is the name of the architecture, represented as a string + constant. The CORE is the identifier for a core representative of + this architecture. ARCH is the architecture revision. FLAGS are + the flags implied by the architecture. */ + +AARCH64_ARCH("armv8-a", generic, 8, AARCH64_FL_FOR_ARCH8) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c new file mode 100644 index 000000000..55cfe0ab2 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-builtins.c @@ -0,0 +1,1253 @@ +/* Builtins' description for AArch64 SIMD architecture. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "calls.h" +#include "expr.h" +#include "tm_p.h" +#include "recog.h" +#include "langhooks.h" +#include "diagnostic-core.h" +#include "optabs.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "ggc.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimple-iterator.h" + +enum aarch64_simd_builtin_type_mode +{ + T_V8QI, + T_V4HI, + T_V2SI, + T_V2SF, + T_DI, + T_DF, + T_V16QI, + T_V8HI, + T_V4SI, + T_V4SF, + T_V2DI, + T_V2DF, + T_TI, + T_EI, + T_OI, + T_XI, + T_SI, + T_SF, + T_HI, + T_QI, + T_MAX +}; + +#define v8qi_UP T_V8QI +#define v4hi_UP T_V4HI +#define v2si_UP T_V2SI +#define v2sf_UP T_V2SF +#define di_UP T_DI +#define df_UP T_DF +#define v16qi_UP T_V16QI +#define v8hi_UP T_V8HI +#define v4si_UP T_V4SI +#define v4sf_UP T_V4SF +#define v2di_UP T_V2DI +#define v2df_UP T_V2DF +#define ti_UP T_TI +#define ei_UP T_EI +#define oi_UP T_OI +#define xi_UP T_XI +#define si_UP T_SI +#define sf_UP T_SF +#define hi_UP T_HI +#define qi_UP T_QI + +#define UP(X) X##_UP + +#define SIMD_MAX_BUILTIN_ARGS 5 + +enum aarch64_type_qualifiers +{ + /* T foo. */ + qualifier_none = 0x0, + /* unsigned T foo. */ + qualifier_unsigned = 0x1, /* 1 << 0 */ + /* const T foo. */ + qualifier_const = 0x2, /* 1 << 1 */ + /* T *foo. */ + qualifier_pointer = 0x4, /* 1 << 2 */ + /* const T *foo. */ + qualifier_const_pointer = 0x6, /* qualifier_const | qualifier_pointer */ + /* Used when expanding arguments if an operand could + be an immediate. */ + qualifier_immediate = 0x8, /* 1 << 3 */ + qualifier_maybe_immediate = 0x10, /* 1 << 4 */ + /* void foo (...). */ + qualifier_void = 0x20, /* 1 << 5 */ + /* Some patterns may have internal operands, this qualifier is an + instruction to the initialisation code to skip this operand. */ + qualifier_internal = 0x40, /* 1 << 6 */ + /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum + rather than using the type of the operand. */ + qualifier_map_mode = 0x80, /* 1 << 7 */ + /* qualifier_pointer | qualifier_map_mode */ + qualifier_pointer_map_mode = 0x84, + /* qualifier_const_pointer | qualifier_map_mode */ + qualifier_const_pointer_map_mode = 0x86, + /* Polynomial types. 
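+   These map to the __builtin_aarch64_simd_poly* scalar types registered
+   below; note that aarch64_build_scalar_type builds them with
+   make_unsigned_type, so a poly value is carried in an unsigned
+   container of the same width.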
*/ + qualifier_poly = 0x100 +}; + +typedef struct +{ + const char *name; + enum aarch64_simd_builtin_type_mode mode; + const enum insn_code code; + unsigned int fcode; + enum aarch64_type_qualifiers *qualifiers; +} aarch64_simd_builtin_datum; + +static enum aarch64_type_qualifiers +aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none }; +#define TYPES_UNOP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned }; +#define TYPES_UNOPU (aarch64_types_unopu_qualifiers) +#define TYPES_CREATE (aarch64_types_unop_qualifiers) +#define TYPES_REINTERP (aarch64_types_unop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; +#define TYPES_BINOP (aarch64_types_binop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; +#define TYPES_BINOPU (aarch64_types_binopu_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_poly, qualifier_poly }; +#define TYPES_BINOPP (aarch64_types_binopp_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_TERNOP (aarch64_types_ternop_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_unsigned }; +#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_quadop_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, + qualifier_none, qualifier_none }; +#define TYPES_QUADOP (aarch64_types_quadop_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_GETLANE (aarch64_types_getlane_qualifiers) +#define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; +#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; +#define TYPES_SETLANE (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTINSERT (aarch64_types_setlane_qualifiers) +#define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none }; +#define TYPES_COMBINE (aarch64_types_combine_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_const_pointer_map_mode }; +#define TYPES_LOAD1 (aarch64_types_load1_qualifiers) +#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers) + +static enum aarch64_type_qualifiers +aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_unsigned, + 
qualifier_poly, qualifier_poly }; +#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, + qualifier_none, qualifier_none }; +#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_unsigned }; +#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers) + +/* The first argument (return type) of a store should be void type, + which we represent with qualifier_void. Their first operand will be + a DImode pointer to the location to store to, so we must use + qualifier_map_mode | qualifier_pointer to build a pointer to the + element type of the vector. */ +static enum aarch64_type_qualifiers +aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; +#define TYPES_STORE1 (aarch64_types_store1_qualifiers) +#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) + +#define CF0(N, X) CODE_FOR_aarch64_##N##X +#define CF1(N, X) CODE_FOR_##N##X##1 +#define CF2(N, X) CODE_FOR_##N##X##2 +#define CF3(N, X) CODE_FOR_##N##X##3 +#define CF4(N, X) CODE_FOR_##N##X##4 +#define CF10(N, X) CODE_FOR_##N##X + +#define VAR1(T, N, MAP, A) \ + {#N, UP (A), CF##MAP (N, A), 0, TYPES_##T}, +#define VAR2(T, N, MAP, A, B) \ + VAR1 (T, N, MAP, A) \ + VAR1 (T, N, MAP, B) +#define VAR3(T, N, MAP, A, B, C) \ + VAR2 (T, N, MAP, A, B) \ + VAR1 (T, N, MAP, C) +#define VAR4(T, N, MAP, A, B, C, D) \ + VAR3 (T, N, MAP, A, B, C) \ + VAR1 (T, N, MAP, D) +#define VAR5(T, N, MAP, A, B, C, D, E) \ + VAR4 (T, N, MAP, A, B, C, D) \ + VAR1 (T, N, MAP, E) +#define VAR6(T, N, MAP, A, B, C, D, E, F) \ + VAR5 (T, N, MAP, A, B, C, D, E) \ + VAR1 (T, N, MAP, F) +#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \ + VAR6 (T, N, MAP, A, B, C, D, E, F) \ + VAR1 (T, N, MAP, G) +#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, MAP, A, B, C, D, E, F, G) \ + VAR1 (T, N, MAP, H) +#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR1 (T, N, MAP, I) +#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR1 (T, N, MAP, J) +#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR1 (T, N, MAP, K) +#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR1 (T, N, MAP, L) + +/* BUILTIN_ macros should expand to cover the same range of + modes as is given for each define_mode_iterator in + config/aarch64/iterators.md. 
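+
+   As an illustration (the entry name "frintn" here is only a
+   hypothetical example, not necessarily one of the names used in
+   aarch64-simd-builtins.def):
+
+     BUILTIN_VDQF (UNOP, frintn, 2)
+
+   expands through VAR3/VAR2/VAR1 into three table entries,
+
+     {"frintn", T_V2SF, CODE_FOR_frintnv2sf2, 0, TYPES_UNOP},
+     {"frintn", T_V4SF, CODE_FOR_frintnv4sf2, 0, TYPES_UNOP},
+     {"frintn", T_V2DF, CODE_FOR_frintnv2df2, 0, TYPES_UNOP},
+
+   one per mode of the VDQF iterator.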
*/ + +#define BUILTIN_DX(T, N, MAP) \ + VAR2 (T, N, MAP, di, df) +#define BUILTIN_GPF(T, N, MAP) \ + VAR2 (T, N, MAP, sf, df) +#define BUILTIN_SDQ_I(T, N, MAP) \ + VAR4 (T, N, MAP, qi, hi, si, di) +#define BUILTIN_SD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, hi, si) +#define BUILTIN_V2F(T, N, MAP) \ + VAR2 (T, N, MAP, v2sf, v2df) +#define BUILTIN_VALL(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df) +#define BUILTIN_VALLDI(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df, di) +#define BUILTIN_VALLDIF(T, N, MAP) \ + VAR12 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df, di, df) +#define BUILTIN_VB(T, N, MAP) \ + VAR2 (T, N, MAP, v8qi, v16qi) +#define BUILTIN_VD(T, N, MAP) \ + VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf) +#define BUILTIN_VDC(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VDIC(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VDN(T, N, MAP) \ + VAR3 (T, N, MAP, v4hi, v2si, di) +#define BUILTIN_VDQ(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDQF(T, N, MAP) \ + VAR3 (T, N, MAP, v2sf, v4sf, v2df) +#define BUILTIN_VDQH(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v8hi) +#define BUILTIN_VDQHS(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQIF(T, N, MAP) \ + VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) +#define BUILTIN_VDQM(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQV(T, N, MAP) \ + VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si) +#define BUILTIN_VDQQH(T, N, MAP) \ + VAR4 (T, N, MAP, v8qi, v16qi, v4hi, v8hi) +#define BUILTIN_VDQ_BHSI(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQ_I(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDW(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_BHSI(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v2si) +#define BUILTIN_VD_RE(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VQ(T, N, MAP) \ + VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df) +#define BUILTIN_VQN(T, N, MAP) \ + VAR3 (T, N, MAP, v8hi, v4si, v2di) +#define BUILTIN_VQW(T, N, MAP) \ + VAR3 (T, N, MAP, v16qi, v8hi, v4si) +#define BUILTIN_VQ_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v8hi, v4si) +#define BUILTIN_VQ_S(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VSDQ_HSI(T, N, MAP) \ + VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si) +#define BUILTIN_VSDQ_I(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) +#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) +#define BUILTIN_VSDQ_I_DI(T, N, MAP) \ + VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) +#define BUILTIN_VSD_HSI(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v2si, hi, si) +#define BUILTIN_VSQN_HSDI(T, N, MAP) \ + VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di) +#define BUILTIN_VSTRUCT(T, N, MAP) \ + VAR3 (T, N, MAP, oi, ci, xi) + +static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { +#include "aarch64-simd-builtins.def" +}; + +#undef VAR1 +#define VAR1(T, N, MAP, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, + +enum aarch64_builtins +{ + AARCH64_BUILTIN_MIN, + 
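+  /* With VAR1 redefined above, including aarch64-simd-builtins.def here
+     produces one enumerator of the form AARCH64_SIMD_BUILTIN_<T>_<N><A>
+     for every table entry, keeping the enumerators in lockstep with
+     aarch64_simd_builtin_data.  */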
AARCH64_SIMD_BUILTIN_BASE, +#include "aarch64-simd-builtins.def" + AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE + + ARRAY_SIZE (aarch64_simd_builtin_data), + AARCH64_BUILTIN_MAX +}; + +static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; + +#define NUM_DREG_TYPES 6 +#define NUM_QREG_TYPES 6 + +/* Return a tree for a signed or unsigned argument of either + the mode specified by MODE, or the inner mode of MODE. */ +tree +aarch64_build_scalar_type (enum machine_mode mode, + bool unsigned_p, + bool poly_p) +{ +#undef INT_TYPES +#define INT_TYPES \ + AARCH64_TYPE_BUILDER (QI) \ + AARCH64_TYPE_BUILDER (HI) \ + AARCH64_TYPE_BUILDER (SI) \ + AARCH64_TYPE_BUILDER (DI) \ + AARCH64_TYPE_BUILDER (EI) \ + AARCH64_TYPE_BUILDER (OI) \ + AARCH64_TYPE_BUILDER (CI) \ + AARCH64_TYPE_BUILDER (XI) \ + AARCH64_TYPE_BUILDER (TI) \ + +/* Statically declare all the possible types we might need. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_p = NULL; \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; + + INT_TYPES + + static tree float_aarch64_type_node = NULL; + static tree double_aarch64_type_node = NULL; + + gcc_assert (!VECTOR_MODE_P (mode)); + +/* If we've already initialised this type, don't initialise it again, + otherwise ask for a new type of the correct size. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return (X##_aarch64_type_node_u \ + ? X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else if (poly_p) \ + return (X##_aarch64_type_node_p \ + ? X##_aarch64_type_node_p \ + : X##_aarch64_type_node_p \ + = make_unsigned_type (GET_MODE_PRECISION (mode))); \ + else \ + return (X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = make_signed_type (GET_MODE_PRECISION (mode))); \ + break; + + switch (mode) + { + INT_TYPES + case SFmode: + if (!float_aarch64_type_node) + { + float_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (float_aarch64_type_node) = FLOAT_TYPE_SIZE; + layout_type (float_aarch64_type_node); + } + return float_aarch64_type_node; + break; + case DFmode: + if (!double_aarch64_type_node) + { + double_aarch64_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (double_aarch64_type_node) = DOUBLE_TYPE_SIZE; + layout_type (double_aarch64_type_node); + } + return double_aarch64_type_node; + break; + default: + gcc_unreachable (); + } +} + +tree +aarch64_build_vector_type (enum machine_mode mode, + bool unsigned_p, + bool poly_p) +{ + tree eltype; + +#define VECTOR_TYPES \ + AARCH64_TYPE_BUILDER (V16QI) \ + AARCH64_TYPE_BUILDER (V8HI) \ + AARCH64_TYPE_BUILDER (V4SI) \ + AARCH64_TYPE_BUILDER (V2DI) \ + AARCH64_TYPE_BUILDER (V8QI) \ + AARCH64_TYPE_BUILDER (V4HI) \ + AARCH64_TYPE_BUILDER (V2SI) \ + \ + AARCH64_TYPE_BUILDER (V4SF) \ + AARCH64_TYPE_BUILDER (V2DF) \ + AARCH64_TYPE_BUILDER (V2SF) \ +/* Declare our "cache" of values. */ +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + static tree X##_aarch64_type_node_s = NULL; \ + static tree X##_aarch64_type_node_u = NULL; \ + static tree X##_aarch64_type_node_p = NULL; + + VECTOR_TYPES + + gcc_assert (VECTOR_MODE_P (mode)); + +#undef AARCH64_TYPE_BUILDER +#define AARCH64_TYPE_BUILDER(X) \ + case X##mode: \ + if (unsigned_p) \ + return X##_aarch64_type_node_u \ + ? 
X##_aarch64_type_node_u \ + : X##_aarch64_type_node_u \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + else if (poly_p) \ + return X##_aarch64_type_node_p \ + ? X##_aarch64_type_node_p \ + : X##_aarch64_type_node_p \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + else \ + return X##_aarch64_type_node_s \ + ? X##_aarch64_type_node_s \ + : X##_aarch64_type_node_s \ + = build_vector_type_for_mode (aarch64_build_scalar_type \ + (GET_MODE_INNER (mode), \ + unsigned_p, poly_p), mode); \ + break; + + switch (mode) + { + default: + eltype = aarch64_build_scalar_type (GET_MODE_INNER (mode), + unsigned_p, poly_p); + return build_vector_type_for_mode (eltype, mode); + break; + VECTOR_TYPES + } +} + +tree +aarch64_build_type (enum machine_mode mode, bool unsigned_p, bool poly_p) +{ + if (VECTOR_MODE_P (mode)) + return aarch64_build_vector_type (mode, unsigned_p, poly_p); + else + return aarch64_build_scalar_type (mode, unsigned_p, poly_p); +} + +tree +aarch64_build_signed_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, false, false); +} + +tree +aarch64_build_unsigned_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, true, false); +} + +tree +aarch64_build_poly_type (enum machine_mode mode) +{ + return aarch64_build_type (mode, false, true); +} + +static void +aarch64_init_simd_builtins (void) +{ + unsigned int i, fcode = AARCH64_SIMD_BUILTIN_BASE + 1; + + /* Signed scalar type nodes. */ + tree aarch64_simd_intQI_type_node = aarch64_build_signed_type (QImode); + tree aarch64_simd_intHI_type_node = aarch64_build_signed_type (HImode); + tree aarch64_simd_intSI_type_node = aarch64_build_signed_type (SImode); + tree aarch64_simd_intDI_type_node = aarch64_build_signed_type (DImode); + tree aarch64_simd_intTI_type_node = aarch64_build_signed_type (TImode); + tree aarch64_simd_intEI_type_node = aarch64_build_signed_type (EImode); + tree aarch64_simd_intOI_type_node = aarch64_build_signed_type (OImode); + tree aarch64_simd_intCI_type_node = aarch64_build_signed_type (CImode); + tree aarch64_simd_intXI_type_node = aarch64_build_signed_type (XImode); + + /* Unsigned scalar type nodes. */ + tree aarch64_simd_intUQI_type_node = aarch64_build_unsigned_type (QImode); + tree aarch64_simd_intUHI_type_node = aarch64_build_unsigned_type (HImode); + tree aarch64_simd_intUSI_type_node = aarch64_build_unsigned_type (SImode); + tree aarch64_simd_intUDI_type_node = aarch64_build_unsigned_type (DImode); + + /* Poly scalar type nodes. */ + tree aarch64_simd_polyQI_type_node = aarch64_build_poly_type (QImode); + tree aarch64_simd_polyHI_type_node = aarch64_build_poly_type (HImode); + tree aarch64_simd_polyDI_type_node = aarch64_build_poly_type (DImode); + tree aarch64_simd_polyTI_type_node = aarch64_build_poly_type (TImode); + + /* Float type nodes. */ + tree aarch64_simd_float_type_node = aarch64_build_signed_type (SFmode); + tree aarch64_simd_double_type_node = aarch64_build_signed_type (DFmode); + + /* Define typedefs which exactly correspond to the modes we are basing vector + types on. If you change these names you'll need to change + the table used by aarch64_mangle_type too. 
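+   For example, QImode is registered below as __builtin_aarch64_simd_qi;
+   arm_neon.h builds the user-visible vector types (int8x8_t and friends)
+   from these names via __attribute__ ((vector_size)).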
*/ + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intQI_type_node, + "__builtin_aarch64_simd_qi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intHI_type_node, + "__builtin_aarch64_simd_hi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intSI_type_node, + "__builtin_aarch64_simd_si"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_float_type_node, + "__builtin_aarch64_simd_sf"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intDI_type_node, + "__builtin_aarch64_simd_di"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_double_type_node, + "__builtin_aarch64_simd_df"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyQI_type_node, + "__builtin_aarch64_simd_poly8"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyHI_type_node, + "__builtin_aarch64_simd_poly16"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyDI_type_node, + "__builtin_aarch64_simd_poly64"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_polyTI_type_node, + "__builtin_aarch64_simd_poly128"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intTI_type_node, + "__builtin_aarch64_simd_ti"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intEI_type_node, + "__builtin_aarch64_simd_ei"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intOI_type_node, + "__builtin_aarch64_simd_oi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intCI_type_node, + "__builtin_aarch64_simd_ci"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intXI_type_node, + "__builtin_aarch64_simd_xi"); + + /* Unsigned integer types for various mode sizes. */ + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUQI_type_node, + "__builtin_aarch64_simd_uqi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUHI_type_node, + "__builtin_aarch64_simd_uhi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUSI_type_node, + "__builtin_aarch64_simd_usi"); + (*lang_hooks.types.register_builtin_type) (aarch64_simd_intUDI_type_node, + "__builtin_aarch64_simd_udi"); + + for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) + { + bool print_type_signature_p = false; + char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; + aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i]; + const char *const modenames[] = + { + "v8qi", "v4hi", "v2si", "v2sf", "di", "df", + "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", + "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" + }; + const enum machine_mode modes[] = + { + V8QImode, V4HImode, V2SImode, V2SFmode, DImode, DFmode, + V16QImode, V8HImode, V4SImode, V4SFmode, V2DImode, + V2DFmode, TImode, EImode, OImode, XImode, SImode, + SFmode, HImode, QImode + }; + char namebuf[60]; + tree ftype = NULL; + tree fndecl = NULL; + + gcc_assert (ARRAY_SIZE (modenames) == T_MAX); + + d->fcode = fcode; + + /* We must track two variables here. op_num is + the operand number as in the RTL pattern. This is + required to access the mode (e.g. V4SF mode) of the + argument, from which the base type can be derived. + arg_num is an index in to the qualifiers data, which + gives qualifiers to the type (e.g. const unsigned). + The reason these two variables may differ by one is the + void return type. While all return types take the 0th entry + in the qualifiers array, there is no operand for them in the + RTL pattern. */ + int op_num = insn_data[d->code].n_operands - 1; + int arg_num = d->qualifiers[0] & qualifier_void + ? 
op_num + 1 + : op_num; + tree return_type = void_type_node, args = void_list_node; + tree eltype; + + /* Build a function type directly from the insn_data for this + builtin. The build_function_type () function takes care of + removing duplicates for us. */ + for (; op_num >= 0; arg_num--, op_num--) + { + enum machine_mode op_mode = insn_data[d->code].operand[op_num].mode; + enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num]; + + if (qualifiers & qualifier_unsigned) + { + type_signature[arg_num] = 'u'; + print_type_signature_p = true; + } + else if (qualifiers & qualifier_poly) + { + type_signature[arg_num] = 'p'; + print_type_signature_p = true; + } + else + type_signature[arg_num] = 's'; + + /* Skip an internal operand for vget_{low, high}. */ + if (qualifiers & qualifier_internal) + continue; + + /* Some builtins have different user-facing types + for certain arguments, encoded in d->mode. */ + if (qualifiers & qualifier_map_mode) + op_mode = modes[d->mode]; + + /* For pointers, we want a pointer to the basic type + of the vector. */ + if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) + op_mode = GET_MODE_INNER (op_mode); + + eltype = aarch64_build_type (op_mode, + qualifiers & qualifier_unsigned, + qualifiers & qualifier_poly); + + /* Add qualifiers. */ + if (qualifiers & qualifier_const) + eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); + + if (qualifiers & qualifier_pointer) + eltype = build_pointer_type (eltype); + + /* If we have reached arg_num == 0, we are at a non-void + return type. Otherwise, we are still processing + arguments. */ + if (arg_num == 0) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } + + ftype = build_function_type (return_type, args); + + gcc_assert (ftype != NULL); + + if (print_type_signature_p) + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s_%s", + d->name, modenames[d->mode], type_signature); + else + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s%s", + d->name, modenames[d->mode]); + + fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, + NULL, NULL_TREE); + aarch64_builtin_decls[fcode] = fndecl; + } +} + +void +aarch64_init_builtins (void) +{ + if (TARGET_SIMD) + aarch64_init_simd_builtins (); +} + +tree +aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= AARCH64_BUILTIN_MAX) + return error_mark_node; + + return aarch64_builtin_decls[code]; +} + +typedef enum +{ + SIMD_ARG_COPY_TO_REG, + SIMD_ARG_CONSTANT, + SIMD_ARG_STOP +} builtin_simd_arg; + +static rtx +aarch64_simd_expand_args (rtx target, int icode, int have_retval, + tree exp, ...) 
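+/* Worker for aarch64_simd_expand_builtin.  The trailing varargs give one
+   builtin_simd_arg code per operand, terminated by SIMD_ARG_STOP; each
+   argument of EXP is expanded and either copied to a register or, for
+   SIMD_ARG_CONSTANT, checked against the insn predicate.  If HAVE_RETVAL
+   is nonzero the result is placed in TARGET, or in a fresh register of
+   the insn's output mode when TARGET is unsuitable.  */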
+{ + va_list ap; + rtx pat; + tree arg[SIMD_MAX_BUILTIN_ARGS]; + rtx op[SIMD_MAX_BUILTIN_ARGS]; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode[SIMD_MAX_BUILTIN_ARGS]; + int argc = 0; + + if (have_retval + && (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode))) + target = gen_reg_rtx (tmode); + + va_start (ap, exp); + + for (;;) + { + builtin_simd_arg thisarg = (builtin_simd_arg) va_arg (ap, int); + + if (thisarg == SIMD_ARG_STOP) + break; + else + { + arg[argc] = CALL_EXPR_ARG (exp, argc); + op[argc] = expand_normal (arg[argc]); + mode[argc] = insn_data[icode].operand[argc + have_retval].mode; + + switch (thisarg) + { + case SIMD_ARG_COPY_TO_REG: + if (POINTER_TYPE_P (TREE_TYPE (arg[argc]))) + op[argc] = convert_memory_address (Pmode, op[argc]); + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */ + if (!(*insn_data[icode].operand[argc + have_retval].predicate) + (op[argc], mode[argc])) + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + + case SIMD_ARG_CONSTANT: + if (!(*insn_data[icode].operand[argc + have_retval].predicate) + (op[argc], mode[argc])) + error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, " + "expected %", argc + 1); + break; + + case SIMD_ARG_STOP: + gcc_unreachable (); + } + + argc++; + } + } + + va_end (ap); + + if (have_retval) + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (target, op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + else + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + + if (!pat) + return 0; + + emit_insn (pat); + + return target; +} + +/* Expand an AArch64 AdvSIMD builtin(intrinsic). */ +rtx +aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) +{ + aarch64_simd_builtin_datum *d = + &aarch64_simd_builtin_data[fcode - (AARCH64_SIMD_BUILTIN_BASE + 1)]; + enum insn_code icode = d->code; + builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS]; + int num_args = insn_data[d->code].n_operands; + int is_void = 0; + int k; + + is_void = !!(d->qualifiers[0] & qualifier_void); + + num_args += is_void; + + for (k = 1; k < num_args; k++) + { + /* We have four arrays of data, each indexed in a different fashion. + qualifiers - element 0 always describes the function return type. + operands - element 0 is either the operand for return value (if + the function has a non-void return type) or the operand for the + first argument. + expr_args - element 0 always holds the first argument. + args - element 0 is always used for the return type. 
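+
+     For a builtin with a non-void return type and two arguments, k runs
+     over 1 and 2: d->qualifiers[1] describes the first argument, insn
+     operand 1 is that argument (operand 0 being the return value), and
+     CALL_EXPR argument 0 supplies its tree; i.e. qualifiers_k = k,
+     operands_k = k - is_void and expr_args_k = k - 1 below.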
*/ + int qualifiers_k = k; + int operands_k = k - is_void; + int expr_args_k = k - 1; + + if (d->qualifiers[qualifiers_k] & qualifier_immediate) + args[k] = SIMD_ARG_CONSTANT; + else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) + { + rtx arg + = expand_normal (CALL_EXPR_ARG (exp, + (expr_args_k))); + /* Handle constants only if the predicate allows it. */ + bool op_const_int_p = + (CONST_INT_P (arg) + && (*insn_data[icode].operand[operands_k].predicate) + (arg, insn_data[icode].operand[operands_k].mode)); + args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG; + } + else + args[k] = SIMD_ARG_COPY_TO_REG; + + } + args[k] = SIMD_ARG_STOP; + + /* The interface to aarch64_simd_expand_args expects a 0 if + the function is void, and a 1 if it is not. */ + return aarch64_simd_expand_args + (target, icode, !is_void, exp, + args[1], + args[2], + args[3], + args[4], + SIMD_ARG_STOP); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient. */ +rtx +aarch64_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + int fcode = DECL_FUNCTION_CODE (fndecl); + + if (fcode >= AARCH64_SIMD_BUILTIN_BASE) + return aarch64_simd_expand_builtin (fcode, exp, target); + + return NULL_RTX; +} + +tree +aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) +{ + enum machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 +#define AARCH64_FIND_FRINT_VARIANT(N) \ + (AARCH64_CHECK_BUILTIN_MODE (2, D) \ + ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \ + : (AARCH64_CHECK_BUILTIN_MODE (4, S) \ + ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \ + : (AARCH64_CHECK_BUILTIN_MODE (2, S) \ + ? 
aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \ + : NULL_TREE))) + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); + switch (fn) + { +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Fmode && out_n == C \ + && in_mode == N##Fmode && in_n == C) + case BUILT_IN_FLOOR: + case BUILT_IN_FLOORF: + return AARCH64_FIND_FRINT_VARIANT (floor); + case BUILT_IN_CEIL: + case BUILT_IN_CEILF: + return AARCH64_FIND_FRINT_VARIANT (ceil); + case BUILT_IN_TRUNC: + case BUILT_IN_TRUNCF: + return AARCH64_FIND_FRINT_VARIANT (btrunc); + case BUILT_IN_ROUND: + case BUILT_IN_ROUNDF: + return AARCH64_FIND_FRINT_VARIANT (round); + case BUILT_IN_NEARBYINT: + case BUILT_IN_NEARBYINTF: + return AARCH64_FIND_FRINT_VARIANT (nearbyint); + case BUILT_IN_SQRT: + case BUILT_IN_SQRTF: + return AARCH64_FIND_FRINT_VARIANT (sqrt); +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == SImode && out_n == C \ + && in_mode == N##Imode && in_n == C) + case BUILT_IN_CLZ: + { + if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; + return NULL_TREE; + } +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Imode && out_n == C \ + && in_mode == N##Fmode && in_n == C) + case BUILT_IN_LFLOOR: + case BUILT_IN_LFLOORF: + case BUILT_IN_LLFLOOR: + case BUILT_IN_IFLOORF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + case BUILT_IN_LCEIL: + case BUILT_IN_LCEILF: + case BUILT_IN_LLCEIL: + case BUILT_IN_ICEILF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + case BUILT_IN_LROUND: + case BUILT_IN_IROUNDF: + { + enum aarch64_builtins builtin; + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si; + else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si; + else + return NULL_TREE; + + return aarch64_builtin_decls[builtin]; + } + + default: + return NULL_TREE; + } + } + + return NULL_TREE; +} + +#undef VAR1 +#define VAR1(T, N, MAP, A) \ + case AARCH64_SIMD_BUILTIN_##T##_##N##A: + +tree +aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, + bool ignore ATTRIBUTE_UNUSED) +{ + int fcode = DECL_FUNCTION_CODE (fndecl); + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + + switch (fcode) + { + BUILTIN_VALLDI (UNOP, abs, 2) + return fold_build1 (ABS_EXPR, type, args[0]); + break; + BUILTIN_VALLDI (BINOP, cmge, 0) + return fold_build2 (GE_EXPR, type, args[0], args[1]); + break; + BUILTIN_VALLDI (BINOP, cmgt, 0) + return fold_build2 (GT_EXPR, type, args[0], args[1]); + break; + BUILTIN_VALLDI 
(BINOP, cmeq, 0) + return fold_build2 (EQ_EXPR, type, args[0], args[1]); + break; + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + { + tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]); + tree vec_zero_node = build_zero_cst (type); + return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); + break; + } + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) + return fold_build1 (FLOAT_EXPR, type, args[0]); + default: + break; + } + + return NULL_TREE; +} + +bool +aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + bool changed = false; + gimple stmt = gsi_stmt (*gsi); + tree call = gimple_call_fn (stmt); + tree fndecl; + gimple new_stmt = NULL; + if (call) + { + fndecl = gimple_call_fndecl (stmt); + if (fndecl) + { + int fcode = DECL_FUNCTION_CODE (fndecl); + int nargs = gimple_call_num_args (stmt); + tree *args = (nargs > 0 + ? gimple_call_arg_ptr (stmt, 0) + : &error_mark_node); + + switch (fcode) + { + BUILTIN_VALL (UNOP, reduc_splus_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_PLUS_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + BUILTIN_VDQIF (UNOP, reduc_smax_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_MAX_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + BUILTIN_VDQIF (UNOP, reduc_smin_, 10) + new_stmt = gimple_build_assign_with_ops ( + REDUC_MIN_EXPR, + gimple_call_lhs (stmt), + args[0], + NULL_TREE); + break; + + default: + break; + } + } + } + + if (new_stmt) + { + gsi_replace (gsi, new_stmt, true); + changed = true; + } + + return changed; +} + +#undef AARCH64_CHECK_BUILTIN_MODE +#undef AARCH64_FIND_FRINT_VARIANT +#undef BUILTIN_DX +#undef BUILTIN_SDQ_I +#undef BUILTIN_SD_HSI +#undef BUILTIN_V2F +#undef BUILTIN_VALL +#undef BUILTIN_VB +#undef BUILTIN_VD +#undef BUILTIN_VDC +#undef BUILTIN_VDIC +#undef BUILTIN_VDN +#undef BUILTIN_VDQ +#undef BUILTIN_VDQF +#undef BUILTIN_VDQH +#undef BUILTIN_VDQHS +#undef BUILTIN_VDQIF +#undef BUILTIN_VDQM +#undef BUILTIN_VDQV +#undef BUILTIN_VDQ_BHSI +#undef BUILTIN_VDQ_I +#undef BUILTIN_VDW +#undef BUILTIN_VD_BHSI +#undef BUILTIN_VD_HSI +#undef BUILTIN_VD_RE +#undef BUILTIN_VQ +#undef BUILTIN_VQN +#undef BUILTIN_VQW +#undef BUILTIN_VQ_HSI +#undef BUILTIN_VQ_S +#undef BUILTIN_VSDQ_HSI +#undef BUILTIN_VSDQ_I +#undef BUILTIN_VSDQ_I_BHSI +#undef BUILTIN_VSDQ_I_DI +#undef BUILTIN_VSD_HSI +#undef BUILTIN_VSQN_HSDI +#undef BUILTIN_VSTRUCT +#undef CF0 +#undef CF1 +#undef CF2 +#undef CF3 +#undef CF4 +#undef CF10 +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 +#undef VAR11 + diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-cores.def b/gcc-4.9/gcc/config/aarch64/aarch64-cores.def new file mode 100644 index 000000000..9319249e6 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-cores.def @@ -0,0 +1,42 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This is a list of cores that implement AArch64. + + Before using #include to read this file, define a macro: + + AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS) + + The CORE_NAME is the name of the core, represented as a string constant. + The CORE_IDENT is the name of the core, represented as an identifier. + The SCHEDULER_IDENT is the name of the core for which scheduling decisions + will be made, represented as an identifier. + ARCH is the architecture revision implemented by the chip. + FLAGS are the bitwise-or of the traits that apply to that core. + This need not include flags implied by the architecture. + COSTS is the name of the rtx_costs routine to use. */ + +/* V8 Architecture Processors. */ + +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa53) +AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) + +/* V8 big.LITTLE implementations. */ + +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h new file mode 100644 index 000000000..adec7e7ba --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf-raw.h @@ -0,0 +1,33 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Support for bare-metal builds. */ +#ifndef GCC_AARCH64_ELF_RAW_H +#define GCC_AARCH64_ELF_RAW_H + +#define STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" +#define ENDFILE_SPEC " crtend%O%s crtn%O%s" + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ + -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" +#endif + +#endif /* GCC_AARCH64_ELF_RAW_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-elf.h b/gcc-4.9/gcc/config/aarch64/aarch64-elf.h new file mode 100644 index 000000000..15ab630de --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-elf.h @@ -0,0 +1,161 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_ELF_H +#define GCC_AARCH64_ELF_H + + +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + aarch64_asm_output_labelref (FILE, NAME) + +#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \ + do \ + { \ + assemble_name (FILE, NAME1); \ + fputs (" = ", FILE); \ + assemble_name (FILE, NAME2); \ + fputc ('\n', FILE); \ + } while (0) + +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.bss" + +#define CTORS_SECTION_ASM_OP "\t.section\t.init_array,\"aw\",%init_array" +#define DTORS_SECTION_ASM_OP "\t.section\t.fini_array,\"aw\",%fini_array" + +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#define INIT_ARRAY_SECTION_ASM_OP CTORS_SECTION_ASM_OP +#define FINI_ARRAY_SECTION_ASM_OP DTORS_SECTION_ASM_OP + +/* Since we use .init_array/.fini_array we don't need the markers at + the start and end of the ctors/dtors arrays. */ +#define CTOR_LIST_BEGIN asm (CTORS_SECTION_ASM_OP) +#define CTOR_LIST_END /* empty */ +#define DTOR_LIST_BEGIN asm (DTORS_SECTION_ASM_OP) +#define DTOR_LIST_END /* empty */ + +#undef TARGET_ASM_CONSTRUCTOR +#define TARGET_ASM_CONSTRUCTOR aarch64_elf_asm_constructor + +#undef TARGET_ASM_DESTRUCTOR +#define TARGET_ASM_DESTRUCTOR aarch64_elf_asm_destructor + +#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN +/* Support for -falign-* switches. Use .p2align to ensure that code + sections are padded with NOP instructions, rather than zeros. */ +#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \ + do \ + { \ + if ((LOG) != 0) \ + { \ + if ((MAX_SKIP) == 0) \ + fprintf ((FILE), "\t.p2align %d\n", (int) (LOG)); \ + else \ + fprintf ((FILE), "\t.p2align %d,,%d\n", \ + (int) (LOG), (int) (MAX_SKIP)); \ + } \ + } while (0) + +#endif /* HAVE_GAS_MAX_SKIP_P2ALIGN */ + +#define JUMP_TABLES_IN_TEXT_SECTION 0 + +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + do { \ + switch (GET_MODE (BODY)) \ + { \ + case QImode: \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + case HImode: \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + case DImode: /* See comment in aarch64_output_casesi. */ \ + asm_fprintf (STREAM, "\t.word\t(%LL%d - %LLrtx%d) / 4\n", \ + VALUE, REL); \ + break; \ + default: \ + gcc_unreachable (); \ + } \ + } while (0) + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf(STREAM, "\t.align\t%d\n", (int)POWER) + +#define ASM_COMMENT_START "//" + +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" + +#define GLOBAL_ASM_OP "\t.global\t" + +#ifdef TARGET_BIG_ENDIAN_DEFAULT +#define ENDIAN_SPEC "-mbig-endian" +#else +#define ENDIAN_SPEC "-mlittle-endian" +#endif + +#if TARGET_DATA_MODEL == 1 +#define ABI_SPEC "-mabi=lp64" +#define MULTILIB_DEFAULTS { "mabi=lp64" } +#elif TARGET_DATA_MODEL == 2 +#define ABI_SPEC "-mabi=ilp32" +#define MULTILIB_DEFAULTS { "mabi=ilp32" } +#else +#error "Unknown or undefined TARGET_DATA_MODEL!" +#endif + +/* Force the default endianness and ABI flags onto the command line + in order to make the other specs easier to write. 
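+
+   For example, with TARGET_DATA_MODEL == 2 and no explicit endianness or
+   ABI on the command line, DRIVER_SELF_SPECS below injects
+   "-mlittle-endian -mabi=ilp32" (or "-mbig-endian" when
+   TARGET_BIG_ENDIAN_DEFAULT is set), so later specs can simply test
+   %{mabi=*} and %{mbig-endian:...}.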
*/ +#undef DRIVER_SELF_SPECS +#define DRIVER_SELF_SPECS \ + " %{!mbig-endian:%{!mlittle-endian:" ENDIAN_SPEC "}}" \ + " %{!mabi=*:" ABI_SPEC "}" + +#ifdef HAVE_AS_MABI_OPTION +#define ASM_MABI_SPEC "%{mabi=*:-mabi=%*}" +#else +#define ASM_MABI_SPEC "%{mabi=lp64:}" +#endif + +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%{march=*:-march=%*} \ +%(asm_cpu_spec)" \ +ASM_MABI_SPEC +#endif + +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "%%%s" + +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION aarch64_elf_asm_named_section + +/* Stabs debug not required. */ +#undef DBX_DEBUGGING_INFO + +#endif /* GCC_AARCH64_ELF_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-linux.h b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h new file mode 100644 index 000000000..a8f077156 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-linux.h @@ -0,0 +1,47 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_AARCH64_LINUX_H +#define GCC_AARCH64_LINUX_H + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" + +#define CPP_SPEC "%{pthread:-D_REENTRANT}" + +#define LINUX_TARGET_LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ + -X \ + %{mbig-endian:-EB} %{mlittle-endian:-EL} \ + -maarch64linux%{mbig-endian:b}" + +#define LINK_SPEC LINUX_TARGET_LINK_SPEC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +#endif /* GCC_AARCH64_LINUX_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-modes.def b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def new file mode 100644 index 000000000..1d2cc7679 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-modes.def @@ -0,0 +1,55 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +CC_MODE (CCFP); +CC_MODE (CCFPE); +CC_MODE (CC_SWP); +CC_MODE (CC_ZESWP); /* zero-extend LHS (but swap to make it RHS). */ +CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS). 
*/ +CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ +CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ +VECTOR_MODES (FLOAT, 8); /* V2SF. */ +VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ + +/* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */ +INT_MODE (OI, 32); + +/* Opaque integer modes for 3, 6 or 8 Neon double registers (2 is + TImode). */ +INT_MODE (EI, 24); +INT_MODE (CI, 48); +INT_MODE (XI, 64); + +/* Vector modes for register lists. */ +VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 32); /* V8SF V4DF. */ + +VECTOR_MODES (INT, 48); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 48); /* V8SF V4DF. */ + +VECTOR_MODES (INT, 64); /* V32QI V16HI V8SI V4DI. */ +VECTOR_MODES (FLOAT, 64); /* V8SF V4DF. */ + +/* Quad float: 128-bit floating mode for long doubles. */ +FLOAT_MODE (TF, 16, ieee_quad_format); diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def b/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def new file mode 100644 index 000000000..1aa65d32a --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-option-extensions.def @@ -0,0 +1,38 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This is a list of ISA extentsions in AArch64. + + Before using #include to read this file, define a macro: + + AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF) + + EXT_NAME is the name of the extension, represented as a string constant. + FLAGS_ON are the bitwise-or of the features that the extension adds. + FLAGS_OFF are the bitwise-or of the features that the extension removes. */ + +/* V8 Architecture Extensions. + This list currently contains example extensions for CPUs that implement + AArch64, and therefore serves as a template for adding more CPUs in the + future. */ + +AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-opts.h b/gcc-4.9/gcc/config/aarch64/aarch64-opts.h new file mode 100644 index 000000000..370931536 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-opts.h @@ -0,0 +1,64 @@ +/* Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Definitions for option handling for AArch64. */ + +#ifndef GCC_AARCH64_OPTS_H +#define GCC_AARCH64_OPTS_H + +/* The various cores that implement AArch64. */ +enum aarch64_processor +{ +#define AARCH64_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + INTERNAL_IDENT, +#include "aarch64-cores.def" +#undef AARCH64_CORE + /* Used to indicate that no processor has been specified. */ + generic, + /* Used to mark the end of the processor table. */ + aarch64_none +}; + +/* TLS types. */ +enum aarch64_tls_type { + TLS_TRADITIONAL, + TLS_DESCRIPTORS +}; + +/* The code model defines the address generation strategy. + Most have a PIC and non-PIC variant. */ +enum aarch64_code_model { + /* Static code and data fit within a 1MB region. + Not fully implemented, mostly treated as SMALL. */ + AARCH64_CMODEL_TINY, + /* Static code, data and GOT/PLT fit within a 1MB region. + Not fully implemented, mostly treated as SMALL_PIC. */ + AARCH64_CMODEL_TINY_PIC, + /* Static code and data fit within a 4GB region. + The default non-PIC code model. */ + AARCH64_CMODEL_SMALL, + /* Static code, data and GOT/PLT fit within a 4GB region. + The default PIC code model. */ + AARCH64_CMODEL_SMALL_PIC, + /* No assumptions about addresses of code and data. + The PIC variant is not yet implemented. */ + AARCH64_CMODEL_LARGE +}; + +#endif diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h new file mode 100644 index 000000000..5542f023b --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h @@ -0,0 +1,292 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#ifndef GCC_AARCH64_PROTOS_H +#define GCC_AARCH64_PROTOS_H + +/* + SYMBOL_CONTEXT_ADR + The symbol is used in a load-address operation. + SYMBOL_CONTEXT_MEM + The symbol is used as the address in a MEM. + */ +enum aarch64_symbol_context +{ + SYMBOL_CONTEXT_MEM, + SYMBOL_CONTEXT_ADR +}; + +/* SYMBOL_SMALL_ABSOLUTE: Generate symbol accesses through + high and lo relocs that calculate the base address using a PC + relative reloc. + So to get the address of foo, we generate + adrp x0, foo + add x0, x0, :lo12:foo + + To load or store something to foo, we could use the corresponding + load store variants that generate an + ldr x0, [x0,:lo12:foo] + or + str x1, [x0, :lo12:foo] + + This corresponds to the small code model of the compiler. 
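+   (Illustrative aside, not part of the original comment: a C-level
+   access such as
+
+	extern int foo;
+	int get_foo (void) { return foo; }
+
+   is therefore expected to compile, under the small code model, to
+
+	adrp	x0, foo
+	ldr	w0, [x0, :lo12:foo]
+
+   with the :lo12: offset folded into the load as described above.)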
+ + SYMBOL_SMALL_GOT: Similar to the one above but this + gives us the GOT entry of the symbol being referred to : + Thus calculating the GOT entry for foo is done using the + following sequence of instructions. The ADRP instruction + gets us to the page containing the GOT entry of the symbol + and the got_lo12 gets us the actual offset in it. + + adrp x0, :got:foo + ldr x0, [x0, :gotoff_lo12:foo] + + This corresponds to the small PIC model of the compiler. + + SYMBOL_SMALL_TLSGD + SYMBOL_SMALL_TLSDESC + SYMBOL_SMALL_GOTTPREL + SYMBOL_SMALL_TPREL + Each of of these represents a thread-local symbol, and corresponds to the + thread local storage relocation operator for the symbol being referred to. + + SYMBOL_TINY_ABSOLUTE + + Generate symbol accesses as a PC relative address using a single + instruction. To compute the address of symbol foo, we generate: + + ADR x0, foo + + SYMBOL_TINY_GOT + + Generate symbol accesses via the GOT using a single PC relative + instruction. To compute the address of symbol foo, we generate: + + ldr t0, :got:foo + + The value of foo can subsequently read using: + + ldrb t0, [t0] + + SYMBOL_FORCE_TO_MEM : Global variables are addressed using + constant pool. All variable addresses are spilled into constant + pools. The constant pools themselves are addressed using PC + relative accesses. This only works for the large code model. + */ +enum aarch64_symbol_type +{ + SYMBOL_SMALL_ABSOLUTE, + SYMBOL_SMALL_GOT, + SYMBOL_SMALL_TLSGD, + SYMBOL_SMALL_TLSDESC, + SYMBOL_SMALL_GOTTPREL, + SYMBOL_SMALL_TPREL, + SYMBOL_TINY_ABSOLUTE, + SYMBOL_TINY_GOT, + SYMBOL_FORCE_TO_MEM +}; + +/* A set of tuning parameters contains references to size and time + cost models and vectors for address cost calculations, register + move costs and memory move costs. */ + +/* Additional cost for addresses. */ +struct cpu_addrcost_table +{ + const int pre_modify; + const int post_modify; + const int register_offset; + const int register_extend; + const int imm_offset; +}; + +/* Additional costs for register copies. Cost is for one register. */ +struct cpu_regmove_cost +{ + const int GP2GP; + const int GP2FP; + const int FP2GP; + const int FP2FP; +}; + +/* Cost for vector insn classes. */ +struct cpu_vector_cost +{ + const int scalar_stmt_cost; /* Cost of any scalar operation, + excluding load and store. */ + const int scalar_load_cost; /* Cost of scalar load. */ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, + excluding load, store, + vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vec-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector + operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ + const int vec_unalign_store_cost; /* Cost of unaligned vector store. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch. */ + const int cond_not_taken_branch_cost; /* Cost of not taken branch. 
*/ +}; + +struct tune_params +{ + const struct cpu_cost_table *const insn_extra_cost; + const struct cpu_addrcost_table *const addr_cost; + const struct cpu_regmove_cost *const regmove_cost; + const struct cpu_vector_cost *const vec_costs; + const int memmov_cost; + const int issue_rate; +}; + +HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); +bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); +bool aarch64_cannot_change_mode_class (enum machine_mode, + enum machine_mode, + enum reg_class); +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); +bool aarch64_constant_address_p (rtx); +bool aarch64_float_const_zero_rtx_p (rtx); +bool aarch64_function_arg_regno_p (unsigned); +bool aarch64_gen_movmemqi (rtx *); +bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *); +bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx); +bool aarch64_is_long_call_p (rtx); +bool aarch64_label_mentioned_p (rtx); +bool aarch64_legitimate_pic_operand_p (rtx); +bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); +bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, + enum machine_mode); +char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode); +char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned); +bool aarch64_pad_arg_upward (enum machine_mode, const_tree); +bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool); +bool aarch64_regno_ok_for_base_p (int, bool); +bool aarch64_regno_ok_for_index_p (int, bool); +bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode); +bool aarch64_simd_imm_zero_p (rtx, enum machine_mode); +bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode); +bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool); +bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool, + struct simd_immediate_info *); +bool aarch64_symbolic_address_p (rtx); +bool aarch64_uimm12_shift (HOST_WIDE_INT); +const char *aarch64_output_casesi (rtx *); +const char *aarch64_rewrite_selected_cpu (const char *name); + +enum aarch64_symbol_type aarch64_classify_symbol (rtx, + enum aarch64_symbol_context); +enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); +enum reg_class aarch64_regno_regclass (unsigned); +int aarch64_asm_preferred_eh_data_format (int, int); +int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode); +int aarch64_hard_regno_nregs (unsigned, enum machine_mode); +int aarch64_simd_attr_length_move (rtx); +int aarch64_uxt_size (int, HOST_WIDE_INT); +rtx aarch64_final_eh_return_addr (void); +rtx aarch64_legitimize_reload_address (rtx *, enum machine_mode, int, int, int); +const char *aarch64_output_move_struct (rtx *operands); +rtx aarch64_return_addr (int, rtx); +rtx aarch64_simd_gen_const_vector_dup (enum machine_mode, int); +bool aarch64_simd_mem_operand_p (rtx); +rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool); +rtx aarch64_tls_get_addr (void); +tree aarch64_fold_builtin (tree, int, tree *, bool); +unsigned aarch64_dbx_register_number (unsigned); +unsigned aarch64_trampoline_size (void); +void aarch64_asm_output_labelref (FILE *, const char *); +void aarch64_elf_asm_named_section (const char *, unsigned, tree); +void aarch64_expand_epilogue (bool); +void aarch64_expand_mov_immediate (rtx, rtx); +void aarch64_expand_prologue (void); +void aarch64_expand_vector_init (rtx, rtx); +void aarch64_function_profiler (FILE *, int); +void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, 
const_tree, rtx, + const_tree, unsigned); +void aarch64_init_expanders (void); +void aarch64_print_operand (FILE *, rtx, char); +void aarch64_print_operand_address (FILE *, rtx); + +/* Initialize builtins for SIMD intrinsics. */ +void init_aarch64_simd_builtins (void); + +void aarch64_simd_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +void aarch64_simd_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int); + +/* Emit code to place a AdvSIMD pair result in memory locations (with equal + registers). */ +void aarch64_simd_emit_pair_result_insn (enum machine_mode, + rtx (*intfn) (rtx, rtx, rtx), rtx, + rtx); + +/* Expand builtins for SIMD intrinsics. */ +rtx aarch64_simd_expand_builtin (int, tree, rtx); + +void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); + +/* Emit code for reinterprets. */ +void aarch64_simd_reinterpret (rtx, rtx); + +void aarch64_split_128bit_move (rtx, rtx); + +bool aarch64_split_128bit_move_p (rtx, rtx); + +void aarch64_split_simd_combine (rtx, rtx, rtx); + +void aarch64_split_simd_move (rtx, rtx); + +/* Check for a legitimate floating point constant for FMOV. */ +bool aarch64_float_const_representable_p (rtx); + +#if defined (RTX_CODE) + +bool aarch64_legitimate_address_p (enum machine_mode, rtx, RTX_CODE, bool); +enum machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); +rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); +rtx aarch64_load_tp (rtx); + +void aarch64_expand_compare_and_swap (rtx op[]); +void aarch64_split_compare_and_swap (rtx op[]); +void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); + +#endif /* RTX_CODE */ + +void aarch64_init_builtins (void); +rtx aarch64_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED); +tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED); + +tree +aarch64_builtin_vectorized_function (tree fndecl, + tree type_out, + tree type_in); + +extern void aarch64_split_combinev16qi (rtx operands[3]); +extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); +extern bool +aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); +#endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def new file mode 100644 index 000000000..c9b7570e5 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd-builtins.def @@ -0,0 +1,395 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* In the list below, the BUILTIN_ macros expand to create + builtins for each of the modes described by . When adding + new builtins to this list, a helpful idiom to follow is to add + a line for each pattern in the md file. 
Thus, ADDP, which has one + pattern defined for the VD_BHSI iterator, and one for DImode, has two + entries below. + + Parameter 1 is the 'type' of the intrinsic. This is used to + describe the type modifiers (for example; unsigned) applied to + each of the parameters to the intrinsic function. + + Parameter 2 is the name of the intrinsic. This is appended + to `__builtin_aarch64_` to give the intrinsic name + as exported to the front-ends. + + Parameter 3 describes how to map from the name to the CODE_FOR_ + macro holding the RTL pattern for the intrinsic. This mapping is: + 0 - CODE_FOR_aarch64_ + 1-9 - CODE_FOR_<1-9> + 10 - CODE_FOR_. */ + + BUILTIN_VD_RE (CREATE, create, 0) + BUILTIN_VDC (COMBINE, combine, 0) + BUILTIN_VB (BINOP, pmul, 0) + BUILTIN_VDQF (UNOP, sqrt, 2) + BUILTIN_VD_BHSI (BINOP, addp, 0) + VAR1 (UNOP, addp, 0, di) + BUILTIN_VDQ_BHSI (UNOP, clz, 2) + + BUILTIN_VALL (GETLANE, get_lane, 0) + VAR1 (GETLANE, get_lane, 0, di) + BUILTIN_VALL (GETLANE, be_checked_get_lane, 0) + + BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) + BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) + BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) + BUILTIN_VDC (REINTERP, reinterpretv2si, 0) + BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) + BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) + BUILTIN_VQ (REINTERP, reinterpretv4si, 0) + BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv2di, 0) + BUILTIN_VQ (REINTERP, reinterpretv2df, 0) + + BUILTIN_VDQ_I (BINOP, dup_lane, 0) + /* Implemented by aarch64_qshl. */ + BUILTIN_VSDQ_I (BINOP, sqshl, 0) + BUILTIN_VSDQ_I (BINOP, uqshl, 0) + BUILTIN_VSDQ_I (BINOP, sqrshl, 0) + BUILTIN_VSDQ_I (BINOP, uqrshl, 0) + /* Implemented by aarch64_. */ + BUILTIN_VSDQ_I (BINOP, sqadd, 0) + BUILTIN_VSDQ_I (BINOP, uqadd, 0) + BUILTIN_VSDQ_I (BINOP, sqsub, 0) + BUILTIN_VSDQ_I (BINOP, uqsub, 0) + /* Implemented by aarch64_qadd. */ + BUILTIN_VSDQ_I (BINOP, suqadd, 0) + BUILTIN_VSDQ_I (BINOP, usqadd, 0) + + /* Implemented by aarch64_get_dreg. */ + BUILTIN_VDC (GETLANE, get_dregoi, 0) + BUILTIN_VDC (GETLANE, get_dregci, 0) + BUILTIN_VDC (GETLANE, get_dregxi, 0) + /* Implemented by aarch64_get_qreg. */ + BUILTIN_VQ (GETLANE, get_qregoi, 0) + BUILTIN_VQ (GETLANE, get_qregci, 0) + BUILTIN_VQ (GETLANE, get_qregxi, 0) + /* Implemented by aarch64_set_qreg. */ + BUILTIN_VQ (SETLANE, set_qregoi, 0) + BUILTIN_VQ (SETLANE, set_qregci, 0) + BUILTIN_VQ (SETLANE, set_qregxi, 0) + /* Implemented by aarch64_ld. */ + BUILTIN_VDC (LOADSTRUCT, ld2, 0) + BUILTIN_VDC (LOADSTRUCT, ld3, 0) + BUILTIN_VDC (LOADSTRUCT, ld4, 0) + /* Implemented by aarch64_ld. */ + BUILTIN_VQ (LOADSTRUCT, ld2, 0) + BUILTIN_VQ (LOADSTRUCT, ld3, 0) + BUILTIN_VQ (LOADSTRUCT, ld4, 0) + /* Implemented by aarch64_st. */ + BUILTIN_VDC (STORESTRUCT, st2, 0) + BUILTIN_VDC (STORESTRUCT, st3, 0) + BUILTIN_VDC (STORESTRUCT, st4, 0) + /* Implemented by aarch64_st. */ + BUILTIN_VQ (STORESTRUCT, st2, 0) + BUILTIN_VQ (STORESTRUCT, st3, 0) + BUILTIN_VQ (STORESTRUCT, st4, 0) + + BUILTIN_VQW (BINOP, saddl2, 0) + BUILTIN_VQW (BINOP, uaddl2, 0) + BUILTIN_VQW (BINOP, ssubl2, 0) + BUILTIN_VQW (BINOP, usubl2, 0) + BUILTIN_VQW (BINOP, saddw2, 0) + BUILTIN_VQW (BINOP, uaddw2, 0) + BUILTIN_VQW (BINOP, ssubw2, 0) + BUILTIN_VQW (BINOP, usubw2, 0) + /* Implemented by aarch64_l. */ + BUILTIN_VDW (BINOP, saddl, 0) + BUILTIN_VDW (BINOP, uaddl, 0) + BUILTIN_VDW (BINOP, ssubl, 0) + BUILTIN_VDW (BINOP, usubl, 0) + /* Implemented by aarch64_w. 
*/ + BUILTIN_VDW (BINOP, saddw, 0) + BUILTIN_VDW (BINOP, uaddw, 0) + BUILTIN_VDW (BINOP, ssubw, 0) + BUILTIN_VDW (BINOP, usubw, 0) + /* Implemented by aarch64_h. */ + BUILTIN_VQ_S (BINOP, shadd, 0) + BUILTIN_VQ_S (BINOP, uhadd, 0) + BUILTIN_VQ_S (BINOP, srhadd, 0) + BUILTIN_VQ_S (BINOP, urhadd, 0) + /* Implemented by aarch64_hn. */ + BUILTIN_VQN (BINOP, addhn, 0) + BUILTIN_VQN (BINOP, raddhn, 0) + /* Implemented by aarch64_hn2. */ + BUILTIN_VQN (TERNOP, addhn2, 0) + BUILTIN_VQN (TERNOP, raddhn2, 0) + + BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0) + /* Implemented by aarch64_qmovn. */ + BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) + BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) + /* Implemented by aarch64_s. */ + BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) + BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) + + BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0) + /* Implemented by aarch64_sqdmll. */ + BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0) + /* Implemented by aarch64_sqdmll_n. */ + BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0) + BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0) + + BUILTIN_VSD_HSI (BINOP, sqdmull, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0) + BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0) + BUILTIN_VD_HSI (BINOP, sqdmull_n, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0) + /* Implemented by aarch64_sqdmulh. */ + BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0) + BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0) + /* Implemented by aarch64_sqdmulh_lane. */ + BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0) + BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0) + BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0) + + BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) + /* Implemented by aarch64_shl. */ + BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) + BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) + + BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) + VAR1 (SHIFTIMM, ashr_simd, 0, di) + BUILTIN_VDQ_I (SHIFTIMM, lshr, 3) + VAR1 (USHIFTIMM, lshr_simd, 0, di) + /* Implemented by aarch64_shr_n. */ + BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) + /* Implemented by aarch64_sra_n. */ + BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) + /* Implemented by aarch64_shll_n. */ + BUILTIN_VDW (SHIFTIMM, sshll_n, 0) + BUILTIN_VDW (SHIFTIMM, ushll_n, 0) + /* Implemented by aarch64_shll2_n. */ + BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) + BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) + /* Implemented by aarch64_qshrn_n. 
*/ + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) + /* Implemented by aarch64_si_n. */ + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) + /* Implemented by aarch64_qshl_n. */ + BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) + + /* Implemented by aarch64_cm. */ + BUILTIN_VALLDI (BINOP, cmeq, 0) + BUILTIN_VALLDI (BINOP, cmge, 0) + BUILTIN_VALLDI (BINOP, cmgt, 0) + BUILTIN_VALLDI (BINOP, cmle, 0) + BUILTIN_VALLDI (BINOP, cmlt, 0) + /* Implemented by aarch64_cm. */ + BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) + + /* Implemented by reduc_plus_. */ + BUILTIN_VALL (UNOP, reduc_splus_, 10) + BUILTIN_VDQ (UNOP, reduc_uplus_, 10) + + /* Implemented by reduc__. */ + BUILTIN_VDQIF (UNOP, reduc_smax_, 10) + BUILTIN_VDQIF (UNOP, reduc_smin_, 10) + BUILTIN_VDQ_BHSI (UNOP, reduc_umax_, 10) + BUILTIN_VDQ_BHSI (UNOP, reduc_umin_, 10) + BUILTIN_VDQF (UNOP, reduc_smax_nan_, 10) + BUILTIN_VDQF (UNOP, reduc_smin_nan_, 10) + + /* Implemented by 3. + smax variants map to fmaxnm, + smax_nan variants map to fmax. */ + BUILTIN_VDQIF (BINOP, smax, 3) + BUILTIN_VDQIF (BINOP, smin, 3) + BUILTIN_VDQ_BHSI (BINOP, umax, 3) + BUILTIN_VDQ_BHSI (BINOP, umin, 3) + BUILTIN_VDQF (BINOP, smax_nan, 3) + BUILTIN_VDQF (BINOP, smin_nan, 3) + + /* Implemented by 2. */ + BUILTIN_VDQF (UNOP, btrunc, 2) + BUILTIN_VDQF (UNOP, ceil, 2) + BUILTIN_VDQF (UNOP, floor, 2) + BUILTIN_VDQF (UNOP, nearbyint, 2) + BUILTIN_VDQF (UNOP, rint, 2) + BUILTIN_VDQF (UNOP, round, 2) + BUILTIN_VDQF (UNOP, frintn, 2) + + /* Implemented by l2. */ + VAR1 (UNOP, lbtruncv2sf, 2, v2si) + VAR1 (UNOP, lbtruncv4sf, 2, v4si) + VAR1 (UNOP, lbtruncv2df, 2, v2di) + + VAR1 (UNOP, lbtruncuv2sf, 2, v2si) + VAR1 (UNOP, lbtruncuv4sf, 2, v4si) + VAR1 (UNOP, lbtruncuv2df, 2, v2di) + + VAR1 (UNOP, lroundv2sf, 2, v2si) + VAR1 (UNOP, lroundv4sf, 2, v4si) + VAR1 (UNOP, lroundv2df, 2, v2di) + /* Implemented by l2. */ + VAR1 (UNOP, lroundsf, 2, si) + VAR1 (UNOP, lrounddf, 2, di) + + VAR1 (UNOP, lrounduv2sf, 2, v2si) + VAR1 (UNOP, lrounduv4sf, 2, v4si) + VAR1 (UNOP, lrounduv2df, 2, v2di) + VAR1 (UNOP, lroundusf, 2, si) + VAR1 (UNOP, lroundudf, 2, di) + + VAR1 (UNOP, lceilv2sf, 2, v2si) + VAR1 (UNOP, lceilv4sf, 2, v4si) + VAR1 (UNOP, lceilv2df, 2, v2di) + + VAR1 (UNOP, lceiluv2sf, 2, v2si) + VAR1 (UNOP, lceiluv4sf, 2, v4si) + VAR1 (UNOP, lceiluv2df, 2, v2di) + VAR1 (UNOP, lceilusf, 2, si) + VAR1 (UNOP, lceiludf, 2, di) + + VAR1 (UNOP, lfloorv2sf, 2, v2si) + VAR1 (UNOP, lfloorv4sf, 2, v4si) + VAR1 (UNOP, lfloorv2df, 2, v2di) + + VAR1 (UNOP, lflooruv2sf, 2, v2si) + VAR1 (UNOP, lflooruv4sf, 2, v4si) + VAR1 (UNOP, lflooruv2df, 2, v2di) + VAR1 (UNOP, lfloorusf, 2, si) + VAR1 (UNOP, lfloorudf, 2, di) + + VAR1 (UNOP, lfrintnv2sf, 2, v2si) + VAR1 (UNOP, lfrintnv4sf, 2, v4si) + VAR1 (UNOP, lfrintnv2df, 2, v2di) + VAR1 (UNOP, lfrintnsf, 2, si) + VAR1 (UNOP, lfrintndf, 2, di) + + VAR1 (UNOP, lfrintnuv2sf, 2, v2si) + VAR1 (UNOP, lfrintnuv4sf, 2, v4si) + VAR1 (UNOP, lfrintnuv2df, 2, v2di) + VAR1 (UNOP, lfrintnusf, 2, si) + VAR1 (UNOP, lfrintnudf, 2, di) + + /* Implemented by 2. 
*/ + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) + + VAR1 (UNOP, floatunsv2si, 2, v2sf) + VAR1 (UNOP, floatunsv4si, 2, v4sf) + VAR1 (UNOP, floatunsv2di, 2, v2df) + + /* Implemented by + aarch64_. */ + BUILTIN_VALL (BINOP, zip1, 0) + BUILTIN_VALL (BINOP, zip2, 0) + BUILTIN_VALL (BINOP, uzp1, 0) + BUILTIN_VALL (BINOP, uzp2, 0) + BUILTIN_VALL (BINOP, trn1, 0) + BUILTIN_VALL (BINOP, trn2, 0) + + /* Implemented by + aarch64_frecp. */ + BUILTIN_GPF (UNOP, frecpe, 0) + BUILTIN_GPF (BINOP, frecps, 0) + BUILTIN_GPF (UNOP, frecpx, 0) + + BUILTIN_VDQF (UNOP, frecpe, 0) + BUILTIN_VDQF (BINOP, frecps, 0) + + BUILTIN_VALLDI (UNOP, abs, 2) + + VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf) + VAR1 (BINOP, float_truncate_hi_, 0, v4sf) + + VAR1 (UNOP, float_extend_lo_, 0, v2df) + VAR1 (UNOP, float_truncate_lo_, 0, v2sf) + + /* Implemented by aarch64_ld1. */ + BUILTIN_VALL (LOAD1, ld1, 0) + + /* Implemented by aarch64_st1. */ + BUILTIN_VALL (STORE1, st1, 0) + + /* Implemented by fma4. */ + BUILTIN_VDQF (TERNOP, fma, 4) + + /* Implemented by aarch64_simd_bsl. */ + BUILTIN_VDQQH (BSL_P, simd_bsl, 0) + BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0) + BUILTIN_VALLDIF (BSL_S, simd_bsl, 0) + + /* Implemented by aarch64_crypto_aes. */ + VAR1 (BINOPU, crypto_aese, 0, v16qi) + VAR1 (BINOPU, crypto_aesd, 0, v16qi) + VAR1 (UNOPU, crypto_aesmc, 0, v16qi) + VAR1 (UNOPU, crypto_aesimc, 0, v16qi) + + /* Implemented by aarch64_crypto_sha1. */ + VAR1 (UNOPU, crypto_sha1h, 0, si) + VAR1 (BINOPU, crypto_sha1su1, 0, v4si) + VAR1 (TERNOPU, crypto_sha1c, 0, v4si) + VAR1 (TERNOPU, crypto_sha1m, 0, v4si) + VAR1 (TERNOPU, crypto_sha1p, 0, v4si) + VAR1 (TERNOPU, crypto_sha1su0, 0, v4si) + + /* Implemented by aarch64_crypto_sha256. */ + VAR1 (TERNOPU, crypto_sha256h, 0, v4si) + VAR1 (TERNOPU, crypto_sha256h2, 0, v4si) + VAR1 (BINOPU, crypto_sha256su0, 0, v4si) + VAR1 (TERNOPU, crypto_sha256su1, 0, v4si) + + /* Implemented by aarch64_crypto_pmull. */ + VAR1 (BINOPP, crypto_pmull, 0, di) + VAR1 (BINOPP, crypto_pmull, 0, v2di) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-simd.md b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md new file mode 100644 index 000000000..6048d605c --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-simd.md @@ -0,0 +1,4363 @@ +;; Machine description for AArch64 AdvSIMD architecture. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
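+;; (Illustrative note, not part of the original file: the move expanders
+;; below guard against memory-to-memory moves, which AdvSIMD cannot do
+;; directly.  Copying one in-memory vector object to another is instead
+;; expected to go through a register, roughly
+;;
+;;	ldr	q0, [x1]
+;;	str	q0, [x0]
+;;
+;; which is why a MEM destination forces operand 1 into a register.)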
+ +(define_expand "mov" + [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" + " + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); + " +) + +(define_expand "movmisalign" + [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" +{ + /* This pattern is not permitted to fail during expansion: if both arguments + are non-registers (e.g. memory := constant, which can be created by the + auto-vectorizer), force operand 1 into a register. */ + if (!register_operand (operands[0], mode) + && !register_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); +}) + +(define_insn "aarch64_simd_dup" + [(set (match_operand:VDQ 0 "register_operand" "=w, w") + (vec_duplicate:VDQ (match_operand: 1 "register_operand" "r, w")))] + "TARGET_SIMD" + "@ + dup\\t%0., %1 + dup\\t%0., %1.[0]" + [(set_attr "type" "neon_from_gp, neon_dup")] +) + +(define_insn "aarch64_simd_dup" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (vec_duplicate:VDQF (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "dup\\t%0., %1.[0]" + [(set_attr "type" "neon_dup")] +) + +(define_insn "aarch64_dup_lane" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select: + (match_operand:VALL 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "dup\\t%0., %1.[%2]"; + } + [(set_attr "type" "neon_dup")] +) + +(define_insn "aarch64_dup_lane_" + [(set (match_operand:VALL 0 "register_operand" "=w") + (vec_duplicate:VALL + (vec_select: + (match_operand: 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]) + )))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "dup\\t%0., %1.[%2]"; + } + [(set_attr "type" "neon_dup")] +) + +(define_insn "*aarch64_simd_mov" + [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" + "=w, m, w, ?r, ?w, ?r, w") + (match_operand:VD 1 "aarch64_simd_general_operand" + "m, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + switch (which_alternative) + { + case 0: return "ldr\\t%d0, %1"; + case 1: return "str\\t%d1, %0"; + case 2: return "orr\t%0., %1., %1."; + case 3: return "umov\t%0, %1.d[0]"; + case 4: return "ins\t%0.d[0], %1"; + case 5: return "mov\t%0, %1"; + case 6: + return aarch64_output_simd_mov_immediate (operands[1], + mode, 64); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_load1_1reg, neon_store1_1reg,\ + neon_logic, neon_to_gp, neon_from_gp,\ + mov_reg, neon_move")] +) + +(define_insn "*aarch64_simd_mov" + [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand" + "=w, m, w, ?r, ?w, ?r, w") + (match_operand:VQ 1 "aarch64_simd_general_operand" + "m, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + switch (which_alternative) + { + case 0: + return "ldr\\t%q0, %1"; + case 1: + return "str\\t%q1, %0"; + case 2: + return "orr\t%0., %1., %1."; + case 3: + case 4: + case 5: + return "#"; + case 6: + return aarch64_output_simd_mov_immediate (operands[1], mode, 128); + default: + gcc_unreachable (); + } +} + [(set_attr "type" 
"neon_load1_1reg, neon_store1_1reg,\ + neon_logic, multiple, multiple, multiple,\ + neon_move") + (set_attr "length" "4,4,4,8,8,8,4")] +) + +(define_split + [(set (match_operand:VQ 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (DImode, rdest); + src[0] = gen_rtx_REG (DImode, rsrc); + dest[1] = gen_rtx_REG (DImode, rdest + 1); + src[1] = gen_rtx_REG (DImode, rsrc + 1); + + aarch64_simd_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:VQ 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) + || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" + [(const_int 0)] +{ + aarch64_split_simd_move (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_split_simd_mov" + [(set (match_operand:VQ 0) + (match_operand:VQ 1))] + "TARGET_SIMD" + { + rtx dst = operands[0]; + rtx src = operands[1]; + + if (GP_REGNUM_P (REGNO (src))) + { + rtx src_low_part = gen_lowpart (mode, src); + rtx src_high_part = gen_highpart (mode, src); + + emit_insn + (gen_move_lo_quad_ (dst, src_low_part)); + emit_insn + (gen_move_hi_quad_ (dst, src_high_part)); + } + + else + { + rtx dst_low_part = gen_lowpart (mode, dst); + rtx dst_high_part = gen_highpart (mode, dst); + rtx lo = aarch64_simd_vect_par_cnst_half (mode, false); + rtx hi = aarch64_simd_vect_par_cnst_half (mode, true); + + emit_insn + (gen_aarch64_simd_mov_from_low (dst_low_part, src, lo)); + emit_insn + (gen_aarch64_simd_mov_from_high (dst_high_part, src, hi)); + } + DONE; + } +) + +(define_insn "aarch64_simd_mov_from_low" + [(set (match_operand: 0 "register_operand" "=r") + (vec_select: + (match_operand:VQ 1 "register_operand" "w") + (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] + "TARGET_SIMD && reload_completed" + "umov\t%0, %1.d[0]" + [(set_attr "type" "neon_to_gp") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_simd_mov_from_high" + [(set (match_operand: 0 "register_operand" "=r") + (vec_select: + (match_operand:VQ 1 "register_operand" "w") + (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] + "TARGET_SIMD && reload_completed" + "umov\t%0, %1.d[1]" + [(set_attr "type" "neon_to_gp") + (set_attr "length" "4") + ]) + +(define_insn "orn3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w")) + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "orn\t%0., %2., %1." + [(set_attr "type" "neon_logic")] +) + +(define_insn "bic3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (and:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w")) + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "bic\t%0., %2., %1." + [(set_attr "type" "neon_logic")] +) + +(define_insn "add3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (plus:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "add\t%0., %1., %2." 
+ [(set_attr "type" "neon_add")] +) + +(define_insn "sub3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (minus:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "sub\t%0., %1., %2." + [(set_attr "type" "neon_sub")] +) + +(define_insn "mul3" + [(set (match_operand:VDQM 0 "register_operand" "=w") + (mult:VDQM (match_operand:VDQM 1 "register_operand" "w") + (match_operand:VDQM 2 "register_operand" "w")))] + "TARGET_SIMD" + "mul\t%0., %1., %2." + [(set_attr "type" "neon_mul_")] +) + +(define_insn "*aarch64_mul3_elt" + [(set (match_operand:VMUL 0 "register_operand" "=w") + (mult:VMUL + (vec_duplicate:VMUL + (vec_select: + (match_operand:VMUL 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VMUL 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "mul\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mul__scalar")] +) + +(define_insn "*aarch64_mul3_elt_" + [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w") + (mult:VMUL_CHANGE_NLANES + (vec_duplicate:VMUL_CHANGE_NLANES + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "mul\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mul__scalar")] +) + +(define_insn "*aarch64_mul3_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (mult:V2DF + (vec_duplicate:V2DF + (match_operand:DF 2 "register_operand" "w")) + (match_operand:V2DF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fmul\\t%0.2d, %1.2d, %2.d[0]" + [(set_attr "type" "neon_fp_mul_d_scalar_q")] +) + +(define_insn "*aarch64_mul3_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (mult:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand")])) + (match_operand:DF 3 "register_operand" "w")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; + } + [(set_attr "type" "neon_fp_mul_d_scalar_q")] +) + +(define_insn "neg2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "neg\t%0., %1." + [(set_attr "type" "neon_neg")] +) + +(define_insn "abs2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (abs:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "abs\t%0., %1." + [(set_attr "type" "neon_abs")] +) + +(define_insn "abd_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))))] + "TARGET_SIMD" + "sabd\t%0., %1., %2." + [(set_attr "type" "neon_abd")] +) + +(define_insn "aba_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w"))) + (match_operand:VDQ_BHSI 3 "register_operand" "0")))] + "TARGET_SIMD" + "saba\t%0., %1., %2." 
+ [(set_attr "type" "neon_arith_acc")] +) + +(define_insn "fabd_3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (abs:VDQF (minus:VDQF + (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%0., %1., %2." + [(set_attr "type" "neon_fp_abd_")] +) + +(define_insn "*fabd_scalar3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (abs:GPF (minus:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fabd\t%0, %1, %2" + [(set_attr "type" "neon_fp_abd_")] +) + +(define_insn "and3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (and:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "and\t%0., %1., %2." + [(set_attr "type" "neon_logic")] +) + +(define_insn "ior3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ior:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "orr\t%0., %1., %2." + [(set_attr "type" "neon_logic")] +) + +(define_insn "xor3" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (xor:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "eor\t%0., %1., %2." + [(set_attr "type" "neon_logic")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (not:VDQ (match_operand:VDQ 1 "register_operand" "w")))] + "TARGET_SIMD" + "not\t%0., %1." + [(set_attr "type" "neon_logic")] +) + +(define_insn "aarch64_simd_vec_set" + [(set (match_operand:VQ_S 0 "register_operand" "=w,w") + (vec_merge:VQ_S + (vec_duplicate:VQ_S + (match_operand: 1 "register_operand" "r,w")) + (match_operand:VQ_S 3 "register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (mode, exact_log2 (INTVAL (operands[2]))); + operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); + switch (which_alternative) + { + case 0: + return "ins\\t%0.[%p2], %w1"; + case 1: + return "ins\\t%0.[%p2], %1.[0]"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_from_gp, neon_ins")] +) + +(define_insn "aarch64_simd_lshr" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))] + "TARGET_SIMD" + "ushr\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "aarch64_simd_ashr" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))] + "TARGET_SIMD" + "sshr\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "aarch64_simd_imm_shl" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "aarch64_simd_lshift_imm" "Dl")))] + "TARGET_SIMD" + "shl\t%0., %1., %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "aarch64_simd_reg_sshl" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")))] + "TARGET_SIMD" + "sshl\t%0., %1., %2." 
+ [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "aarch64_simd_reg_shl_unsigned" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_SIMD" + "ushl\t%0., %1., %2." + [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "aarch64_simd_reg_shl_signed" + [(set (match_operand:VDQ 0 "register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_SIMD" + "sshl\t%0., %1., %2." + [(set_attr "type" "neon_shift_reg")] +) + +(define_expand "ashl3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount >= 0 && shift_amount < bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (mode, + shift_amount); + emit_insn (gen_aarch64_simd_imm_shl (operands[0], + operands[1], + tmp)); + DONE; + } + else + { + operands[2] = force_reg (SImode, operands[2]); + } + } + else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_aarch64_simd_dup (tmp, + convert_to_mode (mode, + operands[2], + 0))); + emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], + tmp)); + DONE; + } + else + FAIL; +} +) + +(define_expand "lshr3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount > 0 && shift_amount <= bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (mode, + shift_amount); + emit_insn (gen_aarch64_simd_lshr (operands[0], + operands[1], + tmp)); + DONE; + } + else + operands[2] = force_reg (SImode, operands[2]); + } + else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (mode); + emit_insn (gen_negsi2 (tmp, operands[2])); + emit_insn (gen_aarch64_simd_dup (tmp1, + convert_to_mode (mode, + tmp, 0))); + emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], + operands[1], + tmp1)); + DONE; + } + else + FAIL; +} +) + +(define_expand "ashr3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")] + "TARGET_SIMD" +{ + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + int shift_amount; + + if (CONST_INT_P (operands[2])) + { + shift_amount = INTVAL (operands[2]); + if (shift_amount > 0 && shift_amount <= bit_width) + { + rtx tmp = aarch64_simd_gen_const_vector_dup (mode, + shift_amount); + emit_insn (gen_aarch64_simd_ashr (operands[0], + operands[1], + tmp)); + DONE; + } + else + operands[2] = force_reg (SImode, operands[2]); + } + else if (MEM_P (operands[2])) + { + operands[2] = force_reg (SImode, operands[2]); + } + + if (REG_P (operands[2])) + { + rtx tmp = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (mode); + emit_insn (gen_negsi2 (tmp, operands[2])); + 
emit_insn (gen_aarch64_simd_dup (tmp1, + convert_to_mode (mode, + tmp, 0))); + emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], + operands[1], + tmp1)); + DONE; + } + else + FAIL; +} +) + +(define_expand "vashl3" + [(match_operand:VDQ 0 "register_operand" "") + (match_operand:VDQ 1 "register_operand" "") + (match_operand:VDQ 2 "register_operand" "")] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], + operands[2])); + DONE; +}) + +;; Using mode VQ_S as there is no V2DImode neg! +;; Negating individual lanes most certainly offsets the +;; gain from vectorization. +(define_expand "vashr3" + [(match_operand:VQ_S 0 "register_operand" "") + (match_operand:VQ_S 1 "register_operand" "") + (match_operand:VQ_S 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx neg = gen_reg_rtx (mode); + emit (gen_neg2 (neg, operands[2])); + emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], operands[1], + neg)); + DONE; +}) + +;; DI vector shift +(define_expand "aarch64_ashr_simddi" + [(match_operand:DI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "w") + (match_operand:SI 2 "aarch64_shift_imm64_di" "")] + "TARGET_SIMD" + { + if (INTVAL (operands[2]) == 64) + emit_insn (gen_aarch64_sshr_simddi (operands[0], operands[1])); + else + emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2])); + DONE; + } +) + +;; SIMD shift by 64. This pattern is a special case as standard pattern does +;; not handle NEON shifts by 64. +(define_insn "aarch64_sshr_simddi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:DI 1 "register_operand" "w")] UNSPEC_SSHR64))] + "TARGET_SIMD" + "sshr\t%d0, %d1, 64" + [(set_attr "type" "neon_shift_imm")] +) + +(define_expand "vlshr3" + [(match_operand:VQ_S 0 "register_operand" "") + (match_operand:VQ_S 1 "register_operand" "") + (match_operand:VQ_S 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx neg = gen_reg_rtx (mode); + emit (gen_neg2 (neg, operands[2])); + emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], operands[1], + neg)); + DONE; +}) + +(define_expand "aarch64_lshr_simddi" + [(match_operand:DI 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "w") + (match_operand:SI 2 "aarch64_shift_imm64_di" "")] + "TARGET_SIMD" + { + if (INTVAL (operands[2]) == 64) + emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1])); + else + emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); + DONE; + } +) + +;; SIMD shift by 64. This pattern is a special case as standard pattern does +;; not handle NEON shifts by 64. 
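+;; (Illustrative note, not part of the original file: a right shift by
+;; exactly 64 is valid for the AdvSIMD immediate forms even though the
+;; standard shift patterns reject it; for instance an intrinsic call
+;; such as vshrd_n_u64 (x, 64) is expected to yield 0, which the single
+;; instruction "ushr d0, d1, 64" provides directly.)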
+(define_insn "aarch64_ushr_simddi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))] + "TARGET_SIMD" + "ushr\t%d0, %d1, 64" + [(set_attr "type" "neon_shift_imm")] +) + +(define_expand "vec_set" + [(match_operand:VQ_S 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_set (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + +(define_insn "aarch64_simd_vec_setv2di" + [(set (match_operand:V2DI 0 "register_operand" "=w,w") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 1 "register_operand" "r,w")) + (match_operand:V2DI 3 "register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2]))); + operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); + switch (which_alternative) + { + case 0: + return "ins\\t%0.d[%p2], %1"; + case 1: + return "ins\\t%0.d[%p2], %1.d[0]"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_from_gp, neon_ins_q")] +) + +(define_expand "vec_setv2di" + [(match_operand:V2DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + +(define_insn "aarch64_simd_vec_set" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (vec_merge:VDQF + (vec_duplicate:VDQF + (match_operand: 1 "register_operand" "w")) + (match_operand:VDQF 3 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "TARGET_SIMD" + { + int elt = ENDIAN_LANE_N (mode, exact_log2 (INTVAL (operands[2]))); + + operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt); + return "ins\t%0.[%p2], %1.[0]"; + } + [(set_attr "type" "neon_ins")] +) + +(define_expand "vec_set" + [(match_operand:VDQF 0 "register_operand" "+w") + (match_operand: 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_aarch64_simd_vec_set (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; + } +) + + +(define_insn "aarch64_mla" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (plus:VQ_S (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w") + (match_operand:VQ_S 3 "register_operand" "w")) + (match_operand:VQ_S 1 "register_operand" "0")))] + "TARGET_SIMD" + "mla\t%0., %2., %3." 
+ [(set_attr "type" "neon_mla_")] +) + +(define_insn "*aarch64_mla_elt" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select: + (match_operand:VDQHS 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w")) + (match_operand:VDQHS 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "mla\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mla__scalar")] +) + +(define_insn "*aarch64_mla_elt_" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (plus:VDQHS + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w")) + (match_operand:VDQHS 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "mla\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mla__scalar")] +) + +(define_insn "aarch64_mls" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (minus:VQ_S (match_operand:VQ_S 1 "register_operand" "0") + (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w") + (match_operand:VQ_S 3 "register_operand" "w"))))] + "TARGET_SIMD" + "mls\t%0., %2., %3." + [(set_attr "type" "neon_mla_")] +) + +(define_insn "*aarch64_mls_elt" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 4 "register_operand" "0") + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select: + (match_operand:VDQHS 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "mls\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mla__scalar")] +) + +(define_insn "*aarch64_mls_elt_" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (minus:VDQHS + (match_operand:VDQHS 4 "register_operand" "0") + (mult:VDQHS + (vec_duplicate:VDQHS + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQHS 3 "register_operand" "w"))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "mls\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_mla__scalar")] +) + +;; Max/Min operations. +(define_insn "3" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VQ_S 2 "register_operand" "w")))] + "TARGET_SIMD" + "\t%0., %1., %2." + [(set_attr "type" "neon_minmax")] +) + +;; Move into low-half clearing high half to 0. + +(define_insn "move_lo_quad_" + [(set (match_operand:VQ 0 "register_operand" "=w,w,w") + (vec_concat:VQ + (match_operand: 1 "register_operand" "w,r,r") + (vec_duplicate: (const_int 0))))] + "TARGET_SIMD" + "@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" + [(set_attr "type" "neon_dup,fmov,neon_dup") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +) + +;; Move into high-half. 
+ +(define_insn "aarch64_simd_move_hi_quad_" + [(set (match_operand:VQ 0 "register_operand" "+w,w") + (vec_concat:VQ + (vec_select: + (match_dup 0) + (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) + (match_operand: 1 "register_operand" "w,r")))] + "TARGET_SIMD" + "@ + ins\\t%0.d[1], %1.d[0] + ins\\t%0.d[1], %1" + [(set_attr "type" "neon_ins") + (set_attr "length" "4")] +) + +(define_expand "move_hi_quad_" + [(match_operand:VQ 0 "register_operand" "") + (match_operand: 1 "register_operand" "")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, false); + emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], + operands[1], p)); + DONE; +}) + +;; Narrowing operations. + +;; For doubles. +(define_insn "aarch64_simd_vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=w") + (truncate: (match_operand:VQN 1 "register_operand" "w")))] + "TARGET_SIMD" + "xtn\\t%0., %1." + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VDN 1 "register_operand" "") + (match_operand:VDN 2 "register_operand" "")] + "TARGET_SIMD" +{ + rtx tempreg = gen_reg_rtx (mode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_move_lo_quad_ (tempreg, operands[lo])); + emit_insn (gen_move_hi_quad_ (tempreg, operands[hi])); + emit_insn (gen_aarch64_simd_vec_pack_trunc_ (operands[0], tempreg)); + DONE; +}) + +;; For quads. + +(define_insn "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "+&w") + (vec_concat: + (truncate: (match_operand:VQN 1 "register_operand" "w")) + (truncate: (match_operand:VQN 2 "register_operand" "w"))))] + "TARGET_SIMD" + { + if (BYTES_BIG_ENDIAN) + return "xtn\\t%0., %2.\;xtn2\\t%0., %1."; + else + return "xtn\\t%0., %1.\;xtn2\\t%0., %2."; + } + [(set_attr "type" "multiple") + (set_attr "length" "8")] +) + +;; Widening operations. + +(define_insn "aarch64_simd_vec_unpack_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_lo_half" "") + )))] + "TARGET_SIMD" + "shll %0., %1., 0" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_simd_vec_unpack_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_hi_half" "") + )))] + "TARGET_SIMD" + "shll2 %0., %1., 0" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_simd_vec_unpack_hi_ (operands[0], + operands[1], p)); + DONE; + } +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, false); + emit_insn (gen_aarch64_simd_vec_unpack_lo_ (operands[0], + operands[1], p)); + DONE; + } +) + +;; Widening arithmetic. 
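+;; A rough C model of the widening multiply-accumulate ("mlal") patterns
+;; below, for the signed, low-half case (illustrative names only, and
+;; endianness is ignored):
+;;
+;;   #include <stdint.h>
+;;   void smlal_lo (int32_t acc[4], const int16_t a[8], const int16_t b[8])
+;;   {
+;;     for (int i = 0; i < 4; i++)
+;;       acc[i] += (int32_t) a[i] * (int32_t) b[i];
+;;   }
+;;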
+ +(define_insn "*aarch64_mlal_lo" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "mlal\t%0., %2., %4." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlal_hi" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3)))) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "mlal2\t%0., %2., %4." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlsl_lo" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "mlsl\t%0., %2., %4." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlsl_hi" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 4 "register_operand" "w") + (match_dup 3))))))] + "TARGET_SIMD" + "mlsl2\t%0., %2., %4." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlal" + [(set (match_operand: 0 "register_operand" "=w") + (plus: + (mult: + (ANY_EXTEND: + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w"))) + (match_operand: 3 "register_operand" "0")))] + "TARGET_SIMD" + "mlal\t%0., %1., %2." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "*aarch64_mlsl" + [(set (match_operand: 0 "register_operand" "=w") + (minus: + (match_operand: 1 "register_operand" "0") + (mult: + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VDW 3 "register_operand" "w")))))] + "TARGET_SIMD" + "mlsl\t%0., %2., %3." + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "aarch64_simd_vec_mult_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "mull\\t%0., %1., %2." 
+ [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, false); + emit_insn (gen_aarch64_simd_vec_mult_lo_ (operands[0], + operands[1], + operands[2], p)); + DONE; + } +) + +(define_insn "aarch64_simd_vec_mult_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "mull2\\t%0., %1., %2." + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) + (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_simd_vec_mult_hi_ (operands[0], + operands[1], + operands[2], p)); + DONE; + + } +) + +;; FP vector operations. +;; AArch64 AdvSIMD supports single-precision (32-bit) and +;; double-precision (64-bit) floating-point data types and arithmetic as +;; defined by the IEEE 754-2008 standard. This makes them vectorizable +;; without the need for -ffast-math or -funsafe-math-optimizations. +;; +;; Floating-point operations can raise an exception. Vectorizing such +;; operations are safe because of reasons explained below. +;; +;; ARMv8 permits an extension to enable trapped floating-point +;; exception handling, however this is an optional feature. In the +;; event of a floating-point exception being raised by vectorised +;; code then: +;; 1. If trapped floating-point exceptions are available, then a trap +;; will be taken when any lane raises an enabled exception. A trap +;; handler may determine which lane raised the exception. +;; 2. Alternatively a sticky exception flag is set in the +;; floating-point status register (FPSR). Software may explicitly +;; test the exception flags, in which case the tests will either +;; prevent vectorisation, allowing precise identification of the +;; failing operation, or if tested outside of vectorisable regions +;; then the specific operation and lane are not of interest. + +;; FP arithmetic operations. + +(define_insn "add3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (plus:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fadd\\t%0., %1., %2." + [(set_attr "type" "neon_fp_addsub_")] +) + +(define_insn "sub3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (minus:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fsub\\t%0., %1., %2." + [(set_attr "type" "neon_fp_addsub_")] +) + +(define_insn "mul3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (mult:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fmul\\t%0., %1., %2." + [(set_attr "type" "neon_fp_mul_")] +) + +(define_insn "div3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (div:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fdiv\\t%0., %1., %2." 
+ [(set_attr "type" "neon_fp_div_")] +) + +(define_insn "neg2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fneg\\t%0., %1." + [(set_attr "type" "neon_fp_neg_")] +) + +(define_insn "abs2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fabs\\t%0., %1." + [(set_attr "type" "neon_fp_abs_")] +) + +(define_insn "fma4" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w") + (match_operand:VDQF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmla\\t%0., %1., %2." + [(set_attr "type" "neon_fp_mla_")] +) + +(define_insn "*aarch64_fma4_elt" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (vec_duplicate:VDQF + (vec_select: + (match_operand:VDQF 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQF 3 "register_operand" "w") + (match_operand:VDQF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "fmla\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_fp_mla__scalar")] +) + +(define_insn "*aarch64_fma4_elt_" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (fma:VDQSF + (vec_duplicate:VDQSF + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQSF 3 "register_operand" "w") + (match_operand:VDQSF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "fmla\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_fp_mla__scalar")] +) + +(define_insn "*aarch64_fma4_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (fma:V2DF + (vec_duplicate:V2DF + (match_operand:DF 1 "register_operand" "w")) + (match_operand:V2DF 2 "register_operand" "w") + (match_operand:V2DF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmla\\t%0.2d, %2.2d, %1.2d[0]" + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "*aarch64_fma4_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (fma:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand")])) + (match_operand:DF 3 "register_operand" "w") + (match_operand:DF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmla\\t%0.2d, %3.2d, %1.2d[%2]"; + } + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "fnma4" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (match_operand:VDQF 1 "register_operand" "w") + (neg:VDQF + (match_operand:VDQF 2 "register_operand" "w")) + (match_operand:VDQF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmls\\t%0., %1., %2." 
+ [(set_attr "type" "neon_fp_mla_")] +) + +(define_insn "*aarch64_fnma4_elt" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (fma:VDQF + (neg:VDQF + (match_operand:VDQF 3 "register_operand" "w")) + (vec_duplicate:VDQF + (vec_select: + (match_operand:VDQF 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "fmls\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_fp_mla__scalar")] +) + +(define_insn "*aarch64_fnma4_elt_" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (fma:VDQSF + (neg:VDQSF + (match_operand:VDQSF 3 "register_operand" "w")) + (vec_duplicate:VDQSF + (vec_select: + (match_operand: 1 "register_operand" "") + (parallel [(match_operand:SI 2 "immediate_operand")]))) + (match_operand:VDQSF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, + INTVAL (operands[2]))); + return "fmls\\t%0., %3., %1.[%2]"; + } + [(set_attr "type" "neon_fp_mla__scalar")] +) + +(define_insn "*aarch64_fnma4_elt_to_128df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (fma:V2DF + (neg:V2DF + (match_operand:V2DF 2 "register_operand" "w")) + (vec_duplicate:V2DF + (match_operand:DF 1 "register_operand" "w")) + (match_operand:V2DF 3 "register_operand" "0")))] + "TARGET_SIMD" + "fmls\\t%0.2d, %2.2d, %1.2d[0]" + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +(define_insn "*aarch64_fnma4_elt_to_64v2df" + [(set (match_operand:DF 0 "register_operand" "=w") + (fma:DF + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand")])) + (neg:DF + (match_operand:DF 3 "register_operand" "w")) + (match_operand:DF 4 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); + return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; + } + [(set_attr "type" "neon_fp_mla_d_scalar_q")] +) + +;; Vector versions of the floating-point frint patterns. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +(define_insn "2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] + FRINT))] + "TARGET_SIMD" + "frint\\t%0., %1." + [(set_attr "type" "neon_fp_round_")] +) + +;; Vector versions of the fcvt standard patterns. +;; Expands to lbtrunc, lround, lceil, lfloor +(define_insn "l2" + [(set (match_operand: 0 "register_operand" "=w") + (FIXUORS: (unspec: + [(match_operand:VDQF 1 "register_operand" "w")] + FCVT)))] + "TARGET_SIMD" + "fcvt\\t%0., %1." + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_expand "2" + [(set (match_operand: 0 "register_operand") + (FIXUORS: (unspec: + [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ)))] + "TARGET_SIMD" + {}) + +(define_expand "2" + [(set (match_operand: 0 "register_operand") + (FIXUORS: (unspec: + [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ)))] + "TARGET_SIMD" + {}) + +(define_expand "ftrunc2" + [(set (match_operand:VDQF 0 "register_operand") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] + UNSPEC_FRINTZ))] + "TARGET_SIMD" + {}) + +(define_insn "2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (FLOATUORS:VDQF + (match_operand: 1 "register_operand" "w")))] + "TARGET_SIMD" + "cvtf\\t%0., %1." + [(set_attr "type" "neon_int_to_fp_")] +) + +;; Conversions between vectors of floats and doubles. 
+;; Contains a mix of patterns to match standard pattern names +;; and those for intrinsics. + +;; Float widening operations. + +(define_insn "vec_unpacks_lo_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (parallel [(const_int 0) (const_int 1)]) + )))] + "TARGET_SIMD" + "fcvtl\\t%0.2d, %1.2s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +(define_insn "aarch64_float_extend_lo_v2df" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (match_operand:V2SF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fcvtl\\t%0.2d, %1.2s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +(define_insn "vec_unpacks_hi_v4sf" + [(set (match_operand:V2DF 0 "register_operand" "=w") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (parallel [(const_int 2) (const_int 3)]) + )))] + "TARGET_SIMD" + "fcvtl2\\t%0.2d, %1.4s" + [(set_attr "type" "neon_fp_cvt_widen_s")] +) + +;; Float narrowing operations. + +(define_insn "aarch64_float_truncate_lo_v2sf" + [(set (match_operand:V2SF 0 "register_operand" "=w") + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fcvtn\\t%0.2s, %1.2d" + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] +) + +(define_insn "aarch64_float_truncate_hi_v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (vec_concat:V4SF + (match_operand:V2SF 1 "register_operand" "0") + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand" "w"))))] + "TARGET_SIMD" + "fcvtn2\\t%0.4s, %2.2d" + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] +) + +(define_expand "vec_pack_trunc_v2df" + [(set (match_operand:V4SF 0 "register_operand") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 1 "register_operand")) + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand")) + ))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2SFmode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo])); + emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0], + tmp, operands[hi])); + DONE; + } +) + +(define_expand "vec_pack_trunc_df" + [(set (match_operand:V2SF 0 "register_operand") + (vec_concat:V2SF + (float_truncate:SF + (match_operand:DF 1 "register_operand")) + (float_truncate:SF + (match_operand:DF 2 "register_operand")) + ))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (V2SFmode); + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo])); + emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi])); + emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp)); + DONE; + } +) + +(define_insn "aarch64_vmls" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (minus:VDQF (match_operand:VDQF 1 "register_operand" "0") + (mult:VDQF (match_operand:VDQF 2 "register_operand" "w") + (match_operand:VDQF 3 "register_operand" "w"))))] + "TARGET_SIMD" + "fmls\\t%0., %2., %3." + [(set_attr "type" "neon_fp_mla__scalar")] +) + +;; FP Max/Min +;; Max/Min are introduced by idiom recognition by GCC's mid-end. An +;; expression like: +;; a = (b < c) ? b : c; +;; is idiom-matched as MIN_EXPR only if -ffinite-math-only is enabled +;; either explicitly or indirectly via -ffast-math. +;; +;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. 
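+;; For example, with -ffast-math the loop below is recognised as a
+;; MIN_EXPR and can be vectorised using these patterns (illustrative C,
+;; not part of the port):
+;;
+;;   void vmin (float *restrict d, const float *a, const float *b, int n)
+;;   {
+;;     for (int i = 0; i < n; i++)
+;;       d[i] = a[i] < b[i] ? a[i] : b[i];
+;;   }
+;;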
+;; The 'smax' and 'smin' RTL standard pattern names do not specify which +;; operand will be returned when both operands are zero (i.e. they may not +;; honour signed zeroes), or when either operand is NaN. Therefore GCC +;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring +;; NaNs. + +(define_insn "3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")))] + "TARGET_SIMD" + "fnm\\t%0., %1., %2." + [(set_attr "type" "neon_fp_minmax_")] +) + +(define_insn "3" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")] + FMAXMIN_UNS))] + "TARGET_SIMD" + "\\t%0., %1., %2." + [(set_attr "type" "neon_fp_minmax_")] +) + +;; 'across lanes' add. + +(define_insn "reduc_plus_" + [(set (match_operand:VDQV 0 "register_operand" "=w") + (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")] + SUADDV))] + "TARGET_SIMD" + "add\\t%0, %1." + [(set_attr "type" "neon_reduc_add")] +) + +(define_insn "reduc_plus_v2si" + [(set (match_operand:V2SI 0 "register_operand" "=w") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] + SUADDV))] + "TARGET_SIMD" + "addp\\t%0.2s, %1.2s, %1.2s" + [(set_attr "type" "neon_reduc_add")] +) + +(define_insn "reduc_splus_" + [(set (match_operand:V2F 0 "register_operand" "=w") + (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] + UNSPEC_FADDV))] + "TARGET_SIMD" + "faddp\\t%0, %1." + [(set_attr "type" "neon_fp_reduc_add_")] +) + +(define_insn "aarch64_addpv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] + UNSPEC_FADDV))] + "TARGET_SIMD" + "faddp\\t%0.4s, %1.4s, %1.4s" + [(set_attr "type" "neon_fp_reduc_add_s_q")] +) + +(define_expand "reduc_splus_v4sf" + [(set (match_operand:V4SF 0 "register_operand") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] + UNSPEC_FADDV))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1])); + emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0])); + DONE; +}) + +(define_insn "clz2" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "clz\\t%0., %1." + [(set_attr "type" "neon_cls")] +) + +;; 'across lanes' max and min ops. + +(define_insn "reduc__" + [(set (match_operand:VDQV_S 0 "register_operand" "=w") + (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")] + MAXMINV))] + "TARGET_SIMD" + "v\\t%0, %1." + [(set_attr "type" "neon_reduc_minmax")] +) + +(define_insn "reduc__v2si" + [(set (match_operand:V2SI 0 "register_operand" "=w") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")] + MAXMINV))] + "TARGET_SIMD" + "p\\t%0.2s, %1.2s, %1.2s" + [(set_attr "type" "neon_reduc_minmax")] +) + +(define_insn "reduc__" + [(set (match_operand:V2F 0 "register_operand" "=w") + (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] + FMAXMINV))] + "TARGET_SIMD" + "p\\t%0, %1." + [(set_attr "type" "neon_fp_reduc_minmax_")] +) + +(define_insn "reduc__v4sf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] + FMAXMINV))] + "TARGET_SIMD" + "v\\t%s0, %1.4s" + [(set_attr "type" "neon_fp_reduc_minmax_s_q")] +) + +;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register +;; allocation. 
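+;; In C terms the operation is a per-bit select (an illustrative sketch,
+;; not part of the port):
+;;
+;;   #include <stdint.h>
+;;   uint64_t bsl64 (uint64_t mask, uint64_t a, uint64_t b)
+;;   {
+;;     return (mask & a) | (~mask & b);
+;;   }
+;;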
+;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which +;; to select. +;; +;; Thus our BSL is of the form: +;; op0 = bsl (mask, op2, op3) +;; We can use any of: +;; +;; if (op0 = mask) +;; bsl mask, op1, op2 +;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0) +;; bit op0, op2, mask +;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0) +;; bif op0, op1, mask + +(define_insn "aarch64_simd_bsl_internal" + [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w") + (ior:VALLDIF + (and:VALLDIF + (match_operand: 1 "register_operand" " 0,w,w") + (match_operand:VALLDIF 2 "register_operand" " w,w,0")) + (and:VALLDIF + (not: + (match_dup: 1)) + (match_operand:VALLDIF 3 "register_operand" " w,0,w")) + ))] + "TARGET_SIMD" + "@ + bsl\\t%0., %2., %3. + bit\\t%0., %2., %1. + bif\\t%0., %3., %1." + [(set_attr "type" "neon_bsl")] +) + +(define_expand "aarch64_simd_bsl" + [(match_operand:VALLDIF 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand:VALLDIF 2 "register_operand") + (match_operand:VALLDIF 3 "register_operand")] + "TARGET_SIMD" +{ + /* We can't alias operands together if they have different modes. */ + operands[1] = gen_lowpart (mode, operands[1]); + emit_insn (gen_aarch64_simd_bsl_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "aarch64_vcond_internal" + [(set (match_operand:VDQ 0 "register_operand") + (if_then_else:VDQ + (match_operator 3 "comparison_operator" + [(match_operand:VDQ 4 "register_operand") + (match_operand:VDQ 5 "nonmemory_operand")]) + (match_operand:VDQ 1 "nonmemory_operand") + (match_operand:VDQ 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + int inverse = 0, has_zero_imm_form = 0; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx mask = gen_reg_rtx (mode); + + switch (GET_CODE (operands[3])) + { + case LE: + case LT: + case NE: + inverse = 1; + /* Fall through. */ + case GE: + case GT: + case EQ: + has_zero_imm_form = 1; + break; + case LEU: + case LTU: + inverse = 1; + break; + default: + break; + } + + if (!REG_P (operands[5]) + && (operands[5] != CONST0_RTX (mode) || !has_zero_imm_form)) + operands[5] = force_reg (mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case LT: + case GE: + emit_insn (gen_aarch64_cmge (mask, operands[4], operands[5])); + break; + + case LE: + case GT: + emit_insn (gen_aarch64_cmgt (mask, operands[4], operands[5])); + break; + + case LTU: + case GEU: + emit_insn (gen_aarch64_cmgeu (mask, operands[4], operands[5])); + break; + + case LEU: + case GTU: + emit_insn (gen_aarch64_cmgtu (mask, operands[4], operands[5])); + break; + + case NE: + case EQ: + emit_insn (gen_aarch64_cmeq (mask, operands[4], operands[5])); + break; + + default: + gcc_unreachable (); + } + + if (inverse) + { + op1 = operands[2]; + op2 = operands[1]; + } + + /* If we have (a = (b CMP c) ? -1 : 0); + Then we can simply move the generated mask. 
*/ + + if (op1 == CONSTM1_RTX (mode) + && op2 == CONST0_RTX (mode)) + emit_move_insn (operands[0], mask); + else + { + if (!REG_P (op1)) + op1 = force_reg (mode, op1); + if (!REG_P (op2)) + op2 = force_reg (mode, op2); + emit_insn (gen_aarch64_simd_bsl (operands[0], mask, + op1, op2)); + } + + DONE; +}) + +(define_expand "aarch64_vcond_internal" + [(set (match_operand:VDQF_COND 0 "register_operand") + (if_then_else:VDQF + (match_operator 3 "comparison_operator" + [(match_operand:VDQF 4 "register_operand") + (match_operand:VDQF 5 "nonmemory_operand")]) + (match_operand:VDQF_COND 1 "nonmemory_operand") + (match_operand:VDQF_COND 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + int inverse = 0; + int use_zero_form = 0; + int swap_bsl_operands = 0; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx mask = gen_reg_rtx (mode); + rtx tmp = gen_reg_rtx (mode); + + rtx (*base_comparison) (rtx, rtx, rtx); + rtx (*complimentary_comparison) (rtx, rtx, rtx); + + switch (GET_CODE (operands[3])) + { + case GE: + case GT: + case LE: + case LT: + case EQ: + if (operands[5] == CONST0_RTX (mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. */ + default: + if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case UNLT: + inverse = 1; + /* Fall through. */ + case GE: + case UNGE: + case ORDERED: + case UNORDERED: + base_comparison = gen_aarch64_cmge; + complimentary_comparison = gen_aarch64_cmgt; + break; + case LE: + case UNLE: + inverse = 1; + /* Fall through. */ + case GT: + case UNGT: + base_comparison = gen_aarch64_cmgt; + complimentary_comparison = gen_aarch64_cmge; + break; + case EQ: + case NE: + case UNEQ: + base_comparison = gen_aarch64_cmeq; + complimentary_comparison = gen_aarch64_cmeq; + break; + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case LE: + case GT: + case GE: + case EQ: + /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. + As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: + a GE b -> a GE b + a GT b -> a GT b + a LE b -> b GE a + a LT b -> b GT a + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. */ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_aarch64_cmlt; + break; + case LE: + base_comparison = gen_aarch64_cmle; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. */ + break; + } + } + + if (!inverse) + emit_insn (base_comparison (mask, operands[4], operands[5])); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4])); + break; + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case NE: + /* FCM returns false for lanes which are unordered, so if we use + the inverse of the comparison we actually want to emit, then + swap the operands to BSL, we will end up with the correct result. + Note that a NE NaN and NaN NE b are true for all a, b. + + Our transformations are: + a GE b -> !(b GT a) + a GT b -> !(b GE a) + a LE b -> !(a GT b) + a LT b -> !(a GE b) + a NE b -> !(a EQ b) */ + + if (inverse) + emit_insn (base_comparison (mask, operands[4], operands[5])); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4])); + + swap_bsl_operands = 1; + break; + case UNEQ: + /* We check (a > b || b > a). 
combining these comparisons give us + true iff !(a != b && a ORDERED b), swapping the operands to BSL + will then give us (a == b || a UNORDERED b) as intended. */ + + emit_insn (gen_aarch64_cmgt (mask, operands[4], operands[5])); + emit_insn (gen_aarch64_cmgt (tmp, operands[5], operands[4])); + emit_insn (gen_ior3 (mask, mask, tmp)); + swap_bsl_operands = 1; + break; + case UNORDERED: + /* Operands are ORDERED iff (a > b || b >= a). + Swapping the operands to BSL will give the UNORDERED case. */ + swap_bsl_operands = 1; + /* Fall through. */ + case ORDERED: + emit_insn (gen_aarch64_cmgt (tmp, operands[4], operands[5])); + emit_insn (gen_aarch64_cmge (mask, operands[5], operands[4])); + emit_insn (gen_ior3 (mask, mask, tmp)); + break; + default: + gcc_unreachable (); + } + + if (swap_bsl_operands) + { + op1 = operands[2]; + op2 = operands[1]; + } + + /* If we have (a = (b CMP c) ? -1 : 0); + Then we can simply move the generated mask. */ + + if (op1 == CONSTM1_RTX (mode) + && op2 == CONST0_RTX (mode)) + emit_move_insn (operands[0], mask); + else + { + if (!REG_P (op1)) + op1 = force_reg (mode, op1); + if (!REG_P (op2)) + op2 = force_reg (mode, op2); + emit_insn (gen_aarch64_simd_bsl (operands[0], mask, + op1, op2)); + } + + DONE; +}) + +(define_expand "vcond" + [(set (match_operand:VALL 0 "register_operand") + (if_then_else:VALL + (match_operator 3 "comparison_operator" + [(match_operand:VALL 4 "register_operand") + (match_operand:VALL 5 "nonmemory_operand")]) + (match_operand:VALL 1 "nonmemory_operand") + (match_operand:VALL 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +(define_expand "vcond" + [(set (match_operand: 0 "register_operand") + (if_then_else: + (match_operator 3 "comparison_operator" + [(match_operand:VDQF 4 "register_operand") + (match_operand:VDQF 5 "nonmemory_operand")]) + (match_operand: 1 "nonmemory_operand") + (match_operand: 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal ( + operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +(define_expand "vcondu" + [(set (match_operand:VDQ 0 "register_operand") + (if_then_else:VDQ + (match_operator 3 "comparison_operator" + [(match_operand:VDQ 4 "register_operand") + (match_operand:VDQ 5 "nonmemory_operand")]) + (match_operand:VDQ 1 "nonmemory_operand") + (match_operand:VDQ 2 "nonmemory_operand")))] + "TARGET_SIMD" +{ + emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])); + DONE; +}) + +;; Patterns for AArch64 SIMD Intrinsics. + +(define_expand "aarch64_create" + [(match_operand:VD_RE 0 "register_operand" "") + (match_operand:DI 1 "general_operand" "")] + "TARGET_SIMD" +{ + rtx src = gen_lowpart (mode, operands[1]); + emit_move_insn (operands[0], src); + DONE; +}) + +;; Lane extraction with sign extension to general purpose register. 
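+;; A rough C model of the sign- and zero-extending lane reads handled by
+;; the patterns below (illustrative names only):
+;;
+;;   #include <stdint.h>
+;;   int64_t  get_lane_s16 (const int16_t v[8], int lane)   /* smov */
+;;   { return (int64_t) v[lane]; }
+;;   uint32_t get_lane_u16 (const uint16_t v[8], int lane)  /* umov */
+;;   { return (uint32_t) v[lane]; }
+;;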
+(define_insn "*aarch64_get_lane_extend" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI + (vec_select: + (match_operand:VDQQH 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "smov\\t%0, %1.[%2]"; + } + [(set_attr "type" "neon_to_gp")] +) + +(define_insn "*aarch64_get_lane_zero_extendsi" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select: + (match_operand:VDQQH 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + return "umov\\t%w0, %1.[%2]"; + } + [(set_attr "type" "neon_to_gp")] +) + +(define_expand "aarch64_be_checked_get_lane" + [(match_operand: 0 "aarch64_simd_nonimmediate_operand") + (match_operand:VALL 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + emit_insn (gen_aarch64_get_lane (operands[0], + operands[1], + operands[2])); + DONE; + } +) + +;; Lane extraction of a value, neither sign nor zero extension +;; is guaranteed so upper bits should be considered undefined. +(define_insn "aarch64_get_lane" + [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") + (vec_select: + (match_operand:VALL 1 "register_operand" "w, w, w") + (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] + "TARGET_SIMD" + { + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + switch (which_alternative) + { + case 0: + return "umov\\t%0, %1.[%2]"; + case 1: + return "dup\\t%0, %1.[%2]"; + case 2: + return "st1\\t{%1.}[%2], %0"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_to_gp, neon_dup, neon_store1_one_lane")] +) + +(define_expand "aarch64_get_lanedi" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv8qi" + [(match_operand:V8QI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4hi" + [(match_operand:V4HI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2si" + [(match_operand:V2SI 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2sf" + [(match_operand:V2SF 0 "register_operand" "") + (match_operand:VDC 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretdi" + [(match_operand:DI 0 "register_operand" "") + (match_operand:VD_RE 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], 
operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv8hi" + [(match_operand:V8HI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4si" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2di" + [(match_operand:V2DI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "aarch64_reinterpretv2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + +;; In this insn, operand 1 should be low, and operand 2 the high part of the +;; dest vector. + +(define_insn "*aarch64_combinez" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (match_operand:VDIC 1 "register_operand" "w") + (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))] + "TARGET_SIMD" + "mov\\t%0.8b, %1.8b" + [(set_attr "type" "neon_move")] +) + +(define_insn_and_split "aarch64_combine" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_simd_combine (operands[0], operands[1], operands[2]); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_expand "aarch64_simd_combine" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: (match_operand:VDC 1 "register_operand" "w") + (match_operand:VDC 2 "register_operand" "w")))] + "TARGET_SIMD" + { + emit_insn (gen_move_lo_quad_ (operands[0], operands[1])); + emit_insn (gen_move_hi_quad_ (operands[0], operands[2])); + DONE; + } +[(set_attr "type" "multiple")] +) + +;; l. + +(define_insn "aarch64_l_hi_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "l2\t%0., %1., %2." + [(set_attr "type" "neon__long")] +) + +(define_insn "aarch64_l_lo_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (ANY_EXTEND: (vec_select: + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) + (ANY_EXTEND: (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_dup 3)))))] + "TARGET_SIMD" + "l\t%0., %1., %2." 
+ [(set_attr "type" "neon__long")] +) + + +(define_expand "aarch64_saddl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_saddl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_uaddl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_uaddl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_ssubl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_ssubl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_usubl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_usubl_hi_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_insn "aarch64_l" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (ANY_EXTEND: + (match_operand:VDW 1 "register_operand" "w")) + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w"))))] + "TARGET_SIMD" + "l %0., %1., %2." + [(set_attr "type" "neon__long")] +) + +;; w. + +(define_insn "aarch64_w" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (match_operand: 1 "register_operand" "w") + (ANY_EXTEND: + (match_operand:VDW 2 "register_operand" "w"))))] + "TARGET_SIMD" + "w\\t%0., %1., %2." + [(set_attr "type" "neon__widen")] +) + +(define_insn "aarch64_w2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ADDSUB: (match_operand: 1 "register_operand" "w") + (ANY_EXTEND: + (vec_select: + (match_operand:VQW 2 "register_operand" "w") + (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] + "TARGET_SIMD" + "w2\\t%0., %1., %2." 
+ [(set_attr "type" "neon__widen")] +) + +(define_expand "aarch64_saddw2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_saddw2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_uaddw2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_uaddw2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + + +(define_expand "aarch64_ssubw2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_ssubw2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +(define_expand "aarch64_usubw2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQW 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_usubw2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; h. + +(define_insn "aarch64_h" + [(set (match_operand:VQ_S 0 "register_operand" "=w") + (unspec:VQ_S [(match_operand:VQ_S 1 "register_operand" "w") + (match_operand:VQ_S 2 "register_operand" "w")] + HADDSUB))] + "TARGET_SIMD" + "h\\t%0., %1., %2." + [(set_attr "type" "neon__halve")] +) + +;; hn. + +(define_insn "aarch64_hn" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 "register_operand" "w")] + ADDSUBHN))] + "TARGET_SIMD" + "hn\\t%0., %1., %2." + [(set_attr "type" "neon__halve_narrow_q")] +) + +(define_insn "aarch64_hn2" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand:VQN 2 "register_operand" "w") + (match_operand:VQN 3 "register_operand" "w")] + ADDSUBHN2))] + "TARGET_SIMD" + "hn2\\t%0., %2., %3." + [(set_attr "type" "neon__halve_narrow_q")] +) + +;; pmul. + +(define_insn "aarch64_pmul" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:VB 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] + UNSPEC_PMUL))] + "TARGET_SIMD" + "pmul\\t%0., %1., %2." 
+ [(set_attr "type" "neon_mul_")] +) + +;; q + +(define_insn "aarch64_" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:VSDQ_I 2 "register_operand" "w")))] + "TARGET_SIMD" + "\\t%0, %1, %2" + [(set_attr "type" "neon_")] +) + +;; suqadd and usqadd + +(define_insn "aarch64_qadd" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0") + (match_operand:VSDQ_I 2 "register_operand" "w")] + USSUQADD))] + "TARGET_SIMD" + "qadd\\t%0, %2" + [(set_attr "type" "neon_qadd")] +) + +;; sqmovun + +(define_insn "aarch64_sqmovun" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VSQN_HSDI 1 "register_operand" "w")] + UNSPEC_SQXTUN))] + "TARGET_SIMD" + "sqxtun\\t%0, %1" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] + ) + +;; sqmovn and uqmovn + +(define_insn "aarch64_qmovn" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VSQN_HSDI 1 "register_operand" "w")] + SUQMOVN))] + "TARGET_SIMD" + "qxtn\\t%0, %1" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] + ) + +;; q + +(define_insn "aarch64_s" + [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w") + (UNQOPS:VSDQ_I_BHSI + (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] + "TARGET_SIMD" + "s\\t%0, %1" + [(set_attr "type" "neon_")] +) + +;; sqdmulh. + +(define_insn "aarch64_sqdmulh" + [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") + (unspec:VSDQ_HSI + [(match_operand:VSDQ_HSI 1 "register_operand" "w") + (match_operand:VSDQ_HSI 2 "register_operand" "w")] + VQDMULH))] + "TARGET_SIMD" + "sqdmulh\\t%0, %1, %2" + [(set_attr "type" "neon_sat_mul_")] +) + +;; sqdmulh_lane + +(define_insn "aarch64_sqdmulh_lane" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "w") + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return \"sqdmulh\\t%0., %1., %2.[%3]\";" + [(set_attr "type" "neon_sat_mul__scalar")] +) + +(define_insn "aarch64_sqdmulh_laneq" + [(set (match_operand:VDQHS 0 "register_operand" "=w") + (unspec:VDQHS + [(match_operand:VDQHS 1 "register_operand" "w") + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return \"sqdmulh\\t%0., %1., %2.[%3]\";" + [(set_attr "type" "neon_sat_mul__scalar")] +) + +(define_insn "aarch64_sqdmulh_lane" + [(set (match_operand:SD_HSI 0 "register_operand" "=w") + (unspec:SD_HSI + [(match_operand:SD_HSI 1 "register_operand" "w") + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] + VQDMULH))] + "TARGET_SIMD" + "* + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return \"sqdmulh\\t%0, %1, %2.[%3]\";" + [(set_attr "type" "neon_sat_mul__scalar")] +) + +;; vqdml[sa]l + +(define_insn "aarch64_sqdmll" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 
"register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VSD_HSI 2 "register_operand" "w")) + (sign_extend: + (match_operand:VSD_HSI 3 "register_operand" "w"))) + (const_int 1))))] + "TARGET_SIMD" + "sqdmll\\t%0, %2, %3" + [(set_attr "type" "neon_sat_mla__long")] +) + +;; vqdml[sa]l_lane + +(define_insn "aarch64_sqdmll_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmll_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_expand "aarch64_sqdmlal_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlal_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmlal_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlsl_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +(define_expand "aarch64_sqdmlsl_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "0") + (match_operand:VSD_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmlsl_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + DONE; +}) + +;; vqdml[sa]l_n + +(define_insn "aarch64_sqdmll_n" + 
[(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (match_operand: 3 "register_operand" "")))) + (const_int 1))))] + "TARGET_SIMD" + "sqdmll\\t%0, %2, %3.[0]" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +;; sqdml[as]l2 + +(define_insn "aarch64_sqdmll2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 3 "register_operand" "w") + (match_dup 4)))) + (const_int 1))))] + "TARGET_SIMD" + "sqdmll2\\t%0, %2, %3" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_expand "aarch64_sqdmlal2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmlal2_internal (operands[0], operands[1], + operands[2], operands[3], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmlsl2_internal (operands[0], operands[1], + operands[2], operands[3], p)); + DONE; +}) + +;; vqdml[sa]l2_lane + +(define_insn "aarch64_sqdmll2_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]) + )))) + (const_int 1))))] + "TARGET_SIMD" + { + operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); + return + "sqdmll2\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_expand "aarch64_sqdmlal2_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlal2_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], + operands[2], 
operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + operands[4], p)); + DONE; +}) + +(define_insn "aarch64_sqdmll2_n_internal" + [(set (match_operand: 0 "register_operand" "=w") + (SBINQOPS: + (match_operand: 1 "register_operand" "0") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (match_operand: 3 "register_operand" "")))) + (const_int 1))))] + "TARGET_SIMD" + "sqdmll2\\t%0, %2, %3.[0]" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_expand "aarch64_sqdmlal2_n" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmlal2_n_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_sqdmlsl2_n" + [(match_operand: 0 "register_operand" "=w") + (match_operand: 1 "register_operand" "w") + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmlsl2_n_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +;; vqdmull + +(define_insn "aarch64_sqdmull" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VSD_HSI 1 "register_operand" "w")) + (sign_extend: + (match_operand:VSD_HSI 2 "register_operand" "w"))) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull\\t%0, %1, %2" + [(set_attr "type" "neon_sat_mul__long")] +) + +;; vqdmull_lane + +(define_insn "aarch64_sqdmull_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "aarch64_sqdmull_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + 
(ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")])) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_expand "aarch64_sqdmull_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VSD_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode) / 2); + emit_insn (gen_aarch64_sqdmull_lane_internal (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "aarch64_sqdmull_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VD_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmull_lane_internal + (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +;; vqdmull_n + +(define_insn "aarch64_sqdmull_n" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 1 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (match_operand: 2 "register_operand" ""))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull\\t%0, %1, %2.[0]" + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +;; vqdmull2 + + + +(define_insn "aarch64_sqdmull2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_dup 3))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull2\\t%0, %1, %2" + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_expand "aarch64_sqdmull2" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmull2_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; vqdmull2_lane + +(define_insn "aarch64_sqdmull2_lane_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (vec_select: + (match_operand: 2 "register_operand" "") + (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) + )) + (const_int 1)))] + "TARGET_SIMD" + { + operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); + return "sqdmull2\\t%0, %1, %2.[%3]"; + } + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_expand "aarch64_sqdmull2_lane" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS 
(mode) / 2); + emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +(define_expand "aarch64_sqdmull2_laneq" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], + operands[2], operands[3], + p)); + DONE; +}) + +;; vqdmull2_n + +(define_insn "aarch64_sqdmull2_n_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_duplicate: + (match_operand: 2 "register_operand" ""))) + ) + (const_int 1)))] + "TARGET_SIMD" + "sqdmull2\\t%0, %1, %2.[0]" + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_expand "aarch64_sqdmull2_n" + [(match_operand: 0 "register_operand" "=w") + (match_operand:VQ_HSI 1 "register_operand" "w") + (match_operand: 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (mode, true); + emit_insn (gen_aarch64_sqdmull2_n_internal (operands[0], operands[1], + operands[2], p)); + DONE; +}) + +;; vshl + +(define_insn "aarch64_shl" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI + [(match_operand:VSDQ_I_DI 1 "register_operand" "w") + (match_operand:VSDQ_I_DI 2 "register_operand" "w")] + VSHL))] + "TARGET_SIMD" + "shl\\t%0, %1, %2"; + [(set_attr "type" "neon_shift_reg")] +) + + +;; vqshl + +(define_insn "aarch64_qshl" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I + [(match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:VSDQ_I 2 "register_operand" "w")] + VQSHL))] + "TARGET_SIMD" + "qshl\\t%0, %1, %2"; + [(set_attr "type" "neon_sat_shift_reg")] +) + +;; vshll_n + +(define_insn "aarch64_shll_n" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VDW 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VSHLL))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width + 1); + if (INTVAL (operands[2]) == bit_width) + { + return \"shll\\t%0., %1., %2\"; + } + else { + return \"shll\\t%0., %1., %2\"; + }" + [(set_attr "type" "neon_shift_imm_long")] +) + +;; vshll_high_n + +(define_insn "aarch64_shll2_n" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VQW 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VSHLL))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width + 1); + if (INTVAL (operands[2]) == bit_width) + { + return \"shll2\\t%0., %1., %2\"; + } + else { + return \"shll2\\t%0., %1., %2\"; + }" + [(set_attr "type" "neon_shift_imm_long")] +) + +;; vrshr_n + +(define_insn "aarch64_shr_n" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VRSHR_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 1, bit_width 
+ 1); + return \"shr\\t%0, %1, %2\";" + [(set_attr "type" "neon_sat_shift_imm")] +) + +;; v(r)sra_n + +(define_insn "aarch64_sra_n" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") + (match_operand:VSDQ_I_DI 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + VSRA))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[3], 1, bit_width + 1); + return \"sra\\t%0, %2, %3\";" + [(set_attr "type" "neon_shift_acc")] +) + +;; vsi_n + +(define_insn "aarch64_si_n" + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") + (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0") + (match_operand:VSDQ_I_DI 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + VSLRI))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[3], 1 - , + bit_width - + 1); + return \"si\\t%0, %2, %3\";" + [(set_attr "type" "neon_shift_imm")] +) + +;; vqshl(u) + +(define_insn "aarch64_qshl_n" + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VQSHL_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 0, bit_width); + return \"qshl\\t%0, %1, %2\";" + [(set_attr "type" "neon_sat_shift_imm")] +) + + +;; vq(r)shr(u)n_n + +(define_insn "aarch64_qshrn_n" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand:VSQN_HSDI 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + VQSHRN_N))] + "TARGET_SIMD" + "* + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + aarch64_simd_const_bounds (operands[2], 1, bit_width + 1); + return \"qshrn\\t%0, %1, %2\";" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + + +;; cm(eq|ge|gt|lt|le) +;; Note, we have constraints for Dz and Z as different expanders +;; have different ideas of what should be passed to this pattern. + +(define_insn "aarch64_cm" + [(set (match_operand: 0 "register_operand" "=w,w") + (neg: + (COMPARISONS: + (match_operand:VDQ 1 "register_operand" "w,w") + (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz") + )))] + "TARGET_SIMD" + "@ + cm\t%0, %, % + cm\t%0, %1, #0" + [(set_attr "type" "neon_compare, neon_compare_zero")] +) + +(define_insn_and_split "aarch64_cmdi" + [(set (match_operand:DI 0 "register_operand" "=w,w,r") + (neg:DI + (COMPARISONS:DI + (match_operand:DI 1 "register_operand" "w,w,r") + (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") + ))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cm\t%d0, %d, %d + cm\t%d0, %d1, #0 + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. 
*/ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); + rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); + rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] +) + +;; cm(hs|hi) + +(define_insn "aarch64_cm" + [(set (match_operand: 0 "register_operand" "=w") + (neg: + (UCOMPARISONS: + (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w") + )))] + "TARGET_SIMD" + "cm\t%0, %, %" + [(set_attr "type" "neon_compare")] +) + +(define_insn_and_split "aarch64_cmdi" + [(set (match_operand:DI 0 "register_operand" "=w,r") + (neg:DI + (UCOMPARISONS:DI + (match_operand:DI 1 "register_operand" "w,r") + (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") + ))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cm\t%d0, %d, %d + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. */ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + enum machine_mode mode = CCmode; + rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); + rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_compare, neon_compare_zero")] +) + +;; cmtst + +(define_insn "aarch64_cmtst" + [(set (match_operand: 0 "register_operand" "=w") + (neg: + (ne: + (and:VDQ + (match_operand:VDQ 1 "register_operand" "w") + (match_operand:VDQ 2 "register_operand" "w")) + (vec_duplicate: (const_int 0)))))] + "TARGET_SIMD" + "cmtst\t%0, %1, %2" + [(set_attr "type" "neon_tst")] +) + +(define_insn_and_split "aarch64_cmtstdi" + [(set (match_operand:DI 0 "register_operand" "=w,r") + (neg:DI + (ne:DI + (and:DI + (match_operand:DI 1 "register_operand" "w,r") + (match_operand:DI 2 "register_operand" "w,r")) + (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" + "@ + cmtst\t%d0, %d1, %d2 + #" + "reload_completed + /* We need to prevent the split from + happening in the 'w' constraint cases. */ + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); + enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); + rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); + rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + [(set_attr "type" "neon_tst")] +) + +;; fcm(eq|ge|gt|le|lt) + +(define_insn "aarch64_cm" + [(set (match_operand: 0 "register_operand" "=w,w") + (neg: + (COMPARISONS: + (match_operand:VALLF 1 "register_operand" "w,w") + (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz") + )))] + "TARGET_SIMD" + "@ + fcm\t%0, %, % + fcm\t%0, %1, 0" + [(set_attr "type" "neon_fp_compare_")] +) + +;; fac(ge|gt) +;; Note we can also handle what would be fac(le|lt) by +;; generating fac(ge|gt). 
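For reference, the per-lane semantics behind these absolute-value comparisons can be modelled in scalar C. This is an illustrative sketch only (the helper names are made up and are not part of this port); it also shows why only the ge/gt forms need a pattern: fac(le|lt) is simply fac(ge|gt) with the operands swapped.

#include <math.h>
#include <stdint.h>

/* Per-lane model of facge/facgt: compare absolute values and return an
   all-ones or all-zero mask, as the vector instructions do.  */
static uint32_t
facge_lane (float a, float b)
{
  return fabsf (a) >= fabsf (b) ? 0xffffffffu : 0u;
}

static uint32_t
facgt_lane (float a, float b)
{
  return fabsf (a) > fabsf (b) ? 0xffffffffu : 0u;
}

/* faclt_lane (a, b) would be facgt_lane (b, a), and facle_lane (a, b)
   would be facge_lane (b, a), so no separate patterns are required.  */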
+ +(define_insn "*aarch64_fac" + [(set (match_operand: 0 "register_operand" "=w") + (neg: + (FAC_COMPARISONS: + (abs:VALLF (match_operand:VALLF 1 "register_operand" "w")) + (abs:VALLF (match_operand:VALLF 2 "register_operand" "w")) + )))] + "TARGET_SIMD" + "fac\t%0, %, %" + [(set_attr "type" "neon_fp_compare_")] +) + +;; addp + +(define_insn "aarch64_addp" + [(set (match_operand:VD_BHSI 0 "register_operand" "=w") + (unspec:VD_BHSI + [(match_operand:VD_BHSI 1 "register_operand" "w") + (match_operand:VD_BHSI 2 "register_operand" "w")] + UNSPEC_ADDP))] + "TARGET_SIMD" + "addp\t%0, %1, %2" + [(set_attr "type" "neon_reduc_add")] +) + +(define_insn "aarch64_addpdi" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI + [(match_operand:V2DI 1 "register_operand" "w")] + UNSPEC_ADDP))] + "TARGET_SIMD" + "addp\t%d0, %1.2d" + [(set_attr "type" "neon_reduc_add")] +) + +;; sqrt + +(define_insn "sqrt2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] + "TARGET_SIMD" + "fsqrt\\t%0., %1." + [(set_attr "type" "neon_fp_sqrt_")] +) + +;; Patterns for vector struct loads and stores. + +(define_insn "vec_load_lanesoi" + [(set (match_operand:OI 0 "register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD2))] + "TARGET_SIMD" + "ld2\\t{%S0. - %T0.}, %1" + [(set_attr "type" "neon_load2_2reg")] +) + +(define_insn "vec_store_lanesoi" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:OI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st2\\t{%S1. - %T1.}, %0" + [(set_attr "type" "neon_store2_2reg")] +) + +(define_insn "vec_load_lanesci" + [(set (match_operand:CI 0 "register_operand" "=w") + (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD3))] + "TARGET_SIMD" + "ld3\\t{%S0. - %U0.}, %1" + [(set_attr "type" "neon_load3_3reg")] +) + +(define_insn "vec_store_lanesci" + [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:CI [(match_operand:CI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st3\\t{%S1. - %U1.}, %0" + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "vec_load_lanesxi" + [(set (match_operand:XI 0 "register_operand" "=w") + (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD4))] + "TARGET_SIMD" + "ld4\\t{%S0. - %V0.}, %1" + [(set_attr "type" "neon_load4_4reg")] +) + +(define_insn "vec_store_lanesxi" + [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:XI [(match_operand:XI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st4\\t{%S1. - %V1.}, %0" + [(set_attr "type" "neon_store4_4reg")] +) + +;; Reload patterns for AdvSIMD register list operands. 
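The splits below decompose a move of an OI/CI/XI register tuple (two, three or four vector registers) into single-register TFmode moves, and aarch64_simd_disambiguate_copy has to order those moves so that an overlapping destination range does not clobber source registers that are still to be read. A hedged sketch of that ordering idea follows, assuming consecutively numbered registers; the helper is hypothetical and is not the GCC implementation.

/* Copy COUNT consecutive registers from SRC_BASE to DST_BASE without
   clobbering unread sources when the two ranges overlap: if the
   destination starts above the source, copy from the highest register
   downwards, otherwise copy upwards.  */
static void
copy_reg_tuple (unsigned dst_base, unsigned src_base, unsigned count,
                void (*move_one) (unsigned dst, unsigned src))
{
  if (dst_base > src_base)
    for (unsigned i = count; i-- > 0; )
      move_one (dst_base + i, src_base + i);    /* high to low */
  else
    for (unsigned i = 0; i < count; i++)
      move_one (dst_base + i, src_base + i);    /* low to high */
}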
+ +(define_expand "mov" + [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))] + "TARGET_SIMD" +{ + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (mode, operands[1]); + } +}) + +(define_insn "*aarch64_mov" + [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") + (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" + +{ + switch (which_alternative) + { + case 0: return "#"; + case 1: return "st1\\t{%S1.16b - %1.16b}, %0"; + case 2: return "ld1\\t{%S0.16b - %0.16b}, %1"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_move,neon_store_reg_q,\ + neon_load_reg_q") + (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] +) + +(define_insn "aarch64_be_ld1" + [(set (match_operand:VALLDI 0 "register_operand" "=w") + (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD1))] + "TARGET_SIMD" + "ld1\\t{%0}, %1" + [(set_attr "type" "neon_load1_1reg")] +) + +(define_insn "aarch64_be_st1" + [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")] + UNSPEC_ST1))] + "TARGET_SIMD" + "st1\\t{%1}, %0" + [(set_attr "type" "neon_store1_1reg")] +) + +(define_split + [(set (match_operand:OI 0 "register_operand" "") + (match_operand:OI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + + aarch64_simd_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:CI 0 "register_operand" "") + (match_operand:CI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[3], src[3]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + dest[2] = gen_rtx_REG (TFmode, rdest + 2); + src[2] = gen_rtx_REG (TFmode, rsrc + 2); + + aarch64_simd_disambiguate_copy (operands, dest, src, 3); +}) + +(define_split + [(set (match_operand:XI 0 "register_operand" "") + (match_operand:XI 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 6) (match_dup 7))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[4], src[4]; + + dest[0] = gen_rtx_REG (TFmode, rdest); + src[0] = gen_rtx_REG (TFmode, rsrc); + dest[1] = gen_rtx_REG (TFmode, rdest + 1); + src[1] = gen_rtx_REG (TFmode, rsrc + 1); + dest[2] = gen_rtx_REG (TFmode, rdest + 2); + src[2] = gen_rtx_REG (TFmode, rsrc + 2); + dest[3] = gen_rtx_REG (TFmode, rdest + 3); + src[3] = gen_rtx_REG (TFmode, rsrc + 3); + + aarch64_simd_disambiguate_copy (operands, dest, src, 4); +}) + +(define_insn "aarch64_ld2_dreg" + [(set 
(match_operand:OI 0 "register_operand" "=w") + (subreg:OI + (vec_concat: + (vec_concat: + (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD2) + (vec_duplicate:VD (const_int 0))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD2) + (vec_duplicate:VD (const_int 0)))) 0))] + "TARGET_SIMD" + "ld2\\t{%S0. - %T0.}, %1" + [(set_attr "type" "neon_load2_2reg")] +) + +(define_insn "aarch64_ld2_dreg" + [(set (match_operand:OI 0 "register_operand" "=w") + (subreg:OI + (vec_concat: + (vec_concat: + (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD2) + (const_int 0)) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD2) + (const_int 0))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %T0.1d}, %1" + [(set_attr "type" "neon_load1_2reg")] +) + +(define_insn "aarch64_ld3_dreg" + [(set (match_operand:CI 0 "register_operand" "=w") + (subreg:CI + (vec_concat: + (vec_concat: + (vec_concat: + (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0)))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD3) + (vec_duplicate:VD (const_int 0)))) 0))] + "TARGET_SIMD" + "ld3\\t{%S0. - %U0.}, %1" + [(set_attr "type" "neon_load3_3reg")] +) + +(define_insn "aarch64_ld3_dreg" + [(set (match_operand:CI 0 "register_operand" "=w") + (subreg:CI + (vec_concat: + (vec_concat: + (vec_concat: + (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD3) + (const_int 0)) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD3) + (const_int 0))) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD3) + (const_int 0))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %U0.1d}, %1" + [(set_attr "type" "neon_load1_3reg")] +) + +(define_insn "aarch64_ld4_dreg" + [(set (match_operand:XI 0 "register_operand" "=w") + (subreg:XI + (vec_concat: + (vec_concat: + (vec_concat: + (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0)))) + (vec_concat: + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))) + (vec_concat: + (unspec:VD [(match_dup 1)] + UNSPEC_LD4) + (vec_duplicate:VD (const_int 0))))) 0))] + "TARGET_SIMD" + "ld4\\t{%S0. 
- %V0.}, %1" + [(set_attr "type" "neon_load4_4reg")] +) + +(define_insn "aarch64_ld4_dreg" + [(set (match_operand:XI 0 "register_operand" "=w") + (subreg:XI + (vec_concat: + (vec_concat: + (vec_concat: + (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")] + UNSPEC_LD4) + (const_int 0)) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0))) + (vec_concat: + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0)) + (vec_concat: + (unspec:DX [(match_dup 1)] + UNSPEC_LD4) + (const_int 0)))) 0))] + "TARGET_SIMD" + "ld1\\t{%S0.1d - %V0.1d}, %1" + [(set_attr "type" "neon_load1_4reg")] +) + +(define_expand "aarch64_ld" + [(match_operand:VSTRUCT 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + emit_insn (gen_aarch64_ld_dreg (operands[0], mem)); + DONE; +}) + +(define_expand "aarch64_ld1" + [(match_operand:VALL 0 "register_operand") + (match_operand:DI 1 "register_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_be_ld1 (operands[0], mem)); + else + emit_move_insn (operands[0], mem); + DONE; +}) + +(define_expand "aarch64_ld" + [(match_operand:VSTRUCT 0 "register_operand" "=w") + (match_operand:DI 1 "register_operand" "r") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[1]); + + emit_insn (gen_vec_load_lanes (operands[0], mem)); + DONE; +}) + +;; Expanders for builtins to extract vector registers from large +;; opaque integer modes. + +;; D-register list. + +(define_expand "aarch64_get_dreg" + [(match_operand:VDC 0 "register_operand" "=w") + (match_operand:VSTRUCT 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[2]); + rtx temp = gen_reg_rtx (mode); + int offset = part * 16; + + emit_move_insn (temp, gen_rtx_SUBREG (mode, operands[1], offset)); + emit_move_insn (operands[0], gen_lowpart (mode, temp)); + DONE; +}) + +;; Q-register list. + +(define_expand "aarch64_get_qreg" + [(match_operand:VQ 0 "register_operand" "=w") + (match_operand:VSTRUCT 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[2]); + int offset = part * 16; + + emit_move_insn (operands[0], + gen_rtx_SUBREG (mode, operands[1], offset)); + DONE; +}) + +;; Permuted-store expanders for neon intrinsics. 
+ +;; Permute instructions + +;; vec_perm support + +(define_expand "vec_perm_const" + [(match_operand:VALL 0 "register_operand") + (match_operand:VALL 1 "register_operand") + (match_operand:VALL 2 "register_operand") + (match_operand: 3)] + "TARGET_SIMD" +{ + if (aarch64_expand_vec_perm_const (operands[0], operands[1], + operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "vec_perm" + [(match_operand:VB 0 "register_operand") + (match_operand:VB 1 "register_operand") + (match_operand:VB 2 "register_operand") + (match_operand:VB 3 "register_operand")] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" +{ + aarch64_expand_vec_perm (operands[0], operands[1], + operands[2], operands[3]); + DONE; +}) + +(define_insn "aarch64_tbl1" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:V16QI 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SIMD" + "tbl\\t%0., {%1.16b}, %2." + [(set_attr "type" "neon_tbl1")] +) + +;; Two source registers. + +(define_insn "aarch64_tbl2v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:OI 1 "register_operand" "w") + (match_operand:V16QI 2 "register_operand" "w")] + UNSPEC_TBL))] + "TARGET_SIMD" + "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b" + [(set_attr "type" "neon_tbl2_q")] +) + +(define_insn_and_split "aarch64_combinev16qi" + [(set (match_operand:OI 0 "register_operand" "=w") + (unspec:OI [(match_operand:V16QI 1 "register_operand" "w") + (match_operand:V16QI 2 "register_operand" "w")] + UNSPEC_CONCAT))] + "TARGET_SIMD" + "#" + "&& reload_completed" + [(const_int 0)] +{ + aarch64_split_combinev16qi (operands); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_insn "aarch64_" + [(set (match_operand:VALL 0 "register_operand" "=w") + (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") + (match_operand:VALL 2 "register_operand" "w")] + PERMUTE))] + "TARGET_SIMD" + "\\t%0., %1., %2." + [(set_attr "type" "neon_permute")] +) + +(define_insn "aarch64_st2_dreg" + [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:TI [(match_operand:OI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st2\\t{%S1. - %T1.}, %0" + [(set_attr "type" "neon_store2_2reg")] +) + +(define_insn "aarch64_st2_dreg" + [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:TI [(match_operand:OI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %T1.1d}, %0" + [(set_attr "type" "neon_store1_2reg")] +) + +(define_insn "aarch64_st3_dreg" + [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:EI [(match_operand:CI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st3\\t{%S1. - %U1.}, %0" + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "aarch64_st3_dreg" + [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:EI [(match_operand:CI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %U1.1d}, %0" + [(set_attr "type" "neon_store1_3reg")] +) + +(define_insn "aarch64_st4_dreg" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:XI 1 "register_operand" "w") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st4\\t{%S1. 
- %V1.}, %0" + [(set_attr "type" "neon_store4_4reg")] +) + +(define_insn "aarch64_st4_dreg" + [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:OI [(match_operand:XI 1 "register_operand" "w") + (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" + "st1\\t{%S1.1d - %V1.1d}, %0" + [(set_attr "type" "neon_store1_4reg")] +) + +(define_expand "aarch64_st" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:VSTRUCT 1 "register_operand" "w") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + emit_insn (gen_aarch64_st_dreg (mem, operands[1])); + DONE; +}) + +(define_expand "aarch64_st" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:VSTRUCT 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + emit_insn (gen_vec_store_lanes (mem, operands[1])); + DONE; +}) + +(define_expand "aarch64_st1" + [(match_operand:DI 0 "register_operand") + (match_operand:VALL 1 "register_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_be_st1 (mem, operands[1])); + else + emit_move_insn (mem, operands[1]); + DONE; +}) + +;; Expander for builtins to insert vector registers into large +;; opaque integer modes. + +;; Q-register list. We don't need a D-reg inserter as we zero +;; extend them in arm_neon.h and insert the resulting Q-regs. + +(define_expand "aarch64_set_qreg" + [(match_operand:VSTRUCT 0 "register_operand" "+w") + (match_operand:VSTRUCT 1 "register_operand" "0") + (match_operand:VQ 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_SIMD" +{ + int part = INTVAL (operands[3]); + int offset = part * 16; + + emit_move_insn (operands[0], operands[1]); + emit_move_insn (gen_rtx_SUBREG (mode, operands[0], offset), + operands[2]); + DONE; +}) + +;; Standard pattern name vec_init. + +(define_expand "vec_init" + [(match_operand:VALL 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SIMD" +{ + aarch64_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +(define_insn "*aarch64_simd_ld1r" + [(set (match_operand:VALLDI 0 "register_operand" "=w") + (vec_duplicate:VALLDI + (match_operand: 1 "aarch64_simd_struct_operand" "Utv")))] + "TARGET_SIMD" + "ld1r\\t{%0.}, %1" + [(set_attr "type" "neon_load1_all_lanes")] +) + +(define_insn "aarch64_frecpe" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] + UNSPEC_FRECPE))] + "TARGET_SIMD" + "frecpe\\t%0., %1." + [(set_attr "type" "neon_fp_recpe_")] +) + +(define_insn "aarch64_frecp" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRECP))] + "TARGET_SIMD" + "frecp\\t%0, %1" + [(set_attr "type" "neon_fp_recp_")] +) + +(define_insn "aarch64_frecps" + [(set (match_operand:VALLF 0 "register_operand" "=w") + (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") + (match_operand:VALLF 2 "register_operand" "w")] + UNSPEC_FRECPS))] + "TARGET_SIMD" + "frecps\\t%0, %1, %2" + [(set_attr "type" "neon_fp_recps_")] +) + +;; Standard pattern name vec_extract. 
+ +(define_expand "vec_extract" + [(match_operand: 0 "aarch64_simd_nonimmediate_operand" "") + (match_operand:VALL 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_SIMD" +{ + emit_insn + (gen_aarch64_get_lane (operands[0], operands[1], operands[2])); + DONE; +}) + +;; aes + +(define_insn "aarch64_crypto_aesv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "register_operand" "w")] + CRYPTO_AES))] + "TARGET_SIMD && TARGET_CRYPTO" + "aes\\t%0.16b, %2.16b" + [(set_attr "type" "crypto_aes")] +) + +(define_insn "aarch64_crypto_aesv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] + CRYPTO_AESMC))] + "TARGET_SIMD && TARGET_CRYPTO" + "aes\\t%0.16b, %1.16b" + [(set_attr "type" "crypto_aes")] +) + +;; sha1 + +(define_insn "aarch64_crypto_sha1hsi" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 + "register_operand" "w")] + UNSPEC_SHA1H))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1h\\t%s0, %s1" + [(set_attr "type" "crypto_sha1_fast")] +) + +(define_insn "aarch64_crypto_sha1su1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w")] + UNSPEC_SHA1SU1))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1su1\\t%0.4s, %2.4s" + [(set_attr "type" "crypto_sha1_fast")] +) + +(define_insn "aarch64_crypto_sha1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + CRYPTO_SHA1))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1\\t%q0, %s2, %3.4s" + [(set_attr "type" "crypto_sha1_slow")] +) + +(define_insn "aarch64_crypto_sha1su0v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + UNSPEC_SHA1SU0))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha1su0\\t%0.4s, %2.4s, %3.4s" + [(set_attr "type" "crypto_sha1_xor")] +) + +;; sha256 + +(define_insn "aarch64_crypto_sha256hv4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + CRYPTO_SHA256))] + "TARGET_SIMD && TARGET_CRYPTO" + "sha256h\\t%q0, %q2, %3.4s" + [(set_attr "type" "crypto_sha256_slow")] +) + +(define_insn "aarch64_crypto_sha256su0v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w")] + UNSPEC_SHA256SU0))] + "TARGET_SIMD &&TARGET_CRYPTO" + "sha256su0\\t%0.4s, %2.4s" + [(set_attr "type" "crypto_sha256_fast")] +) + +(define_insn "aarch64_crypto_sha256su1v4si" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "register_operand" "w") + (match_operand:V4SI 3 "register_operand" "w")] + UNSPEC_SHA256SU1))] + "TARGET_SIMD &&TARGET_CRYPTO" + "sha256su1\\t%0.4s, %2.4s, %3.4s" + [(set_attr "type" "crypto_sha256_slow")] +) + +;; pmull + +(define_insn "aarch64_crypto_pmulldi" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI 
[(match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w")] + UNSPEC_PMULL))] + "TARGET_SIMD && TARGET_CRYPTO" + "pmull\\t%0.1q, %1.1d, %2.1d" + [(set_attr "type" "neon_mul_d_long")] +) + +(define_insn "aarch64_crypto_pmullv2di" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") + (match_operand:V2DI 2 "register_operand" "w")] + UNSPEC_PMULL2))] + "TARGET_SIMD && TARGET_CRYPTO" + "pmull2\\t%0.1q, %1.2d, %2.2d" + [(set_attr "type" "neon_mul_d_long")] +) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-tune.md b/gcc-4.9/gcc/config/aarch64/aarch64-tune.md new file mode 100644 index 000000000..b7e40e0b5 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64-tune.md @@ -0,0 +1,5 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically by gentune.sh from aarch64-cores.def +(define_attr "tune" + "cortexa53,cortexa15,cortexa57cortexa53" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c new file mode 100644 index 000000000..ebd58c007 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -0,0 +1,8544 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "insn-codes.h" +#include "rtl.h" +#include "insn-attr.h" +#include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "regs.h" +#include "df.h" +#include "hard-reg-set.h" +#include "output.h" +#include "expr.h" +#include "reload.h" +#include "toplev.h" +#include "target.h" +#include "target-def.h" +#include "targhooks.h" +#include "ggc.h" +#include "function.h" +#include "tm_p.h" +#include "recog.h" +#include "langhooks.h" +#include "diagnostic-core.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "optabs.h" +#include "dwarf2.h" +#include "cfgloop.h" +#include "tree-vectorizer.h" +#include "config/arm/aarch-cost-tables.h" + +/* Defined for convenience. */ +#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) + +/* Classifies an address. + + ADDRESS_REG_IMM + A simple base register plus immediate offset. + + ADDRESS_REG_WB + A base register indexed by immediate offset with writeback. + + ADDRESS_REG_REG + A base register indexed by (optionally scaled) register. + + ADDRESS_REG_UXTW + A base register indexed by (optionally scaled) zero-extended register. + + ADDRESS_REG_SXTW + A base register indexed by (optionally scaled) sign-extended register. 
+ + ADDRESS_LO_SUM + A LO_SUM rtx with a base register and "LO12" symbol relocation. + + ADDRESS_SYMBOLIC: + A constant symbolic address, in pc-relative literal pool. */ + +enum aarch64_address_type { + ADDRESS_REG_IMM, + ADDRESS_REG_WB, + ADDRESS_REG_REG, + ADDRESS_REG_UXTW, + ADDRESS_REG_SXTW, + ADDRESS_LO_SUM, + ADDRESS_SYMBOLIC +}; + +struct aarch64_address_info { + enum aarch64_address_type type; + rtx base; + rtx offset; + int shift; + enum aarch64_symbol_type symbol_type; +}; + +struct simd_immediate_info +{ + rtx value; + int shift; + int element_width; + bool mvn; + bool msl; +}; + +/* The current code model. */ +enum aarch64_code_model aarch64_cmodel; + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS 1 +#endif + +static bool aarch64_lra_p (void); +static bool aarch64_composite_type_p (const_tree, enum machine_mode); +static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode, + const_tree, + enum machine_mode *, int *, + bool *); +static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; +static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; +static void aarch64_override_options_after_change (void); +static bool aarch64_vector_mode_supported_p (enum machine_mode); +static unsigned bit_count (unsigned HOST_WIDE_INT); +static bool aarch64_const_vec_all_same_int_p (rtx, + HOST_WIDE_INT, HOST_WIDE_INT); + +static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel); + +/* The processor for which instructions should be scheduled. */ +enum aarch64_processor aarch64_tune = cortexa53; + +/* The current tuning set. */ +const struct tune_params *aarch64_tune_params; + +/* Mask to specify which instructions we are allowed to generate. */ +unsigned long aarch64_isa_flags = 0; + +/* Mask to specify which instruction scheduling options should be used. */ +unsigned long aarch64_tune_flags = 0; + +/* Tuning parameters. */ + +#if HAVE_DESIGNATED_INITIALIZERS +#define NAMED_PARAM(NAME, VAL) .NAME = (VAL) +#else +#define NAMED_PARAM(NAME, VAL) (VAL) +#endif + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_addrcost_table generic_addrcost_table = +{ + NAMED_PARAM (pre_modify, 0), + NAMED_PARAM (post_modify, 0), + NAMED_PARAM (register_offset, 0), + NAMED_PARAM (register_extend, 0), + NAMED_PARAM (imm_offset, 0) +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_regmove_cost generic_regmove_cost = +{ + NAMED_PARAM (GP2GP, 1), + NAMED_PARAM (GP2FP, 2), + NAMED_PARAM (FP2GP, 2), + /* We currently do not provide direct support for TFmode Q->Q move. + Therefore we need to raise the cost above 2 in order to have + reload handle the situation. */ + NAMED_PARAM (FP2FP, 4) +}; + +/* Generic costs for vector insn classes. 
*/ +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct cpu_vector_cost generic_vector_cost = +{ + NAMED_PARAM (scalar_stmt_cost, 1), + NAMED_PARAM (scalar_load_cost, 1), + NAMED_PARAM (scalar_store_cost, 1), + NAMED_PARAM (vec_stmt_cost, 1), + NAMED_PARAM (vec_to_scalar_cost, 1), + NAMED_PARAM (scalar_to_vec_cost, 1), + NAMED_PARAM (vec_align_load_cost, 1), + NAMED_PARAM (vec_unalign_load_cost, 1), + NAMED_PARAM (vec_unalign_store_cost, 1), + NAMED_PARAM (vec_store_cost, 1), + NAMED_PARAM (cond_taken_branch_cost, 3), + NAMED_PARAM (cond_not_taken_branch_cost, 1) +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif +static const struct tune_params generic_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 2) +}; + +static const struct tune_params cortexa53_tunings = +{ + &cortexa53_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 2) +}; + +static const struct tune_params cortexa57_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &generic_vector_cost, + NAMED_PARAM (memmov_cost, 4), + NAMED_PARAM (issue_rate, 3) +}; + +/* A processor implementing AArch64. */ +struct processor +{ + const char *const name; + enum aarch64_processor core; + const char *arch; + const unsigned long flags; + const struct tune_params *const tune; +}; + +/* Processor cores implementing AArch64. */ +static const struct processor all_cores[] = +{ +#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ + {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings}, +#include "aarch64-cores.def" +#undef AARCH64_CORE + {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings}, + {NULL, aarch64_none, NULL, 0, NULL} +}; + +/* Architectures implementing AArch64. */ +static const struct processor all_architectures[] = +{ +#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ + {NAME, CORE, #ARCH, FLAGS, NULL}, +#include "aarch64-arches.def" +#undef AARCH64_ARCH + {NULL, aarch64_none, NULL, 0, NULL} +}; + +/* Target specification. These are populated as commandline arguments + are processed, or NULL if not specified. */ +static const struct processor *selected_arch; +static const struct processor *selected_cpu; +static const struct processor *selected_tune; + +#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) + +/* An ISA extension in the co-processor and main instruction set space. */ +struct aarch64_option_extension +{ + const char *const name; + const unsigned long flags_on; + const unsigned long flags_off; +}; + +/* ISA extensions in AArch64. */ +static const struct aarch64_option_extension all_extensions[] = +{ +#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \ + {NAME, FLAGS_ON, FLAGS_OFF}, +#include "aarch64-option-extensions.def" +#undef AARCH64_OPT_EXTENSION + {NULL, 0, 0} +}; + +/* Used to track the size of an address when generating a pre/post + increment address. */ +static enum machine_mode aarch64_memory_reference_mode; + +/* Used to force GTY into this file. */ +static GTY(()) int gty_dummy; + +/* A table of valid AArch64 "bitmask immediate" values for + logical instructions. 
*/ + +#define AARCH64_NUM_BITMASKS 5334 +static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS]; + +/* Did we set flag_omit_frame_pointer just so + aarch64_frame_pointer_required would be called? */ +static bool faked_omit_frame_pointer; + +typedef enum aarch64_cond_code +{ + AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL, + AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT, + AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV +} +aarch64_cc; + +#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1)) + +/* The condition codes of the processor, and the inverse function. */ +static const char * const aarch64_condition_codes[] = +{ + "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" +}; + +/* Provide a mapping from gcc register numbers to dwarf register numbers. */ +unsigned +aarch64_dbx_register_number (unsigned regno) +{ + if (GP_REGNUM_P (regno)) + return AARCH64_DWARF_R0 + regno - R0_REGNUM; + else if (regno == SP_REGNUM) + return AARCH64_DWARF_SP; + else if (FP_REGNUM_P (regno)) + return AARCH64_DWARF_V0 + regno - V0_REGNUM; + + /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no + equivalent DWARF register. */ + return DWARF_FRAME_REGISTERS; +} + +/* Return TRUE if MODE is any of the large INT modes. */ +static bool +aarch64_vect_struct_mode_p (enum machine_mode mode) +{ + return mode == OImode || mode == CImode || mode == XImode; +} + +/* Return TRUE if MODE is any of the vector modes. */ +static bool +aarch64_vector_mode_p (enum machine_mode mode) +{ + return aarch64_vector_mode_supported_p (mode) + || aarch64_vect_struct_mode_p (mode); +} + +/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */ +static bool +aarch64_array_mode_supported_p (enum machine_mode mode, + unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_SIMD + && AARCH64_VALID_SIMD_QREG_MODE (mode) + && (nelems >= 2 && nelems <= 4)) + return true; + + return false; +} + +/* Implement HARD_REGNO_NREGS. */ + +int +aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode) +{ + switch (aarch64_regno_regclass (regno)) + { + case FP_REGS: + case FP_LO_REGS: + return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG; + default: + return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + } + gcc_unreachable (); +} + +/* Implement HARD_REGNO_MODE_OK. */ + +int +aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_CC) + return regno == CC_REGNUM; + + if (regno == SP_REGNUM) + /* The purpose of comparing with ptr_mode is to support the + global register variable associated with the stack pointer + register via the syntax of asm ("wsp") in ILP32. */ + return mode == Pmode || mode == ptr_mode; + + if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM) + return mode == Pmode; + + if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode)) + return 1; + + if (FP_REGNUM_P (regno)) + { + if (aarch64_vect_struct_mode_p (mode)) + return + (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM; + else + return 1; + } + + return 0; +} + +/* Return true if calls to DECL should be treated as + long-calls (ie called via a register). */ +static bool +aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED) +{ + return false; +} + +/* Return true if calls to symbol-ref SYM should be treated as + long-calls (ie called via a register). 
*/ +bool +aarch64_is_long_call_p (rtx sym) +{ + return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym)); +} + +/* Return true if the offsets to a zero/sign-extract operation + represent an expression that matches an extend operation. The + operands represent the paramters from + + (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */ +bool +aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm, + rtx extract_imm) +{ + HOST_WIDE_INT mult_val, extract_val; + + if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm)) + return false; + + mult_val = INTVAL (mult_imm); + extract_val = INTVAL (extract_imm); + + if (extract_val > 8 + && extract_val < GET_MODE_BITSIZE (mode) + && exact_log2 (extract_val & ~7) > 0 + && (extract_val & 7) <= 4 + && mult_val == (1 << (extract_val & 7))) + return true; + + return false; +} + +/* Emit an insn that's a simple single-set. Both the operands must be + known to be valid. */ +inline static rtx +emit_set_insn (rtx x, rtx y) +{ + return emit_insn (gen_rtx_SET (VOIDmode, x, y)); +} + +/* X and Y are two things to compare using CODE. Emit the compare insn and + return the rtx for register 0 in the proper mode. */ +rtx +aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) +{ + enum machine_mode mode = SELECT_CC_MODE (code, x, y); + rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); + + emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + return cc_reg; +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +rtx +aarch64_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +/* Return the TLS model to use for ADDR. */ + +static enum tls_model +tls_symbolic_operand_type (rtx addr) +{ + enum tls_model tls_kind = TLS_MODEL_NONE; + rtx sym, addend; + + if (GET_CODE (addr) == CONST) + { + split_const (addr, &sym, &addend); + if (GET_CODE (sym) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (sym); + } + else if (GET_CODE (addr) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (addr); + + return tls_kind; +} + +/* We'll allow lo_sum's in addresses in our legitimate addresses + so that combine would take care of combining addresses where + necessary, but for generation purposes, we'll generate the address + as : + RTL Absolute + tmp = hi (symbol_ref); adrp x1, foo + dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo + nop + + PIC TLS + adrp x1, :got:foo adrp tmp, :tlsgd:foo + ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo + bl __tls_get_addr + nop + + Load TLS symbol, depending on TLS mechanism and TLS access model. + + Global Dynamic - Traditional TLS: + adrp tmp, :tlsgd:imm + add dest, tmp, #:tlsgd_lo12:imm + bl __tls_get_addr + + Global Dynamic - TLS Descriptors: + adrp dest, :tlsdesc:imm + ldr tmp, [dest, #:tlsdesc_lo12:imm] + add dest, dest, #:tlsdesc_lo12:imm + blr tmp + mrs tp, tpidr_el0 + add dest, dest, tp + + Initial Exec: + mrs tp, tpidr_el0 + adrp tmp, :gottprel:imm + ldr dest, [tmp, #:gottprel_lo12:imm] + add dest, dest, tp + + Local Exec: + mrs tp, tpidr_el0 + add t0, tp, #:tprel_hi12:imm + add t0, #:tprel_lo12_nc:imm +*/ + +static void +aarch64_load_symref_appropriately (rtx dest, rtx imm, + enum aarch64_symbol_type type) +{ + switch (type) + { + case SYMBOL_SMALL_ABSOLUTE: + { + /* In ILP32, the mode of dest can be either SImode or DImode. 
*/ + rtx tmp_reg = dest; + enum machine_mode mode = GET_MODE (dest); + + gcc_assert (mode == Pmode || mode == ptr_mode); + + if (can_create_pseudo_p ()) + tmp_reg = gen_reg_rtx (mode); + + emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm)); + emit_insn (gen_add_losym (dest, tmp_reg, imm)); + return; + } + + case SYMBOL_TINY_ABSOLUTE: + emit_insn (gen_rtx_SET (Pmode, dest, imm)); + return; + + case SYMBOL_SMALL_GOT: + { + /* In ILP32, the mode of dest can be either SImode or DImode, + while the got entry is always of SImode size. The mode of + dest depends on how dest is used: if dest is assigned to a + pointer (e.g. in the memory), it has SImode; it may have + DImode if dest is dereferenced to access the memeory. + This is why we have to handle three different ldr_got_small + patterns here (two patterns for ILP32). */ + rtx tmp_reg = dest; + enum machine_mode mode = GET_MODE (dest); + + if (can_create_pseudo_p ()) + tmp_reg = gen_reg_rtx (mode); + + emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm)); + if (mode == ptr_mode) + { + if (mode == DImode) + emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm)); + else + emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm)); + } + else + { + gcc_assert (mode == Pmode); + emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm)); + } + + return; + } + + case SYMBOL_SMALL_TLSGD: + { + rtx insns; + rtx result = gen_rtx_REG (Pmode, R0_REGNUM); + + start_sequence (); + emit_call_insn (gen_tlsgd_small (result, imm)); + insns = get_insns (); + end_sequence (); + + RTL_CONST_CALL_P (insns) = 1; + emit_libcall_block (insns, dest, result, imm); + return; + } + + case SYMBOL_SMALL_TLSDESC: + { + rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM); + rtx tp; + + emit_insn (gen_tlsdesc_small (imm)); + tp = aarch64_load_tp (NULL); + emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0))); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_SMALL_GOTTPREL: + { + rtx tmp_reg = gen_reg_rtx (Pmode); + rtx tp = aarch64_load_tp (NULL); + emit_insn (gen_tlsie_small (tmp_reg, imm)); + emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg))); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_SMALL_TPREL: + { + rtx tp = aarch64_load_tp (NULL); + emit_insn (gen_tlsle_small (dest, tp, imm)); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } + + case SYMBOL_TINY_GOT: + emit_insn (gen_ldr_got_tiny (dest, imm)); + return; + + default: + gcc_unreachable (); + } +} + +/* Emit a move from SRC to DEST. Assume that the move expanders can + handle all moves if !can_create_pseudo_p (). The distinction is + important because, unlike emit_move_insn, the move expanders know + how to force Pmode objects into the constant pool even when the + constant pool address is not itself legitimate. */ +static rtx +aarch64_emit_move (rtx dest, rtx src) +{ + return (can_create_pseudo_p () + ? emit_move_insn (dest, src) + : emit_move_insn_1 (dest, src)); +} + +/* Split a 128-bit move operation into two 64-bit move operations, + taking care to handle partial overlap of register to register + copies. Special cases are needed when moving between GP regs and + FP regs. SRC can be a register, constant or memory; DST a register + or memory. If either operand is memory it must not have any side + effects. 
*/ +void +aarch64_split_128bit_move (rtx dst, rtx src) +{ + rtx dst_lo, dst_hi; + rtx src_lo, src_hi; + + enum machine_mode mode = GET_MODE (dst); + + gcc_assert (mode == TImode || mode == TFmode); + gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); + gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); + + if (REG_P (dst) && REG_P (src)) + { + int src_regno = REGNO (src); + int dst_regno = REGNO (dst); + + /* Handle FP <-> GP regs. */ + if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno)) + { + src_lo = gen_lowpart (word_mode, src); + src_hi = gen_highpart (word_mode, src); + + if (mode == TImode) + { + emit_insn (gen_aarch64_movtilow_di (dst, src_lo)); + emit_insn (gen_aarch64_movtihigh_di (dst, src_hi)); + } + else + { + emit_insn (gen_aarch64_movtflow_di (dst, src_lo)); + emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi)); + } + return; + } + else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno)) + { + dst_lo = gen_lowpart (word_mode, dst); + dst_hi = gen_highpart (word_mode, dst); + + if (mode == TImode) + { + emit_insn (gen_aarch64_movdi_tilow (dst_lo, src)); + emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src)); + } + else + { + emit_insn (gen_aarch64_movdi_tflow (dst_lo, src)); + emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src)); + } + return; + } + } + + dst_lo = gen_lowpart (word_mode, dst); + dst_hi = gen_highpart (word_mode, dst); + src_lo = gen_lowpart (word_mode, src); + src_hi = gen_highpart_mode (word_mode, mode, src); + + /* At most one pairing may overlap. */ + if (reg_overlap_mentioned_p (dst_lo, src_hi)) + { + aarch64_emit_move (dst_hi, src_hi); + aarch64_emit_move (dst_lo, src_lo); + } + else + { + aarch64_emit_move (dst_lo, src_lo); + aarch64_emit_move (dst_hi, src_hi); + } +} + +bool +aarch64_split_128bit_move_p (rtx dst, rtx src) +{ + return (! REG_P (src) + || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); +} + +/* Split a complex SIMD combine. */ + +void +aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) +{ + enum machine_mode src_mode = GET_MODE (src1); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src1) && REG_P (src2)) + { + rtx (*gen) (rtx, rtx, rtx); + + switch (src_mode) + { + case V8QImode: + gen = gen_aarch64_simd_combinev8qi; + break; + case V4HImode: + gen = gen_aarch64_simd_combinev4hi; + break; + case V2SImode: + gen = gen_aarch64_simd_combinev2si; + break; + case V2SFmode: + gen = gen_aarch64_simd_combinev2sf; + break; + case DImode: + gen = gen_aarch64_simd_combinedi; + break; + case DFmode: + gen = gen_aarch64_simd_combinedf; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src1, src2)); + return; + } +} + +/* Split a complex SIMD move. 
*/ + +void +aarch64_split_simd_move (rtx dst, rtx src) +{ + enum machine_mode src_mode = GET_MODE (src); + enum machine_mode dst_mode = GET_MODE (dst); + + gcc_assert (VECTOR_MODE_P (dst_mode)); + + if (REG_P (dst) && REG_P (src)) + { + rtx (*gen) (rtx, rtx); + + gcc_assert (VECTOR_MODE_P (src_mode)); + + switch (src_mode) + { + case V16QImode: + gen = gen_aarch64_split_simd_movv16qi; + break; + case V8HImode: + gen = gen_aarch64_split_simd_movv8hi; + break; + case V4SImode: + gen = gen_aarch64_split_simd_movv4si; + break; + case V2DImode: + gen = gen_aarch64_split_simd_movv2di; + break; + case V4SFmode: + gen = gen_aarch64_split_simd_movv4sf; + break; + case V2DFmode: + gen = gen_aarch64_split_simd_movv2df; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen (dst, src)); + return; + } +} + +static rtx +aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value) +{ + if (can_create_pseudo_p ()) + return force_reg (mode, value); + else + { + x = aarch64_emit_move (x, value); + return x; + } +} + + +static rtx +aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset) +{ + if (!aarch64_plus_immediate (GEN_INT (offset), mode)) + { + rtx high; + /* Load the full offset into a register. This + might be improvable in the future. */ + high = GEN_INT (offset); + offset = 0; + high = aarch64_force_temporary (mode, temp, high); + reg = aarch64_force_temporary (mode, temp, + gen_rtx_PLUS (mode, high, reg)); + } + return plus_constant (mode, reg, offset); +} + +void +aarch64_expand_mov_immediate (rtx dest, rtx imm) +{ + enum machine_mode mode = GET_MODE (dest); + unsigned HOST_WIDE_INT mask; + int i; + bool first; + unsigned HOST_WIDE_INT val; + bool subtargets; + rtx subtarget; + int one_match, zero_match; + + gcc_assert (mode == SImode || mode == DImode); + + /* Check on what type of symbol it is. */ + if (GET_CODE (imm) == SYMBOL_REF + || GET_CODE (imm) == LABEL_REF + || GET_CODE (imm) == CONST) + { + rtx mem, base, offset; + enum aarch64_symbol_type sty; + + /* If we have (const (plus symbol offset)), separate out the offset + before we start classifying the symbol. 
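+
+     For example (with an illustrative symbol name), an immediate such as
+
+       (const:DI (plus:DI (symbol_ref:DI ("arr")) (const_int 12)))
+
+     is split into the base "arr" and the offset 12, and only the base is
+     classified below.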
*/ + split_const (imm, &base, &offset); + + sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR); + switch (sty) + { + case SYMBOL_FORCE_TO_MEM: + if (offset != const0_rtx + && targetm.cannot_force_const_mem (mode, imm)) + { + gcc_assert (can_create_pseudo_p ()); + base = aarch64_force_temporary (mode, dest, base); + base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); + aarch64_emit_move (dest, base); + return; + } + mem = force_const_mem (ptr_mode, imm); + gcc_assert (mem); + if (mode != ptr_mode) + mem = gen_rtx_ZERO_EXTEND (mode, mem); + emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); + return; + + case SYMBOL_SMALL_TLSGD: + case SYMBOL_SMALL_TLSDESC: + case SYMBOL_SMALL_GOTTPREL: + case SYMBOL_SMALL_GOT: + case SYMBOL_TINY_GOT: + if (offset != const0_rtx) + { + gcc_assert(can_create_pseudo_p ()); + base = aarch64_force_temporary (mode, dest, base); + base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); + aarch64_emit_move (dest, base); + return; + } + /* FALLTHRU */ + + case SYMBOL_SMALL_TPREL: + case SYMBOL_SMALL_ABSOLUTE: + case SYMBOL_TINY_ABSOLUTE: + aarch64_load_symref_appropriately (dest, imm, sty); + return; + + default: + gcc_unreachable (); + } + } + + if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode)) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); + return; + } + + if (!CONST_INT_P (imm)) + { + if (GET_CODE (imm) == HIGH) + emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); + else + { + rtx mem = force_const_mem (mode, imm); + gcc_assert (mem); + emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); + } + + return; + } + + if (mode == SImode) + { + /* We know we can't do this in 1 insn, and we must be able to do it + in two; so don't mess around looking for sequences that don't buy + us anything. */ + emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff))); + emit_insn (gen_insv_immsi (dest, GEN_INT (16), + GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); + return; + } + + /* Remaining cases are all for DImode. */ + + val = INTVAL (imm); + subtargets = optimize && can_create_pseudo_p (); + + one_match = 0; + zero_match = 0; + mask = 0xffff; + + for (i = 0; i < 64; i += 16, mask <<= 16) + { + if ((val & mask) == 0) + zero_match++; + else if ((val & mask) == mask) + one_match++; + } + + if (one_match == 2) + { + mask = 0xffff; + for (i = 0; i < 64; i += 16, mask <<= 16) + { + if ((val & mask) != mask) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + return; + } + } + gcc_unreachable (); + } + + if (zero_match == 2) + goto simple_sequence; + + mask = 0x0ffff0000UL; + for (i = 16; i < 64; i += 16, mask <<= 16) + { + HOST_WIDE_INT comp = mask & ~(mask - 1); + + if (aarch64_uimm12_shift (val - (val & mask))) + { + subtarget = subtargets ? gen_reg_rtx (DImode) : dest; + + emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - (val & mask)))); + return; + } + else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask)))) + { + subtarget = subtargets ? gen_reg_rtx (DImode) : dest; + + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT ((val + comp) & mask))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - ((val + comp) & mask)))); + return; + } + else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask))) + { + subtarget = subtargets ? 
gen_reg_rtx (DImode) : dest; + + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT ((val - comp) | ~mask))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - ((val - comp) | ~mask)))); + return; + } + else if (aarch64_uimm12_shift (-(val - (val | ~mask)))) + { + subtarget = subtargets ? gen_reg_rtx (DImode) : dest; + + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT (val | ~mask))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - (val | ~mask)))); + return; + } + } + + /* See if we can do it by arithmetically combining two + immediates. */ + for (i = 0; i < AARCH64_NUM_BITMASKS; i++) + { + int j; + mask = 0xffff; + + if (aarch64_uimm12_shift (val - aarch64_bitmasks[i]) + || aarch64_uimm12_shift (-val + aarch64_bitmasks[i])) + { + subtarget = subtargets ? gen_reg_rtx (DImode) : dest; + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT (aarch64_bitmasks[i]))); + emit_insn (gen_adddi3 (dest, subtarget, + GEN_INT (val - aarch64_bitmasks[i]))); + return; + } + + for (j = 0; j < 64; j += 16, mask <<= 16) + { + if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask)) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, + GEN_INT (aarch64_bitmasks[i]))); + emit_insn (gen_insv_immdi (dest, GEN_INT (j), + GEN_INT ((val >> j) & 0xffff))); + return; + } + } + } + + /* See if we can do it by logically combining two immediates. */ + for (i = 0; i < AARCH64_NUM_BITMASKS; i++) + { + if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i]) + { + int j; + + for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) + if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j])) + { + subtarget = subtargets ? gen_reg_rtx (mode) : dest; + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT (aarch64_bitmasks[i]))); + emit_insn (gen_iordi3 (dest, subtarget, + GEN_INT (aarch64_bitmasks[j]))); + return; + } + } + else if ((val & aarch64_bitmasks[i]) == val) + { + int j; + + for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) + if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i])) + { + + subtarget = subtargets ? gen_reg_rtx (mode) : dest; + emit_insn (gen_rtx_SET (VOIDmode, subtarget, + GEN_INT (aarch64_bitmasks[j]))); + emit_insn (gen_anddi3 (dest, subtarget, + GEN_INT (aarch64_bitmasks[i]))); + return; + } + } + } + + simple_sequence: + first = true; + mask = 0xffff; + for (i = 0; i < 64; i += 16, mask <<= 16) + { + if ((val & mask) != 0) + { + if (first) + { + emit_insn (gen_rtx_SET (VOIDmode, dest, + GEN_INT (val & mask))); + first = false; + } + else + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); + } + } +} + +static bool +aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + /* Indirect calls are not currently supported. */ + if (decl == NULL) + return false; + + /* Cannot tail-call to long-calls, since these are outside of the + range of a branch instruction (we could handle this if we added + support for indirect tail-calls. */ + if (aarch64_decl_is_long_call_p (decl)) + return false; + + return true; +} + +/* Implement TARGET_PASS_BY_REFERENCE. */ + +static bool +aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size; + enum machine_mode dummymode; + int nregs; + + /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */ + size = (mode == BLKmode && type) + ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + + /* Aggregates are passed by reference based on their size. 
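+     For instance, assuming LP64 (UNITS_PER_WORD == 8): a plain structure
+     of three 64-bit integers (24 bytes) exceeds 2 * UNITS_PER_WORD and is
+     passed by reference, whereas a 16-byte structure is still passed by
+     value in a pair of registers.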
*/ + if (type && AGGREGATE_TYPE_P (type)) + { + size = int_size_in_bytes (type); + } + + /* Variable sized arguments are always returned by reference. */ + if (size < 0) + return true; + + /* Can this be a candidate to be passed in fp/simd register(s)? */ + if (aarch64_vfp_is_call_or_return_candidate (mode, type, + &dummymode, &nregs, + NULL)) + return false; + + /* Arguments which are variable sized or larger than 2 registers are + passed by reference unless they are a homogenous floating point + aggregate. */ + return size > 2 * UNITS_PER_WORD; +} + +/* Return TRUE if VALTYPE is padded to its least significant bits. */ +static bool +aarch64_return_in_msb (const_tree valtype) +{ + enum machine_mode dummy_mode; + int dummy_int; + + /* Never happens in little-endian mode. */ + if (!BYTES_BIG_ENDIAN) + return false; + + /* Only composite types smaller than or equal to 16 bytes can + be potentially returned in registers. */ + if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype)) + || int_size_in_bytes (valtype) <= 0 + || int_size_in_bytes (valtype) > 16) + return false; + + /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate) + or an HVA (Homogeneous Short-Vector Aggregate); such a special composite + is always passed/returned in the least significant bits of fp/simd + register(s). */ + if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype, + &dummy_mode, &dummy_int, NULL)) + return false; + + return true; +} + +/* Implement TARGET_FUNCTION_VALUE. + Define how to find the value returned by a function. */ + +static rtx +aarch64_function_value (const_tree type, const_tree func, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + int unsignedp; + int count; + enum machine_mode ag_mode; + + mode = TYPE_MODE (type); + if (INTEGRAL_TYPE_P (type)) + mode = promote_function_mode (type, mode, &unsignedp, func, 1); + + if (aarch64_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + if (aarch64_vfp_is_call_or_return_candidate (mode, type, + &ag_mode, &count, NULL)) + { + if (!aarch64_composite_type_p (type, mode)) + { + gcc_assert (count == 1 && mode == ag_mode); + return gen_rtx_REG (mode, V0_REGNUM); + } + else + { + int i; + rtx par; + + par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); + for (i = 0; i < count; i++) + { + rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (ag_mode))); + XVECEXP (par, 0, i) = tmp; + } + return par; + } + } + else + return gen_rtx_REG (mode, R0_REGNUM); +} + +/* Implements TARGET_FUNCTION_VALUE_REGNO_P. + Return true if REGNO is the number of a hard register in which the values + of called function may come back. */ + +static bool +aarch64_function_value_regno_p (const unsigned int regno) +{ + /* Maximum of 16 bytes can be returned in the general registers. Examples + of 16-byte return values are: 128-bit integers and 16-byte small + structures (excluding homogeneous floating-point aggregates). */ + if (regno == R0_REGNUM || regno == R1_REGNUM) + return true; + + /* Up to four fp/simd registers can return a function value, e.g. a + homogeneous floating-point aggregate having four members. */ + if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS) + return !TARGET_GENERAL_REGS_ONLY; + + return false; +} + +/* Implement TARGET_RETURN_IN_MEMORY. 
+ + If the type T of the result of a function is such that + void func (T arg) + would require that arg be passed as a value in a register (or set of + registers) according to the parameter passing rules, then the result + is returned in the same registers as would be used for such an + argument. */ + +static bool +aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size; + enum machine_mode ag_mode; + int count; + + if (!AGGREGATE_TYPE_P (type) + && TREE_CODE (type) != COMPLEX_TYPE + && TREE_CODE (type) != VECTOR_TYPE) + /* Simple scalar types always returned in registers. */ + return false; + + if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), + type, + &ag_mode, + &count, + NULL)) + return false; + + /* Types larger than 2 registers returned in memory. */ + size = int_size_in_bytes (type); + return (size < 0 || size > 2 * UNITS_PER_WORD); +} + +static bool +aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, int *nregs) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + return aarch64_vfp_is_call_or_return_candidate (mode, + type, + &pcum->aapcs_vfp_rmode, + nregs, + NULL); +} + +/* Given MODE and TYPE of a function argument, return the alignment in + bits. The idea is to suppress any stronger alignment requested by + the user and opt for the natural alignment (specified in AAPCS64 \S 4.1). + This is a helper function for local use only. */ + +static unsigned int +aarch64_function_arg_alignment (enum machine_mode mode, const_tree type) +{ + unsigned int alignment; + + if (type) + { + if (!integer_zerop (TYPE_SIZE (type))) + { + if (TYPE_MODE (type) == mode) + alignment = TYPE_ALIGN (type); + else + alignment = GET_MODE_ALIGNMENT (mode); + } + else + alignment = 0; + } + else + alignment = GET_MODE_ALIGNMENT (mode); + + return alignment; +} + +/* Layout a function argument according to the AAPCS64 rules. The rule + numbers refer to the rule numbers in the AAPCS64. */ + +static void +aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + int ncrn, nvrn, nregs; + bool allocate_ncrn, allocate_nvrn; + + /* We need to do this once per argument. */ + if (pcum->aapcs_arg_processed) + return; + + pcum->aapcs_arg_processed = true; + + allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); + allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, + mode, + type, + &nregs); + + /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable. + The following code thus handles passing by SIMD/FP registers first. */ + + nvrn = pcum->aapcs_nvrn; + + /* C1 - C5 for floating point, homogenous floating point aggregates (HFA) + and homogenous short-vector aggregates (HVA). */ + if (allocate_nvrn) + { + if (nvrn + nregs <= NUM_FP_ARG_REGS) + { + pcum->aapcs_nextnvrn = nvrn + nregs; + if (!aarch64_composite_type_p (type, mode)) + { + gcc_assert (nregs == 1); + pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn); + } + else + { + rtx par; + int i; + par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs)); + for (i = 0; i < nregs; i++) + { + rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode, + V0_REGNUM + nvrn + i); + tmp = gen_rtx_EXPR_LIST + (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode))); + XVECEXP (par, 0, i) = tmp; + } + pcum->aapcs_reg = par; + } + return; + } + else + { + /* C.3 NSRN is set to 8. 
*/ + pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS; + goto on_stack; + } + } + + ncrn = pcum->aapcs_ncrn; + nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)) + + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + + /* C6 - C9. though the sign and zero extension semantics are + handled elsewhere. This is the case where the argument fits + entirely general registers. */ + if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS)) + { + unsigned int alignment = aarch64_function_arg_alignment (mode, type); + + gcc_assert (nregs == 0 || nregs == 1 || nregs == 2); + + /* C.8 if the argument has an alignment of 16 then the NGRN is + rounded up to the next even number. */ + if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2) + { + ++ncrn; + gcc_assert (ncrn + nregs <= NUM_ARG_REGS); + } + /* NREGS can be 0 when e.g. an empty structure is to be passed. + A reg is still generated for it, but the caller should be smart + enough not to use it. */ + if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT) + { + pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn); + } + else + { + rtx par; + int i; + + par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs)); + for (i = 0; i < nregs; i++) + { + rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * UNITS_PER_WORD)); + XVECEXP (par, 0, i) = tmp; + } + pcum->aapcs_reg = par; + } + + pcum->aapcs_nextncrn = ncrn + nregs; + return; + } + + /* C.11 */ + pcum->aapcs_nextncrn = NUM_ARG_REGS; + + /* The argument is passed on stack; record the needed number of words for + this argument (we can re-use NREGS) and align the total size if + necessary. */ +on_stack: + pcum->aapcs_stack_words = nregs; + if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) + pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size, + 16 / UNITS_PER_WORD) + 1; + return; +} + +/* Implement TARGET_FUNCTION_ARG. 
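+
+   As a small illustration (an invented call): for f (1, 2, 3) with three
+   int arguments, this hook reports the third argument as the hard
+   register w2, while NULL_RTX is returned for an argument that the
+   layout code above assigned to the stack.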
*/ + +static rtx +aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64); + + if (mode == VOIDmode) + return NULL_RTX; + + aarch64_layout_arg (pcum_v, mode, type, named); + return pcum->aapcs_reg; +} + +void +aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + const_tree fntype ATTRIBUTE_UNUSED, + rtx libname ATTRIBUTE_UNUSED, + const_tree fndecl ATTRIBUTE_UNUSED, + unsigned n_named ATTRIBUTE_UNUSED) +{ + pcum->aapcs_ncrn = 0; + pcum->aapcs_nvrn = 0; + pcum->aapcs_nextncrn = 0; + pcum->aapcs_nextnvrn = 0; + pcum->pcs_variant = ARM_PCS_AAPCS64; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_arg_processed = false; + pcum->aapcs_stack_words = 0; + pcum->aapcs_stack_size = 0; + + return; +} + +static void +aarch64_function_arg_advance (cumulative_args_t pcum_v, + enum machine_mode mode, + const_tree type, + bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + if (pcum->pcs_variant == ARM_PCS_AAPCS64) + { + aarch64_layout_arg (pcum_v, mode, type, named); + gcc_assert ((pcum->aapcs_reg != NULL_RTX) + != (pcum->aapcs_stack_words != 0)); + pcum->aapcs_arg_processed = false; + pcum->aapcs_ncrn = pcum->aapcs_nextncrn; + pcum->aapcs_nvrn = pcum->aapcs_nextnvrn; + pcum->aapcs_stack_size += pcum->aapcs_stack_words; + pcum->aapcs_stack_words = 0; + pcum->aapcs_reg = NULL_RTX; + } +} + +bool +aarch64_function_arg_regno_p (unsigned regno) +{ + return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS) + || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS)); +} + +/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least + PARM_BOUNDARY bits of alignment, but will be given anything up + to STACK_BOUNDARY bits if the type requires it. This makes sure + that both before and after the layout of each argument, the Next + Stacked Argument Address (NSAA) will have a minimum alignment of + 8 bytes. */ + +static unsigned int +aarch64_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + unsigned int alignment = aarch64_function_arg_alignment (mode, type); + + if (alignment < PARM_BOUNDARY) + alignment = PARM_BOUNDARY; + if (alignment > STACK_BOUNDARY) + alignment = STACK_BOUNDARY; + return alignment; +} + +/* For use by FUNCTION_ARG_PADDING (MODE, TYPE). + + Return true if an argument passed on the stack should be padded upwards, + i.e. if the least-significant byte of the stack slot has useful data. + + Small aggregate types are placed in the lowest memory address. + + The related parameter passing rules are B.4, C.3, C.5 and C.14. */ + +bool +aarch64_pad_arg_upward (enum machine_mode mode, const_tree type) +{ + /* On little-endian targets, the least significant byte of every stack + argument is passed at the lowest byte address of the stack slot. */ + if (!BYTES_BIG_ENDIAN) + return true; + + /* Otherwise, integral, floating-point and pointer types are padded downward: + the least significant byte of a stack argument is passed at the highest + byte address of the stack slot. */ + if (type + ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type) + || POINTER_TYPE_P (type)) + : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode))) + return false; + + /* Everything else padded upward, i.e. data in first byte of stack slot. */ + return true; +} + +/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST). 
+ + It specifies padding for the last (may also be the only) + element of a block move between registers and memory. If + assuming the block is in the memory, padding upward means that + the last element is padded after its highest significant byte, + while in downward padding, the last element is padded at the + its least significant byte side. + + Small aggregates and small complex types are always padded + upwards. + + We don't need to worry about homogeneous floating-point or + short-vector aggregates; their move is not affected by the + padding direction determined here. Regardless of endianness, + each element of such an aggregate is put in the least + significant bits of a fp/simd register. + + Return !BYTES_BIG_ENDIAN if the least significant byte of the + register has useful data, and return the opposite if the most + significant byte does. */ + +bool +aarch64_pad_reg_upward (enum machine_mode mode, const_tree type, + bool first ATTRIBUTE_UNUSED) +{ + + /* Small composite types are always padded upward. */ + if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode)) + { + HOST_WIDE_INT size = (type ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)); + if (size < 2 * UNITS_PER_WORD) + return true; + } + + /* Otherwise, use the default padding. */ + return !BYTES_BIG_ENDIAN; +} + +static enum machine_mode +aarch64_libgcc_cmp_return_mode (void) +{ + return SImode; +} + +static bool +aarch64_frame_pointer_required (void) +{ + /* If the function contains dynamic stack allocations, we need to + use the frame pointer to access the static parts of the frame. */ + if (cfun->calls_alloca) + return true; + + /* We may have turned flag_omit_frame_pointer on in order to have this + function called; if we did, we also set the 'faked_omit_frame_pointer' flag + and we'll check it here. + If we really did set flag_omit_frame_pointer normally, then we return false + (no frame pointer required) in all cases. */ + + if (flag_omit_frame_pointer && !faked_omit_frame_pointer) + return false; + else if (flag_omit_leaf_frame_pointer) + return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM); + return true; +} + +/* Mark the registers that need to be saved by the callee and calculate + the size of the callee-saved registers area and frame record (both FP + and LR may be omitted). */ +static void +aarch64_layout_frame (void) +{ + HOST_WIDE_INT offset = 0; + int regno; + + if (reload_completed && cfun->machine->frame.laid_out) + return; + + cfun->machine->frame.fp_lr_offset = 0; + + /* First mark all the registers that really need to be saved... */ + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + cfun->machine->frame.reg_offset[regno] = -1; + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + cfun->machine->frame.reg_offset[regno] = -1; + + /* ... that includes the eh data registers (if needed)... */ + if (crtl->calls_eh_return) + for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) + cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0; + + /* ... and any callee saved register that dataflow says is live. 
*/ + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + if (df_regs_ever_live_p (regno) + && !call_used_regs[regno]) + cfun->machine->frame.reg_offset[regno] = 0; + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (df_regs_ever_live_p (regno) + && !call_used_regs[regno]) + cfun->machine->frame.reg_offset[regno] = 0; + + if (frame_pointer_needed) + { + cfun->machine->frame.reg_offset[R30_REGNUM] = 0; + cfun->machine->frame.reg_offset[R29_REGNUM] = 0; + cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD; + } + + /* Now assign stack slots for them. */ + for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++) + if (cfun->machine->frame.reg_offset[regno] != -1) + { + cfun->machine->frame.reg_offset[regno] = offset; + offset += UNITS_PER_WORD; + } + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (cfun->machine->frame.reg_offset[regno] != -1) + { + cfun->machine->frame.reg_offset[regno] = offset; + offset += UNITS_PER_WORD; + } + + if (frame_pointer_needed) + { + cfun->machine->frame.reg_offset[R29_REGNUM] = offset; + offset += UNITS_PER_WORD; + cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD; + } + + if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1) + { + cfun->machine->frame.reg_offset[R30_REGNUM] = offset; + offset += UNITS_PER_WORD; + cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD; + } + + cfun->machine->frame.padding0 = + (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset); + offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); + + cfun->machine->frame.saved_regs_size = offset; + cfun->machine->frame.laid_out = true; +} + +/* Make the last instruction frame-related and note that it performs + the operation described by FRAME_PATTERN. */ + +static void +aarch64_set_frame_expr (rtx frame_pattern) +{ + rtx insn; + + insn = get_last_insn (); + RTX_FRAME_RELATED_P (insn) = 1; + RTX_FRAME_RELATED_P (frame_pattern) = 1; + REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, + frame_pattern, + REG_NOTES (insn)); +} + +static bool +aarch64_register_saved_on_entry (int regno) +{ + return cfun->machine->frame.reg_offset[regno] != -1; +} + + +static void +aarch64_save_or_restore_fprs (int start_offset, int increment, + bool restore, rtx base_rtx) + +{ + unsigned regno; + unsigned regno2; + rtx insn; + rtx (*gen_mem_ref)(enum machine_mode, rtx) + = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + { + if (aarch64_register_saved_on_entry (regno)) + { + rtx mem; + mem = gen_mem_ref (DFmode, + plus_constant (Pmode, + base_rtx, + start_offset)); + + for (regno2 = regno + 1; + regno2 <= V31_REGNUM + && !aarch64_register_saved_on_entry (regno2); + regno2++) + { + /* Empty loop. */ + } + if (regno2 <= V31_REGNUM && + aarch64_register_saved_on_entry (regno2)) + { + rtx mem2; + /* Next highest register to be saved. 
*/ + mem2 = gen_mem_ref (DFmode, + plus_constant + (Pmode, + base_rtx, + start_offset + increment)); + if (restore == false) + { + insn = emit_insn + ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno), + mem2, gen_rtx_REG (DFmode, regno2))); + + } + else + { + insn = emit_insn + ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem, + gen_rtx_REG (DFmode, regno2), mem2)); + + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DFmode, regno)); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DFmode, regno2)); + } + + /* The first part of a frame-related parallel insn + is always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. */ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } + else + { + if (restore == false) + insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno)); + else + { + insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DImode, regno)); + } + start_offset += increment; + } + RTX_FRAME_RELATED_P (insn) = 1; + } + } + +} + + +/* offset from the stack pointer of where the saves and + restore's have to happen. */ +static void +aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, + bool restore) +{ + rtx insn; + rtx base_rtx = stack_pointer_rtx; + HOST_WIDE_INT start_offset = offset; + HOST_WIDE_INT increment = UNITS_PER_WORD; + rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM; + unsigned regno; + unsigned regno2; + + for (regno = R0_REGNUM; regno <= limit; regno++) + { + if (aarch64_register_saved_on_entry (regno)) + { + rtx mem; + mem = gen_mem_ref (Pmode, + plus_constant (Pmode, + base_rtx, + start_offset)); + + for (regno2 = regno + 1; + regno2 <= limit + && !aarch64_register_saved_on_entry (regno2); + regno2++) + { + /* Empty loop. */ + } + if (regno2 <= limit && + aarch64_register_saved_on_entry (regno2)) + { + rtx mem2; + /* Next highest register to be saved. */ + mem2 = gen_mem_ref (Pmode, + plus_constant + (Pmode, + base_rtx, + start_offset + increment)); + if (restore == false) + { + insn = emit_insn + ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno), + mem2, gen_rtx_REG (DImode, regno2))); + + } + else + { + insn = emit_insn + ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem, + gen_rtx_REG (DImode, regno2), mem2)); + + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); + } + + /* The first part of a frame-related parallel insn + is always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. 
*/ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, + 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } + else + { + if (restore == false) + insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno)); + else + { + insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem); + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno)); + } + start_offset += increment; + } + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); + +} + +/* AArch64 stack frames generated by this compiler look like: + + +-------------------------------+ + | | + | incoming stack arguments | + | | + +-------------------------------+ <-- arg_pointer_rtx + | | + | callee-allocated save area | + | for register varargs | + | | + +-------------------------------+ <-- frame_pointer_rtx + | | + | local variables | + | | + +-------------------------------+ + | padding0 | \ + +-------------------------------+ | + | | | + | | | + | callee-saved registers | | frame.saved_regs_size + | | | + +-------------------------------+ | + | LR' | | + +-------------------------------+ | + | FP' | / + P +-------------------------------+ <-- hard_frame_pointer_rtx + | dynamic allocation | + +-------------------------------+ + | | + | outgoing stack arguments | + | | + +-------------------------------+ <-- stack_pointer_rtx + + Dynamic stack allocations such as alloca insert data at point P. + They decrease stack_pointer_rtx but leave frame_pointer_rtx and + hard_frame_pointer_rtx unchanged. */ + +/* Generate the prologue instructions for entry into a function. + Establish the stack frame by decreasing the stack pointer with a + properly calculated size and, if necessary, create a frame record + filled with the values of LR and previous frame pointer. The + current FP is also set up if it is in use. */ + +void +aarch64_expand_prologue (void) +{ + /* sub sp, sp, # + stp {fp, lr}, [sp, # - 16] + add fp, sp, # - hardfp_offset + stp {cs_reg}, [fp, #-16] etc. + + sub sp, sp, + */ + HOST_WIDE_INT original_frame_size; /* local variables + vararg save */ + HOST_WIDE_INT frame_size, offset; + HOST_WIDE_INT fp_offset; /* FP offset from SP */ + rtx insn; + + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; + gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg) + && (cfun->stdarg || !cfun->machine->saved_varargs_size)); + frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size + + crtl->outgoing_args_size); + offset = frame_size = AARCH64_ROUND_UP (frame_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + if (flag_stack_usage_info) + current_function_static_stack_size = frame_size; + + fp_offset = (offset + - original_frame_size + - cfun->machine->frame.saved_regs_size); + + /* Store pairs and load pairs have a range only -512 to 504. */ + if (offset >= 512) + { + /* When the frame has a large size, an initial decrease is done on + the stack pointer to jump over the callee-allocated save area for + register varargs, the local variable area and/or the callee-saved + register area. This will allow the pre-index write-back + store pair instructions to be used for setting up the stack frame + efficiently. 
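+
+     As a rough example (exact values depend on the layout computed
+     above, and a frame pointer is assumed to be set up): with 1024 bytes
+     of locals, no outgoing arguments and only FP/LR to save, the
+     expected shape is
+
+       sub  sp, sp, #1024
+       stp  x29, x30, [sp, #-16]!
+       add  x29, sp, #0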
*/ + offset = original_frame_size + cfun->machine->frame.saved_regs_size; + if (offset >= 512) + offset = cfun->machine->frame.saved_regs_size; + + frame_size -= (offset + crtl->outgoing_args_size); + fp_offset = 0; + + if (frame_size >= 0x1000000) + { + rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); + emit_move_insn (op0, GEN_INT (-frame_size)); + emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); + aarch64_set_frame_expr (gen_rtx_SET + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -frame_size))); + } + else if (frame_size > 0) + { + if ((frame_size & 0xfff) != frame_size) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT (-(frame_size + & ~(HOST_WIDE_INT)0xfff)))); + RTX_FRAME_RELATED_P (insn) = 1; + } + if ((frame_size & 0xfff) != 0) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT (-(frame_size + & (HOST_WIDE_INT)0xfff)))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + else + frame_size = -1; + + if (offset > 0) + { + /* Save the frame pointer and lr if the frame pointer is needed + first. Make the frame pointer point to the location of the + old frame pointer on the stack. */ + if (frame_pointer_needed) + { + rtx mem_fp, mem_lr; + + if (fp_offset) + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (-offset))); + RTX_FRAME_RELATED_P (insn) = 1; + aarch64_set_frame_expr (gen_rtx_SET + (Pmode, stack_pointer_rtx, + gen_rtx_MINUS (Pmode, + stack_pointer_rtx, + GEN_INT (offset)))); + mem_fp = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset)); + mem_lr = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset + + UNITS_PER_WORD)); + insn = emit_insn (gen_store_pairdi (mem_fp, + hard_frame_pointer_rtx, + mem_lr, + gen_rtx_REG (DImode, + LR_REGNUM))); + } + else + { + insn = emit_insn (gen_storewb_pairdi_di + (stack_pointer_rtx, stack_pointer_rtx, + hard_frame_pointer_rtx, + gen_rtx_REG (DImode, LR_REGNUM), + GEN_INT (-offset), + GEN_INT (GET_MODE_SIZE (DImode) - offset))); + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; + } + + /* The first part of a frame-related parallel insn is always + assumed to be relevant to the frame calculations; + subsequent parts, are only frame-related if explicitly + marked. */ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + RTX_FRAME_RELATED_P (insn) = 1; + + /* Set up frame pointer to point to the location of the + previous frame pointer on the stack. */ + insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, + stack_pointer_rtx, + GEN_INT (fp_offset))); + aarch64_set_frame_expr (gen_rtx_SET + (Pmode, hard_frame_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset))); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_stack_tie (stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + else + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (-offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + aarch64_save_or_restore_callee_save_registers + (fp_offset + cfun->machine->frame.hardfp_offset, 0); + } + + /* when offset >= 512, + sub sp, sp, # */ + if (frame_size > -1) + { + if (crtl->outgoing_args_size > 0) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT (- crtl->outgoing_args_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +} + +/* Generate the epilogue instructions for returning from a function. 
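+
+   For the small frame sketched in the prologue comment above, the
+   matching epilogue would look roughly like
+
+     ldp  x29, x30, [sp], #16
+     add  sp, sp, #1024
+     ret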
*/ +void +aarch64_expand_epilogue (bool for_sibcall) +{ + HOST_WIDE_INT original_frame_size, frame_size, offset; + HOST_WIDE_INT fp_offset; + rtx insn; + rtx cfa_reg; + + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; + frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size + + crtl->outgoing_args_size); + offset = frame_size = AARCH64_ROUND_UP (frame_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + fp_offset = (offset + - original_frame_size + - cfun->machine->frame.saved_regs_size); + + cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx; + + /* Store pairs and load pairs have a range only -512 to 504. */ + if (offset >= 512) + { + offset = original_frame_size + cfun->machine->frame.saved_regs_size; + if (offset >= 512) + offset = cfun->machine->frame.saved_regs_size; + + frame_size -= (offset + crtl->outgoing_args_size); + fp_offset = 0; + if (!frame_pointer_needed && crtl->outgoing_args_size > 0) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT (crtl->outgoing_args_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + else + frame_size = -1; + + /* If there were outgoing arguments or we've done dynamic stack + allocation, then restore the stack pointer from the frame + pointer. This is at most one insn and more efficient than using + GCC's internal mechanism. */ + if (frame_pointer_needed + && (crtl->outgoing_args_size || cfun->calls_alloca)) + { + insn = emit_insn (gen_add3_insn (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- fp_offset))); + RTX_FRAME_RELATED_P (insn) = 1; + /* As SP is set to (FP - fp_offset), according to the rules in + dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated + from the value of SP from now on. */ + cfa_reg = stack_pointer_rtx; + } + + aarch64_save_or_restore_callee_save_registers + (fp_offset + cfun->machine->frame.hardfp_offset, 1); + + /* Restore the frame pointer and lr if the frame pointer is needed. */ + if (offset > 0) + { + if (frame_pointer_needed) + { + rtx mem_fp, mem_lr; + + if (fp_offset) + { + mem_fp = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset)); + mem_lr = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset + + UNITS_PER_WORD)); + insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx, + mem_fp, + gen_rtx_REG (DImode, + LR_REGNUM), + mem_lr)); + } + else + { + insn = emit_insn (gen_loadwb_pairdi_di + (stack_pointer_rtx, + stack_pointer_rtx, + hard_frame_pointer_rtx, + gen_rtx_REG (DImode, LR_REGNUM), + GEN_INT (offset), + GEN_INT (GET_MODE_SIZE (DImode) + offset))); + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + (gen_rtx_SET (Pmode, stack_pointer_rtx, + plus_constant (Pmode, cfa_reg, + offset)))); + } + + /* The first part of a frame-related parallel insn + is always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. 
*/ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); + add_reg_note (insn, REG_CFA_RESTORE, + gen_rtx_REG (DImode, LR_REGNUM)); + + if (fp_offset) + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + else + { + insn = emit_insn (gen_add2_insn (stack_pointer_rtx, + GEN_INT (offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + /* Stack adjustment for exception handler. */ + if (crtl->calls_eh_return) + { + /* We need to unwind the stack by the offset computed by + EH_RETURN_STACKADJ_RTX. However, at this point the CFA is + based on SP. Ideally we would update the SP and define the + CFA along the lines of: + + SP = SP + EH_RETURN_STACKADJ_RTX + (regnote CFA = SP - EH_RETURN_STACKADJ_RTX) + + However the dwarf emitter only understands a constant + register offset. + + The solution chosen here is to use the otherwise unused IP0 + as a temporary register to hold the current SP value. The + CFA is described using IP0 then SP is modified. */ + + rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM); + + insn = emit_move_insn (ip0, stack_pointer_rtx); + add_reg_note (insn, REG_CFA_DEF_CFA, ip0); + RTX_FRAME_RELATED_P (insn) = 1; + + emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); + + /* Ensure the assignment to IP0 does not get optimized away. */ + emit_use (ip0); + } + + if (frame_size > -1) + { + if (frame_size >= 0x1000000) + { + rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); + emit_move_insn (op0, GEN_INT (frame_size)); + emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); + aarch64_set_frame_expr (gen_rtx_SET + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + frame_size))); + } + else if (frame_size > 0) + { + if ((frame_size & 0xfff) != 0) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT ((frame_size + & (HOST_WIDE_INT) 0xfff)))); + RTX_FRAME_RELATED_P (insn) = 1; + } + if ((frame_size & 0xfff) != frame_size) + { + insn = emit_insn (gen_add2_insn + (stack_pointer_rtx, + GEN_INT ((frame_size + & ~ (HOST_WIDE_INT) 0xfff)))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + offset))); + } + + emit_use (gen_rtx_REG (DImode, LR_REGNUM)); + if (!for_sibcall) + emit_jump_insn (ret_rtx); +} + +/* Return the place to copy the exception unwinding return address to. + This will probably be a stack slot, but could (in theory be the + return register). */ +rtx +aarch64_final_eh_return_addr (void) +{ + HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset; + aarch64_layout_frame (); + original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size; + frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size + + crtl->outgoing_args_size); + offset = frame_size = AARCH64_ROUND_UP (frame_size, + STACK_BOUNDARY / BITS_PER_UNIT); + fp_offset = offset + - original_frame_size + - cfun->machine->frame.saved_regs_size; + + if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0) + return gen_rtx_REG (DImode, LR_REGNUM); + + /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can + result in a store to save LR introduced by builtin_eh_return () being + incorrectly deleted because the alias is not detected. + So in the calculation of the address to copy the exception unwinding + return address to, we note 2 cases. 
+ If FP is needed and the fp_offset is 0, it means that SP = FP and hence + we return a SP-relative location since all the addresses are SP-relative + in this case. This prevents the store from being optimized away. + If the fp_offset is not 0, then the addresses will be FP-relative and + therefore we return a FP-relative location. */ + + if (frame_pointer_needed) + { + if (fp_offset) + return gen_frame_mem (DImode, + plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD)); + else + return gen_frame_mem (DImode, + plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD)); + } + + /* If FP is not needed, we calculate the location of LR, which would be + at the top of the saved registers block. */ + + return gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + fp_offset + + cfun->machine->frame.saved_regs_size + - 2 * UNITS_PER_WORD)); +} + +/* Output code to build up a constant in a register. */ +static void +aarch64_build_constant (int regnum, HOST_WIDE_INT val) +{ + if (aarch64_bitmask_imm (val, DImode)) + emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); + else + { + int i; + int ncount = 0; + int zcount = 0; + HOST_WIDE_INT valp = val >> 16; + HOST_WIDE_INT valm; + HOST_WIDE_INT tval; + + for (i = 16; i < 64; i += 16) + { + valm = (valp & 0xffff); + + if (valm != 0) + ++ zcount; + + if (valm != 0xffff) + ++ ncount; + + valp >>= 16; + } + + /* zcount contains the number of additional MOVK instructions + required if the constant is built up with an initial MOVZ instruction, + while ncount is the number of MOVK instructions required if starting + with a MOVN instruction. Choose the sequence that yields the fewest + number of instructions, preferring MOVZ instructions when they are both + the same. */ + if (ncount < zcount) + { + emit_move_insn (gen_rtx_REG (Pmode, regnum), + GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); + tval = 0xffff; + } + else + { + emit_move_insn (gen_rtx_REG (Pmode, regnum), + GEN_INT (val & 0xffff)); + tval = 0; + } + + val >>= 16; + + for (i = 16; i < 64; i += 16) + { + if ((val & 0xffff) != tval) + emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), + GEN_INT (i), GEN_INT (val & 0xffff))); + val >>= 16; + } + } +} + +static void +aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta) +{ + HOST_WIDE_INT mdelta = delta; + rtx this_rtx = gen_rtx_REG (Pmode, regnum); + rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg); + + if (mdelta < 0) + mdelta = -mdelta; + + if (mdelta >= 4096 * 4096) + { + aarch64_build_constant (scratchreg, delta); + emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); + } + else if (mdelta > 0) + { + if (mdelta >= 4096) + { + emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096))); + rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12)); + if (delta < 0) + emit_insn (gen_rtx_SET (Pmode, this_rtx, + gen_rtx_MINUS (Pmode, this_rtx, shift))); + else + emit_insn (gen_rtx_SET (Pmode, this_rtx, + gen_rtx_PLUS (Pmode, this_rtx, shift))); + } + if (mdelta % 4096 != 0) + { + scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096)); + emit_insn (gen_rtx_SET (Pmode, this_rtx, + gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx))); + } + } +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ +static void +aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function) +{ + /* The this pointer is always in x0. 
Note that this differs from + Arm where the this pointer maybe bumped to r1 if r0 is required + to return a pointer to an aggregate. On AArch64 a result value + pointer will be in x8. */ + int this_regno = R0_REGNUM; + rtx this_rtx, temp0, temp1, addr, insn, funexp; + + reload_completed = 1; + emit_note (NOTE_INSN_PROLOGUE_END); + + if (vcall_offset == 0) + aarch64_add_constant (this_regno, IP1_REGNUM, delta); + else + { + gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0); + + this_rtx = gen_rtx_REG (Pmode, this_regno); + temp0 = gen_rtx_REG (Pmode, IP0_REGNUM); + temp1 = gen_rtx_REG (Pmode, IP1_REGNUM); + + addr = this_rtx; + if (delta != 0) + { + if (delta >= -256 && delta < 256) + addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx, + plus_constant (Pmode, this_rtx, delta)); + else + aarch64_add_constant (this_regno, IP1_REGNUM, delta); + } + + if (Pmode == ptr_mode) + aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr)); + else + aarch64_emit_move (temp0, + gen_rtx_ZERO_EXTEND (Pmode, + gen_rtx_MEM (ptr_mode, addr))); + + if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES) + addr = plus_constant (Pmode, temp0, vcall_offset); + else + { + aarch64_build_constant (IP1_REGNUM, vcall_offset); + addr = gen_rtx_PLUS (Pmode, temp0, temp1); + } + + if (Pmode == ptr_mode) + aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr)); + else + aarch64_emit_move (temp1, + gen_rtx_SIGN_EXTEND (Pmode, + gen_rtx_MEM (ptr_mode, addr))); + + emit_insn (gen_add2_insn (this_rtx, temp1)); + } + + /* Generate a tail call to the target function. */ + if (!TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); + SIBLING_CALL_P (insn) = 1; + + insn = get_insns (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + + /* Stop pretending to be a post-reload pass. */ + reload_completed = 0; +} + +static int +aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == SYMBOL_REF) + return SYMBOL_REF_TLS_MODEL (*x) != 0; + + /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are + TLS offsets, not real symbol references. 
*/ + if (GET_CODE (*x) == UNSPEC + && XINT (*x, 1) == UNSPEC_TLS) + return -1; + + return 0; +} + +static bool +aarch64_tls_referenced_p (rtx x) +{ + if (!TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL); +} + + +static int +aarch64_bitmasks_cmp (const void *i1, const void *i2) +{ + const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1; + const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2; + + if (*imm1 < *imm2) + return -1; + if (*imm1 > *imm2) + return +1; + return 0; +} + + +static void +aarch64_build_bitmask_table (void) +{ + unsigned HOST_WIDE_INT mask, imm; + unsigned int log_e, e, s, r; + unsigned int nimms = 0; + + for (log_e = 1; log_e <= 6; log_e++) + { + e = 1 << log_e; + if (e == 64) + mask = ~(HOST_WIDE_INT) 0; + else + mask = ((HOST_WIDE_INT) 1 << e) - 1; + for (s = 1; s < e; s++) + { + for (r = 0; r < e; r++) + { + /* set s consecutive bits to 1 (s < 64) */ + imm = ((unsigned HOST_WIDE_INT)1 << s) - 1; + /* rotate right by r */ + if (r != 0) + imm = ((imm >> r) | (imm << (e - r))) & mask; + /* replicate the constant depending on SIMD size */ + switch (log_e) { + case 1: imm |= (imm << 2); + case 2: imm |= (imm << 4); + case 3: imm |= (imm << 8); + case 4: imm |= (imm << 16); + case 5: imm |= (imm << 32); + case 6: + break; + default: + gcc_unreachable (); + } + gcc_assert (nimms < AARCH64_NUM_BITMASKS); + aarch64_bitmasks[nimms++] = imm; + } + } + } + + gcc_assert (nimms == AARCH64_NUM_BITMASKS); + qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]), + aarch64_bitmasks_cmp); +} + + +/* Return true if val can be encoded as a 12-bit unsigned immediate with + a left shift of 0 or 12 bits. */ +bool +aarch64_uimm12_shift (HOST_WIDE_INT val) +{ + return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val + || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val + ); +} + + +/* Return true if val is an immediate that can be loaded into a + register by a MOVZ instruction. */ +static bool +aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode) +{ + if (GET_MODE_SIZE (mode) > 4) + { + if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val + || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) + return 1; + } + else + { + /* Ignore sign extension. */ + val &= (HOST_WIDE_INT) 0xffffffff; + } + return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val + || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); +} + + +/* Return true if val is a valid bitmask immediate. */ +bool +aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode) +{ + if (GET_MODE_SIZE (mode) < 8) + { + /* Replicate bit pattern. */ + val &= (HOST_WIDE_INT) 0xffffffff; + val |= val << 32; + } + return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS, + sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL; +} + + +/* Return true if val is an immediate that can be loaded into a + register in a single instruction. 
*/ +bool +aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode) +{ + if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode)) + return 1; + return aarch64_bitmask_imm (val, mode); +} + +static bool +aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + rtx base, offset; + + if (GET_CODE (x) == HIGH) + return true; + + split_const (x, &base, &offset); + if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) + { + if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM) + return true; + else + /* Avoid generating a 64-bit relocation in ILP32; leave + to aarch64_expand_mov_immediate to handle it properly. */ + return mode != ptr_mode; + } + + return aarch64_tls_referenced_p (x); +} + +/* Return true if register REGNO is a valid index register. + STRICT_P is true if REG_OK_STRICT is in effect. */ + +bool +aarch64_regno_ok_for_index_p (int regno, bool strict_p) +{ + if (!HARD_REGISTER_NUM_P (regno)) + { + if (!strict_p) + return true; + + if (!reg_renumber) + return false; + + regno = reg_renumber[regno]; + } + return GP_REGNUM_P (regno); +} + +/* Return true if register REGNO is a valid base register for mode MODE. + STRICT_P is true if REG_OK_STRICT is in effect. */ + +bool +aarch64_regno_ok_for_base_p (int regno, bool strict_p) +{ + if (!HARD_REGISTER_NUM_P (regno)) + { + if (!strict_p) + return true; + + if (!reg_renumber) + return false; + + regno = reg_renumber[regno]; + } + + /* The fake registers will be eliminated to either the stack or + hard frame pointer, both of which are usually valid base registers. + Reload deals with the cases where the eliminated form isn't valid. */ + return (GP_REGNUM_P (regno) + || regno == SP_REGNUM + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM); +} + +/* Return true if X is a valid base register for mode MODE. + STRICT_P is true if REG_OK_STRICT is in effect. */ + +static bool +aarch64_base_register_rtx_p (rtx x, bool strict_p) +{ + if (!strict_p && GET_CODE (x) == SUBREG) + x = SUBREG_REG (x); + + return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p)); +} + +/* Return true if address offset is a valid index. If it is, fill in INFO + appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */ + +static bool +aarch64_classify_index (struct aarch64_address_info *info, rtx x, + enum machine_mode mode, bool strict_p) +{ + enum aarch64_address_type type; + rtx index; + int shift; + + /* (reg:P) */ + if ((REG_P (x) || GET_CODE (x) == SUBREG) + && GET_MODE (x) == Pmode) + { + type = ADDRESS_REG_REG; + index = x; + shift = 0; + } + /* (sign_extend:DI (reg:SI)) */ + else if ((GET_CODE (x) == SIGN_EXTEND + || GET_CODE (x) == ZERO_EXTEND) + && GET_MODE (x) == DImode + && GET_MODE (XEXP (x, 0)) == SImode) + { + type = (GET_CODE (x) == SIGN_EXTEND) + ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; + index = XEXP (x, 0); + shift = 0; + } + /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */ + else if (GET_CODE (x) == MULT + && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) + && GET_MODE (XEXP (x, 0)) == DImode + && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode + && CONST_INT_P (XEXP (x, 1))) + { + type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + ? 
ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = exact_log2 (INTVAL (XEXP (x, 1)));
+    }
+  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
+  else if (GET_CODE (x) == ASHIFT
+           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
+           && GET_MODE (XEXP (x, 0)) == DImode
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
+        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = INTVAL (XEXP (x, 1));
+    }
+  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
+  else if ((GET_CODE (x) == SIGN_EXTRACT
+            || GET_CODE (x) == ZERO_EXTRACT)
+           && GET_MODE (x) == DImode
+           && GET_CODE (XEXP (x, 0)) == MULT
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
+           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+    {
+      type = (GET_CODE (x) == SIGN_EXTRACT)
+        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
+      if (INTVAL (XEXP (x, 1)) != 32 + shift
+          || INTVAL (XEXP (x, 2)) != 0)
+        shift = -1;
+    }
+  /* (and:DI (mult:DI (reg:DI) (const_int scale))
+     (const_int 0xffffffff<<shift)) */
+  else if (GET_CODE (x) == AND
+           && GET_MODE (x) == DImode
+           && GET_CODE (XEXP (x, 0)) == MULT
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
+           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
+      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
+        shift = -1;
+    }
+  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
+  else if ((GET_CODE (x) == SIGN_EXTRACT
+            || GET_CODE (x) == ZERO_EXTRACT)
+           && GET_MODE (x) == DImode
+           && GET_CODE (XEXP (x, 0)) == ASHIFT
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
+           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+    {
+      type = (GET_CODE (x) == SIGN_EXTRACT)
+        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = INTVAL (XEXP (XEXP (x, 0), 1));
+      if (INTVAL (XEXP (x, 1)) != 32 + shift
+          || INTVAL (XEXP (x, 2)) != 0)
+        shift = -1;
+    }
+  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
+     (const_int 0xffffffff<<shift)) */
+  else if (GET_CODE (x) == AND
+           && GET_MODE (x) == DImode
+           && GET_CODE (XEXP (x, 0)) == ASHIFT
+           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
+           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = ADDRESS_REG_UXTW;
+      index = XEXP (XEXP (x, 0), 0);
+      shift = INTVAL (XEXP (XEXP (x, 0), 1));
+      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
+        shift = -1;
+    }
+  /* (mult:P (reg:P) (const_int scale)) */
+  else if (GET_CODE (x) == MULT
+           && GET_MODE (x) == Pmode
+           && GET_MODE (XEXP (x, 0)) == Pmode
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = ADDRESS_REG_REG;
+      index = XEXP (x, 0);
+      shift = exact_log2 (INTVAL (XEXP (x, 1)));
+    }
+  /* (ashift:P (reg:P) (const_int shift)) */
+  else if (GET_CODE (x) == ASHIFT
+           && GET_MODE (x) == Pmode
+           && GET_MODE (XEXP (x, 0)) == Pmode
+           && CONST_INT_P (XEXP (x, 1)))
+    {
+      type = ADDRESS_REG_REG;
+      index = XEXP (x, 0);
+      shift = INTVAL (XEXP (x, 1));
+    }
+  else
+    return false;
+
+  if (GET_CODE (index) == SUBREG)
+    index = SUBREG_REG (index);
+
+  if ((shift == 0 ||
+       (shift > 0 && shift <= 3
+        && (1 << shift) == GET_MODE_SIZE (mode)))
+      && REG_P (index)
+      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
+    {
+      info->type = type;
+      info->offset = index;
+      info->shift = shift;
+      return true;
+    }
+
+  return false;
+}
+
+static inline bool
+offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
+{
+  return (offset >= -64 * GET_MODE_SIZE (mode)
+          && offset < 64 * GET_MODE_SIZE (mode)
+          && offset % GET_MODE_SIZE (mode) == 0);
+}
+
+static inline bool
+offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+                               HOST_WIDE_INT offset)
+{
+  return offset >= -256 && offset < 256;
+}
+
+static inline bool
+offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
+{
+  return (offset >= 0
+          && offset < 4096 * GET_MODE_SIZE (mode)
+          && offset % GET_MODE_SIZE (mode) == 0);
+}
+
+/* Return true if X is a valid address for machine mode MODE. If it is,
+   fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
+   effect. OUTER_CODE is PARALLEL for a load/store pair. */
+
+static bool
+aarch64_classify_address (struct aarch64_address_info *info,
+                          rtx x, enum machine_mode mode,
+                          RTX_CODE outer_code, bool strict_p)
+{
+  enum rtx_code code = GET_CODE (x);
+  rtx op0, op1;
+  bool allow_reg_index_p =
+    outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
+
+  /* Don't support anything other than POST_INC or REG addressing for
+     AdvSIMD. */
+  if (aarch64_vector_mode_p (mode)
+      && (code != POST_INC && code != REG))
+    return false;
+
+  switch (code)
+    {
+    case REG:
+    case SUBREG:
+      info->type = ADDRESS_REG_IMM;
+      info->base = x;
+      info->offset = const0_rtx;
+      return aarch64_base_register_rtx_p (x, strict_p);
+
+    case PLUS:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+      if (GET_MODE_SIZE (mode) != 0
+          && CONST_INT_P (op1)
+          && aarch64_base_register_rtx_p (op0, strict_p))
+        {
+          HOST_WIDE_INT offset = INTVAL (op1);
+
+          info->type = ADDRESS_REG_IMM;
+          info->base = op0;
+          info->offset = op1;
+
+          /* TImode and TFmode values are allowed in both pairs of X
+             registers and individual Q registers. The available
+             address modes are:
+             X,X: 7-bit signed scaled offset
+             Q:   9-bit signed offset
+             We conservatively require an offset representable in either mode.
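+             For example, with TImode an offset of 240 is accepted (a
+             multiple of 16 within [-256, 256)), whereas 512 fails the
+             9-bit test and 8 fails the scaled 7-bit test.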
+ */ + if (mode == TImode || mode == TFmode) + return (offset_7bit_signed_scaled_p (mode, offset) + && offset_9bit_signed_unscaled_p (mode, offset)); + + if (outer_code == PARALLEL) + return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) + && offset_7bit_signed_scaled_p (mode, offset)); + else + return (offset_9bit_signed_unscaled_p (mode, offset) + || offset_12bit_unsigned_scaled_p (mode, offset)); + } + + if (allow_reg_index_p) + { + /* Look for base + (scaled/extended) index register. */ + if (aarch64_base_register_rtx_p (op0, strict_p) + && aarch64_classify_index (info, op1, mode, strict_p)) + { + info->base = op0; + return true; + } + if (aarch64_base_register_rtx_p (op1, strict_p) + && aarch64_classify_index (info, op0, mode, strict_p)) + { + info->base = op1; + return true; + } + } + + return false; + + case POST_INC: + case POST_DEC: + case PRE_INC: + case PRE_DEC: + info->type = ADDRESS_REG_WB; + info->base = XEXP (x, 0); + info->offset = NULL_RTX; + return aarch64_base_register_rtx_p (info->base, strict_p); + + case POST_MODIFY: + case PRE_MODIFY: + info->type = ADDRESS_REG_WB; + info->base = XEXP (x, 0); + if (GET_CODE (XEXP (x, 1)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base) + && aarch64_base_register_rtx_p (info->base, strict_p)) + { + HOST_WIDE_INT offset; + info->offset = XEXP (XEXP (x, 1), 1); + offset = INTVAL (info->offset); + + /* TImode and TFmode values are allowed in both pairs of X + registers and individual Q registers. The available + address modes are: + X,X: 7-bit signed scaled offset + Q: 9-bit signed offset + We conservatively require an offset representable in either mode. + */ + if (mode == TImode || mode == TFmode) + return (offset_7bit_signed_scaled_p (mode, offset) + && offset_9bit_signed_unscaled_p (mode, offset)); + + if (outer_code == PARALLEL) + return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) + && offset_7bit_signed_scaled_p (mode, offset)); + else + return offset_9bit_signed_unscaled_p (mode, offset); + } + return false; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + /* load literal: pc-relative constant pool entry. Only supported + for SI mode or larger. */ + info->type = ADDRESS_SYMBOLIC; + if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4) + { + rtx sym, addend; + + split_const (x, &sym, &addend); + return (GET_CODE (sym) == LABEL_REF + || (GET_CODE (sym) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (sym))); + } + return false; + + case LO_SUM: + info->type = ADDRESS_LO_SUM; + info->base = XEXP (x, 0); + info->offset = XEXP (x, 1); + if (allow_reg_index_p + && aarch64_base_register_rtx_p (info->base, strict_p)) + { + rtx sym, offs; + split_const (info->offset, &sym, &offs); + if (GET_CODE (sym) == SYMBOL_REF + && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM) + == SYMBOL_SMALL_ABSOLUTE)) + { + /* The symbol and offset must be aligned to the access size. 
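+ The low 12 bits of the address are encoded, after scaling, in the unsigned immediate field of the load or store, so they must be a multiple of the access size.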
*/ + unsigned int align; + unsigned int ref_size; + + if (CONSTANT_POOL_ADDRESS_P (sym)) + align = GET_MODE_ALIGNMENT (get_pool_mode (sym)); + else if (TREE_CONSTANT_POOL_ADDRESS_P (sym)) + { + tree exp = SYMBOL_REF_DECL (sym); + align = TYPE_ALIGN (TREE_TYPE (exp)); + align = CONSTANT_ALIGNMENT (exp, align); + } + else if (SYMBOL_REF_DECL (sym)) + align = DECL_ALIGN (SYMBOL_REF_DECL (sym)); + else + align = BITS_PER_UNIT; + + ref_size = GET_MODE_SIZE (mode); + if (ref_size == 0) + ref_size = GET_MODE_SIZE (DImode); + + return ((INTVAL (offs) & (ref_size - 1)) == 0 + && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0); + } + } + return false; + + default: + return false; + } +} + +bool +aarch64_symbolic_address_p (rtx x) +{ + rtx offset; + + split_const (x, &x, &offset); + return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF; +} + +/* Classify the base of symbolic expression X, given that X appears in + context CONTEXT. */ + +enum aarch64_symbol_type +aarch64_classify_symbolic_expression (rtx x, + enum aarch64_symbol_context context) +{ + rtx offset; + + split_const (x, &x, &offset); + return aarch64_classify_symbol (x, context); +} + + +/* Return TRUE if X is a legitimate address for accessing memory in + mode MODE. */ +static bool +aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p) +{ + struct aarch64_address_info addr; + + return aarch64_classify_address (&addr, x, mode, MEM, strict_p); +} + +/* Return TRUE if X is a legitimate address for accessing memory in + mode MODE. OUTER_CODE will be PARALLEL if this is a load/store + pair operation. */ +bool +aarch64_legitimate_address_p (enum machine_mode mode, rtx x, + RTX_CODE outer_code, bool strict_p) +{ + struct aarch64_address_info addr; + + return aarch64_classify_address (&addr, x, mode, outer_code, strict_p); +} + +/* Return TRUE if rtx X is immediate constant 0.0 */ +bool +aarch64_float_const_zero_rtx_p (rtx x) +{ + REAL_VALUE_TYPE r; + + if (GET_MODE (x) == VOIDmode) + return false; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + if (REAL_VALUE_MINUS_ZERO (r)) + return !HONOR_SIGNED_ZEROS (GET_MODE (x)); + return REAL_VALUES_EQUAL (r, dconst0); +} + +/* Return the fixed registers used for condition codes. */ + +static bool +aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) +{ + *p1 = CC_REGNUM; + *p2 = INVALID_REGNUM; + return true; +} + +enum machine_mode +aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) +{ + /* All floating point compares return CCFP if it is an equality + comparison, and CCFPE otherwise. */ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + switch (code) + { + case EQ: + case NE: + case UNORDERED: + case ORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case LTGT: + return CCFPmode; + + case LT: + case LE: + case GT: + case GE: + return CCFPEmode; + + default: + gcc_unreachable (); + } + } + + if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + && y == const0_rtx + && (code == EQ || code == NE || code == LT || code == GE) + && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND + || GET_CODE (x) == NEG)) + return CC_NZmode; + + /* A compare with a shifted operand. Because of canonicalization, + the comparison will have to be swapped when we emit the assembly + code. 
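+ The CC_SWP family of modes records this so that aarch64_get_condition_code can return the condition appropriate to the swapped operands.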
*/ + if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) + && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT + || GET_CODE (x) == LSHIFTRT + || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) + return CC_SWPmode; + + /* Similarly for a negated operand, but we can only do this for + equalities. */ + if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG) + && (code == EQ || code == NE) + && GET_CODE (x) == NEG) + return CC_Zmode; + + /* A compare of a mode narrower than SI mode against zero can be done + by extending the value in the comparison. */ + if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode) + && y == const0_rtx) + /* Only use sign-extension if we really need it. */ + return ((code == GT || code == GE || code == LE || code == LT) + ? CC_SESWPmode : CC_ZESWPmode); + + /* For everything else, return CCmode. */ + return CCmode; +} + +static unsigned +aarch64_get_condition_code (rtx x) +{ + enum machine_mode mode = GET_MODE (XEXP (x, 0)); + enum rtx_code comp_code = GET_CODE (x); + + if (GET_MODE_CLASS (mode) != MODE_CC) + mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1)); + + switch (mode) + { + case CCFPmode: + case CCFPEmode: + switch (comp_code) + { + case GE: return AARCH64_GE; + case GT: return AARCH64_GT; + case LE: return AARCH64_LS; + case LT: return AARCH64_MI; + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + case ORDERED: return AARCH64_VC; + case UNORDERED: return AARCH64_VS; + case UNLT: return AARCH64_LT; + case UNLE: return AARCH64_LE; + case UNGT: return AARCH64_HI; + case UNGE: return AARCH64_PL; + default: gcc_unreachable (); + } + break; + + case CCmode: + switch (comp_code) + { + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + case GE: return AARCH64_GE; + case GT: return AARCH64_GT; + case LE: return AARCH64_LE; + case LT: return AARCH64_LT; + case GEU: return AARCH64_CS; + case GTU: return AARCH64_HI; + case LEU: return AARCH64_LS; + case LTU: return AARCH64_CC; + default: gcc_unreachable (); + } + break; + + case CC_SWPmode: + case CC_ZESWPmode: + case CC_SESWPmode: + switch (comp_code) + { + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + case GE: return AARCH64_LE; + case GT: return AARCH64_LT; + case LE: return AARCH64_GE; + case LT: return AARCH64_GT; + case GEU: return AARCH64_LS; + case GTU: return AARCH64_CC; + case LEU: return AARCH64_CS; + case LTU: return AARCH64_HI; + default: gcc_unreachable (); + } + break; + + case CC_NZmode: + switch (comp_code) + { + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + case GE: return AARCH64_PL; + case LT: return AARCH64_MI; + default: gcc_unreachable (); + } + break; + + case CC_Zmode: + switch (comp_code) + { + case NE: return AARCH64_NE; + case EQ: return AARCH64_EQ; + default: gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + break; + } +} + +static unsigned +bit_count (unsigned HOST_WIDE_INT value) +{ + unsigned count = 0; + + while (value) + { + count++; + value &= value - 1; + } + + return count; +} + +void +aarch64_print_operand (FILE *f, rtx x, char code) +{ + switch (code) + { + /* An integer or symbol address without a preceding # sign. 
*/ + case 'c': + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case SYMBOL_REF: + output_addr_const (f, x); + break; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) + { + output_addr_const (f, x); + break; + } + /* Fall through. */ + + default: + output_operand_lossage ("Unsupported operand for code '%c'", code); + } + break; + + case 'e': + /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */ + { + int n; + + if (GET_CODE (x) != CONST_INT + || (n = exact_log2 (INTVAL (x) & ~7)) <= 0) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + switch (n) + { + case 3: + fputc ('b', f); + break; + case 4: + fputc ('h', f); + break; + case 5: + fputc ('w', f); + break; + default: + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + } + break; + + case 'p': + { + int n; + + /* Print N such that 2^N == X. */ + if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + asm_fprintf (f, "%d", n); + } + break; + + case 'P': + /* Print the number of non-zero bits in X (a const_int). */ + if (GET_CODE (x) != CONST_INT) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + asm_fprintf (f, "%u", bit_count (INTVAL (x))); + break; + + case 'H': + /* Print the higher numbered register of a pair (TImode) of regs. */ + if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1)) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]); + break; + + case 'm': + /* Print a condition (eq, ne, etc). */ + + /* CONST_TRUE_RTX means always -- that's the default. */ + if (x == const_true_rtx) + return; + + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f); + break; + + case 'M': + /* Print the inverse of a condition (eq <-> ne, etc). */ + + /* CONST_TRUE_RTX means never -- that's the default. */ + if (x == const_true_rtx) + { + fputs ("nv", f); + return; + } + + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + + fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE + (aarch64_get_condition_code (x))], f); + break; + + case 'b': + case 'h': + case 's': + case 'd': + case 'q': + /* Print a scalar FP/SIMD register name. */ + if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); + return; + } + asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM); + break; + + case 'S': + case 'T': + case 'U': + case 'V': + /* Print the first FP/SIMD register name in a list. */ + if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); + return; + } + asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S')); + break; + + case 'X': + /* Print bottom 16 bits of integer constant in hex. */ + if (GET_CODE (x) != CONST_INT) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff); + break; + + case 'w': + case 'x': + /* Print a general register name or the zero register (32-bit or + 64-bit). 
*/ + if (x == const0_rtx + || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x))) + { + asm_fprintf (f, "%czr", code); + break; + } + + if (REG_P (x) && GP_REGNUM_P (REGNO (x))) + { + asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM); + break; + } + + if (REG_P (x) && REGNO (x) == SP_REGNUM) + { + asm_fprintf (f, "%ssp", code == 'w' ? "w" : ""); + break; + } + + /* Fall through */ + + case 0: + /* Print a normal operand, if it's a general register, then we + assume DImode. */ + if (x == NULL) + { + output_operand_lossage ("missing operand"); + return; + } + + switch (GET_CODE (x)) + { + case REG: + asm_fprintf (f, "%s", reg_names [REGNO (x)]); + break; + + case MEM: + aarch64_memory_reference_mode = GET_MODE (x); + output_address (XEXP (x, 0)); + break; + + case LABEL_REF: + case SYMBOL_REF: + output_addr_const (asm_out_file, x); + break; + + case CONST_INT: + asm_fprintf (f, "%wd", INTVAL (x)); + break; + + case CONST_VECTOR: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT) + { + gcc_assert (aarch64_const_vec_all_same_int_p (x, + HOST_WIDE_INT_MIN, + HOST_WIDE_INT_MAX)); + asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0))); + } + else if (aarch64_simd_imm_zero_p (x, GET_MODE (x))) + { + fputc ('0', f); + } + else + gcc_unreachable (); + break; + + case CONST_DOUBLE: + /* CONST_DOUBLE can represent a double-width integer. + In this case, the mode of x is VOIDmode. */ + if (GET_MODE (x) == VOIDmode) + ; /* Do Nothing. */ + else if (aarch64_float_const_zero_rtx_p (x)) + { + fputc ('0', f); + break; + } + else if (aarch64_float_const_representable_p (x)) + { +#define buf_size 20 + char float_buf[buf_size] = {'\0'}; + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + real_to_decimal_for_mode (float_buf, &r, + buf_size, buf_size, + 1, GET_MODE (x)); + asm_fprintf (asm_out_file, "%s", float_buf); + break; +#undef buf_size + } + output_operand_lossage ("invalid constant"); + return; + default: + output_operand_lossage ("invalid operand"); + return; + } + break; + + case 'A': + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + + switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) + { + case SYMBOL_SMALL_GOT: + asm_fprintf (asm_out_file, ":got:"); + break; + + case SYMBOL_SMALL_TLSGD: + asm_fprintf (asm_out_file, ":tlsgd:"); + break; + + case SYMBOL_SMALL_TLSDESC: + asm_fprintf (asm_out_file, ":tlsdesc:"); + break; + + case SYMBOL_SMALL_GOTTPREL: + asm_fprintf (asm_out_file, ":gottprel:"); + break; + + case SYMBOL_SMALL_TPREL: + asm_fprintf (asm_out_file, ":tprel:"); + break; + + case SYMBOL_TINY_GOT: + gcc_unreachable (); + break; + + default: + break; + } + output_addr_const (asm_out_file, x); + break; + + case 'L': + switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) + { + case SYMBOL_SMALL_GOT: + asm_fprintf (asm_out_file, ":lo12:"); + break; + + case SYMBOL_SMALL_TLSGD: + asm_fprintf (asm_out_file, ":tlsgd_lo12:"); + break; + + case SYMBOL_SMALL_TLSDESC: + asm_fprintf (asm_out_file, ":tlsdesc_lo12:"); + break; + + case SYMBOL_SMALL_GOTTPREL: + asm_fprintf (asm_out_file, ":gottprel_lo12:"); + break; + + case SYMBOL_SMALL_TPREL: + asm_fprintf (asm_out_file, ":tprel_lo12_nc:"); + break; + + case SYMBOL_TINY_GOT: + asm_fprintf (asm_out_file, ":got:"); + break; + + default: + break; + } + output_addr_const (asm_out_file, x); + break; + + case 'G': + + switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR)) + { + case SYMBOL_SMALL_TPREL: + asm_fprintf (asm_out_file, ":tprel_hi12:"); + break; + default: + break; + } + 
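+ /* Print the operand itself after any relocation operator emitted above. */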
output_addr_const (asm_out_file, x); + break; + + default: + output_operand_lossage ("invalid operand prefix '%%%c'", code); + return; + } +} + +void +aarch64_print_operand_address (FILE *f, rtx x) +{ + struct aarch64_address_info addr; + + if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode, + MEM, true)) + switch (addr.type) + { + case ADDRESS_REG_IMM: + if (addr.offset == const0_rtx) + asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); + else + asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + + case ADDRESS_REG_REG: + if (addr.shift == 0) + asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)], + reg_names [REGNO (addr.offset)]); + else + asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)], + reg_names [REGNO (addr.offset)], addr.shift); + return; + + case ADDRESS_REG_UXTW: + if (addr.shift == 0) + asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM); + else + asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM, addr.shift); + return; + + case ADDRESS_REG_SXTW: + if (addr.shift == 0) + asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM); + else + asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM, addr.shift); + return; + + case ADDRESS_REG_WB: + switch (GET_CODE (x)) + { + case PRE_INC: + asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case POST_INC: + asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case PRE_DEC: + asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case POST_DEC: + asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case PRE_MODIFY: + asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + case POST_MODIFY: + asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + default: + break; + } + break; + + case ADDRESS_LO_SUM: + asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]); + output_addr_const (f, addr.offset); + asm_fprintf (f, "]"); + return; + + case ADDRESS_SYMBOLIC: + break; + } + + output_addr_const (f, x); +} + +bool +aarch64_label_mentioned_p (rtx x) +{ + const char *fmt; + int i; + + if (GET_CODE (x) == LABEL_REF) + return true; + + /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the + referencing instruction, but they are constant offsets, not + symbols. */ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return false; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (aarch64_label_mentioned_p (XVECEXP (x, i, j))) + return 1; + } + else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i))) + return 1; + } + + return 0; +} + +/* Implement REGNO_REG_CLASS. 
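+ Return a minimal register class containing hard register REGNO.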
*/ + +enum reg_class +aarch64_regno_regclass (unsigned regno) +{ + if (GP_REGNUM_P (regno)) + return CORE_REGS; + + if (regno == SP_REGNUM) + return STACK_REG; + + if (regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + return POINTER_REGS; + + if (FP_REGNUM_P (regno)) + return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS; + + return NO_REGS; +} + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and return the new rtx. */ + +rtx +aarch64_legitimize_reload_address (rtx *x_p, + enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + rtx x = *x_p; + + /* Do not allow mem (plus (reg, const)) if vector mode. */ + if (aarch64_vector_mode_p (mode) + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))) + { + rtx orig_rtx = x; + x = copy_rtx (x); + push_reload (orig_rtx, NULL_RTX, x_p, NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + /* We must recognize output that we have already generated ourselves. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && CONST_INT_P (XEXP (x, 1))) + { + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + /* We wish to handle large displacements off a base register by splitting + the addend across an add and the mem insn. This can cut the number of + extra insns needed from 3 to 1. It is only useful for load/store of a + single register with 12 bit offset field. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && HARD_REGISTER_P (XEXP (x, 0)) + && mode != TImode + && mode != TFmode + && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true)) + { + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + HOST_WIDE_INT low = val & 0xfff; + HOST_WIDE_INT high = val - low; + HOST_WIDE_INT offs; + rtx cst; + enum machine_mode xmode = GET_MODE (x); + + /* In ILP32, xmode can be either DImode or SImode. */ + gcc_assert (xmode == DImode || xmode == SImode); + + /* Reload non-zero BLKmode offsets. This is because we cannot ascertain + BLKmode alignment. */ + if (GET_MODE_SIZE (mode) == 0) + return NULL_RTX; + + offs = low % GET_MODE_SIZE (mode); + + /* Align misaligned offset by adjusting high part to compensate. */ + if (offs != 0) + { + if (aarch64_uimm12_shift (high + offs)) + { + /* Align down. */ + low = low - offs; + high = high + offs; + } + else + { + /* Align up. */ + offs = GET_MODE_SIZE (mode) - offs; + low = low + offs; + high = high + (low & 0x1000) - offs; + low &= 0xfff; + } + } + + /* Check for overflow. */ + if (high + low != val) + return NULL_RTX; + + cst = GEN_INT (high); + if (!aarch64_uimm12_shift (high)) + cst = force_const_mem (xmode, cst); + + /* Reload high part into base reg, leaving the low part + in the mem instruction. + Note that replacing this gen_rtx_PLUS with plus_constant is + wrong in this case because we rely on the + (plus (plus reg c1) c2) structure being preserved so that + XEXP (*p, 0) in push_reload below uses the correct term. 
*/ + x = gen_rtx_PLUS (xmode, + gen_rtx_PLUS (xmode, XEXP (x, 0), cst), + GEN_INT (low)); + + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, xmode, VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + return NULL_RTX; +} + + +static reg_class_t +aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + reg_class_t rclass, + enum machine_mode mode, + secondary_reload_info *sri) +{ + /* Without the TARGET_SIMD instructions we cannot move a Q register + to a Q register directly. We need a scratch. */ + if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x) + && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD + && reg_class_subset_p (rclass, FP_REGS)) + { + if (mode == TFmode) + sri->icode = CODE_FOR_aarch64_reload_movtf; + else if (mode == TImode) + sri->icode = CODE_FOR_aarch64_reload_movti; + return NO_REGS; + } + + /* A TFmode or TImode memory access should be handled via an FP_REGS + because AArch64 has richer addressing modes for LDR/STR instructions + than LDP/STP instructions. */ + if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS + && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) + return FP_REGS; + + if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) + return CORE_REGS; + + return NO_REGS; +} + +static bool +aarch64_can_eliminate (const int from, const int to) +{ + /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into + HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ + + if (frame_pointer_needed) + { + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return true; + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return false; + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM + && !cfun->calls_alloca) + return true; + if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return true; + return false; + } + else + { + /* If we decided that we didn't need a leaf frame pointer but then used + LR in the function, then we'll want a frame pointer after all, so + prevent this elimination to ensure a frame pointer is used. + + NOTE: the original value of flag_omit_frame_pointer gets trashed + IFF flag_omit_leaf_frame_pointer is true, so we check the value + of faked_omit_frame_pointer here (which is true when we always + wish to keep non-leaf frame pointers but only wish to keep leaf frame + pointers when LR is clobbered). 
*/ + if (to == STACK_POINTER_REGNUM + && df_regs_ever_live_p (LR_REGNUM) + && faked_omit_frame_pointer) + return false; + } + + return true; +} + +HOST_WIDE_INT +aarch64_initial_elimination_offset (unsigned from, unsigned to) +{ + HOST_WIDE_INT frame_size; + HOST_WIDE_INT offset; + + aarch64_layout_frame (); + frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size + + crtl->outgoing_args_size + + cfun->machine->saved_varargs_size); + + frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); + offset = frame_size; + + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) + return offset - crtl->outgoing_args_size; + + if (from == FRAME_POINTER_REGNUM) + return cfun->machine->frame.saved_regs_size + get_frame_size (); + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT elim = crtl->outgoing_args_size + + cfun->machine->frame.saved_regs_size + + get_frame_size () + - cfun->machine->frame.fp_lr_offset; + elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); + return elim; + } + } + + return offset; +} + + +/* Implement RETURN_ADDR_RTX. We do not support moving back to a + previous frame. */ + +rtx +aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + return get_hard_reg_initial_val (Pmode, LR_REGNUM); +} + + +static void +aarch64_asm_trampoline_template (FILE *f) +{ + if (TARGET_ILP32) + { + asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM); + asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM); + } + else + { + asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]); + asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]); + } + asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]); + assemble_aligned_integer (4, const0_rtx); + assemble_aligned_integer (POINTER_BYTES, const0_rtx); + assemble_aligned_integer (POINTER_BYTES, const0_rtx); +} + +static void +aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, a_tramp; + const int tramp_code_sz = 16; + + /* Don't need to copy the trailing D-words, we fill those in below. */ + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL); + mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz); + fnaddr = XEXP (DECL_RTL (fndecl), 0); + if (GET_MODE (fnaddr) != ptr_mode) + fnaddr = convert_memory_address (ptr_mode, fnaddr); + emit_move_insn (mem, fnaddr); + + mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES); + emit_move_insn (mem, chain_value); + + /* XXX We should really define a "clear_cache" pattern and use + gen_clear_cache(). */ + a_tramp = XEXP (m_tramp, 0); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), + LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode, + plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE), + ptr_mode); +} + +static unsigned char +aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode) +{ + switch (regclass) + { + case CORE_REGS: + case POINTER_REGS: + case GENERAL_REGS: + case ALL_REGS: + case FP_REGS: + case FP_LO_REGS: + return + aarch64_vector_mode_p (mode) ? 
(GET_MODE_SIZE (mode) + 15) / 16 : + (GET_MODE_SIZE (mode) + 7) / 8; + case STACK_REG: + return 1; + + case NO_REGS: + return 0; + + default: + break; + } + gcc_unreachable (); +} + +static reg_class_t +aarch64_preferred_reload_class (rtx x, reg_class_t regclass) +{ + if (regclass == POINTER_REGS) + return GENERAL_REGS; + + if (regclass == STACK_REG) + { + if (REG_P(x) + && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS)) + return regclass; + + return NO_REGS; + } + + /* If it's an integer immediate that MOVI can't handle, then + FP_REGS is not an option, so we return NO_REGS instead. */ + if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS) + && !aarch64_simd_imm_scalar_p (x, GET_MODE (x))) + return NO_REGS; + + /* Register elimination can result in a request for + SP+constant->FP_REGS. We cannot support such operations which + use SP as source and an FP_REG as destination, so reject it + right now. */ + if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS) + { + rtx lhs = XEXP (x, 0); + + /* Look through a possible SUBREG introduced by ILP32. */ + if (GET_CODE (lhs) == SUBREG) + lhs = SUBREG_REG (lhs); + + gcc_assert (REG_P (lhs)); + gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)), + POINTER_REGS)); + return NO_REGS; + } + + return regclass; +} + +void +aarch64_asm_output_labelref (FILE* f, const char *name) +{ + asm_fprintf (f, "%U%s", name); +} + +static void +aarch64_elf_asm_constructor (rtx symbol, int priority) +{ + if (priority == DEFAULT_INIT_PRIORITY) + default_ctor_section_asm_out_constructor (symbol, priority); + else + { + section *s; + char buf[18]; + snprintf (buf, sizeof (buf), ".init_array.%.5u", priority); + s = get_section (buf, SECTION_WRITE, NULL); + switch_to_section (s); + assemble_align (POINTER_SIZE); + assemble_aligned_integer (POINTER_BYTES, symbol); + } +} + +static void +aarch64_elf_asm_destructor (rtx symbol, int priority) +{ + if (priority == DEFAULT_INIT_PRIORITY) + default_dtor_section_asm_out_destructor (symbol, priority); + else + { + section *s; + char buf[18]; + snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority); + s = get_section (buf, SECTION_WRITE, NULL); + switch_to_section (s); + assemble_align (POINTER_SIZE); + assemble_aligned_integer (POINTER_BYTES, symbol); + } +} + +const char* +aarch64_output_casesi (rtx *operands) +{ + char buf[100]; + char label[100]; + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); + int index; + static const char *const patterns[4][2] = + { + { + "ldrb\t%w3, [%0,%w1,uxtw]", + "add\t%3, %4, %w3, sxtb #2" + }, + { + "ldrh\t%w3, [%0,%w1,uxtw #1]", + "add\t%3, %4, %w3, sxth #2" + }, + { + "ldr\t%w3, [%0,%w1,uxtw #2]", + "add\t%3, %4, %w3, sxtw #2" + }, + /* We assume that DImode is only generated when not optimizing and + that we don't really need 64-bit address offsets. That would + imply an object file with 8GB of code in a single function! */ + { + "ldr\t%w3, [%0,%w1,uxtw #2]", + "add\t%3, %4, %w3, sxtw #2" + } + }; + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec))); + + gcc_assert (index >= 0 && index <= 3); + + /* Need to implement table size reduction, by changing the code below. 
*/ + output_asm_insn (patterns[index][0], operands); + ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2])); + snprintf (buf, sizeof (buf), + "adr\t%%4, %s", targetm.strip_name_encoding (label)); + output_asm_insn (buf, operands); + output_asm_insn (patterns[index][1], operands); + output_asm_insn ("br\t%3", operands); + assemble_label (asm_out_file, label); + return ""; +} + + +/* Return size in bits of an arithmetic operand which is shifted/scaled and + masked such that it is suitable for a UXTB, UXTH, or UXTW extend + operator. */ + +int +aarch64_uxt_size (int shift, HOST_WIDE_INT mask) +{ + if (shift >= 0 && shift <= 3) + { + int size; + for (size = 8; size <= 32; size *= 2) + { + HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1; + if (mask == bits << shift) + return size; + } + } + return 0; +} + +static bool +aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, + const_rtx x ATTRIBUTE_UNUSED) +{ + /* We can't use blocks for constants when we're using a per-function + constant pool. */ + return false; +} + +static section * +aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + /* Force all constant pool entries into the current function section. */ + return function_section (current_function_decl); +} + + +/* Costs. */ + +/* Helper function for rtx cost calculation. Strip a shift expression + from X. Returns the inner operand if successful, or the original + expression on failure. */ +static rtx +aarch64_strip_shift (rtx x) +{ + rtx op = x; + + if ((GET_CODE (op) == ASHIFT + || GET_CODE (op) == ASHIFTRT + || GET_CODE (op) == LSHIFTRT) + && CONST_INT_P (XEXP (op, 1))) + return XEXP (op, 0); + + if (GET_CODE (op) == MULT + && CONST_INT_P (XEXP (op, 1)) + && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64) + return XEXP (op, 0); + + return x; +} + +/* Helper function for rtx cost calculation. Strip a shift or extend + expression from X. Returns the inner operand if successful, or the + original expression on failure. We deal with a number of possible + canonicalization variations here. */ +static rtx +aarch64_strip_shift_or_extend (rtx x) +{ + rtx op = x; + + /* Zero and sign extraction of a widened value. */ + if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) + && XEXP (op, 2) == const0_rtx + && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1), + XEXP (op, 1))) + return XEXP (XEXP (op, 0), 0); + + /* It can also be represented (for zero-extend) as an AND with an + immediate. */ + if (GET_CODE (op) == AND + && GET_CODE (XEXP (op, 0)) == MULT + && CONST_INT_P (XEXP (XEXP (op, 0), 1)) + && CONST_INT_P (XEXP (op, 1)) + && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))), + INTVAL (XEXP (op, 1))) != 0) + return XEXP (XEXP (op, 0), 0); + + /* Now handle extended register, as this may also have an optional + left shift by 1..4. */ + if (GET_CODE (op) == ASHIFT + && CONST_INT_P (XEXP (op, 1)) + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4) + op = XEXP (op, 0); + + if (GET_CODE (op) == ZERO_EXTEND + || GET_CODE (op) == SIGN_EXTEND) + op = XEXP (op, 0); + + if (op != x) + return op; + + return aarch64_strip_shift (x); +} + +/* Calculate the cost of calculating X, storing it in *COST. Result + is true if the total cost of the operation has now been calculated. 
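+ A false return value tells the generic rtx_cost machinery to recurse into the operands and add their costs itself.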
*/ +static bool +aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, + int param ATTRIBUTE_UNUSED, int *cost, bool speed) +{ + rtx op0, op1; + const struct cpu_cost_table *extra_cost + = aarch64_tune_params->insn_extra_cost; + + switch (code) + { + case SET: + op0 = SET_DEST (x); + op1 = SET_SRC (x); + + switch (GET_CODE (op0)) + { + case MEM: + if (speed) + *cost += extra_cost->ldst.store; + + if (op1 != const0_rtx) + *cost += rtx_cost (op1, SET, 1, speed); + return true; + + case SUBREG: + if (! REG_P (SUBREG_REG (op0))) + *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed); + /* Fall through. */ + case REG: + /* Cost is just the cost of the RHS of the set. */ + *cost += rtx_cost (op1, SET, 1, true); + return true; + + case ZERO_EXTRACT: /* Bit-field insertion. */ + case SIGN_EXTRACT: + /* Strip any redundant widening of the RHS to meet the width of + the target. */ + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + if ((GET_CODE (op1) == ZERO_EXTEND + || GET_CODE (op1) == SIGN_EXTEND) + && GET_CODE (XEXP (op0, 1)) == CONST_INT + && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) + >= INTVAL (XEXP (op0, 1)))) + op1 = XEXP (op1, 0); + *cost += rtx_cost (op1, SET, 1, speed); + return true; + + default: + break; + } + return false; + + case MEM: + if (speed) + *cost += extra_cost->ldst.load; + + return true; + + case NEG: + op0 = CONST0_RTX (GET_MODE (x)); + op1 = XEXP (x, 0); + goto cost_minus; + + case COMPARE: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (op1 == const0_rtx + && GET_CODE (op0) == AND) + { + x = op0; + goto cost_logic; + } + + /* Comparisons can work if the order is swapped. + Canonicalization puts the more complex operation first, but + we want it in op1. */ + if (! (REG_P (op0) + || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) + { + op0 = XEXP (x, 1); + op1 = XEXP (x, 0); + } + goto cost_minus; + + case MINUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + cost_minus: + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT + || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC + && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) + { + if (op0 != const0_rtx) + *cost += rtx_cost (op0, MINUS, 0, speed); + + if (CONST_INT_P (op1)) + { + if (!aarch64_uimm12_shift (INTVAL (op1))) + *cost += rtx_cost (op1, MINUS, 1, speed); + } + else + { + op1 = aarch64_strip_shift_or_extend (op1); + *cost += rtx_cost (op1, MINUS, 1, speed); + } + return true; + } + + return false; + + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) + { + *cost += rtx_cost (op0, PLUS, 0, speed); + } + else + { + rtx new_op0 = aarch64_strip_shift_or_extend (op0); + + if (new_op0 == op0 + && GET_CODE (op0) == MULT) + { + if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND + && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND)) + { + *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, + speed) + + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, + speed) + + rtx_cost (op1, PLUS, 1, speed)); + if (speed) + *cost += + extra_cost->mult[GET_MODE (x) == DImode].extend_add; + return true; + } + + *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) + + rtx_cost (XEXP (op0, 1), MULT, 1, speed) + + rtx_cost (op1, PLUS, 1, speed)); + + if (speed) + *cost += extra_cost->mult[GET_MODE (x) == DImode].add; + + return true; + } + + *cost += (rtx_cost (new_op0, PLUS, 0, speed) + + rtx_cost (op1, PLUS, 1, speed)); + } + 
return true; + } + + return false; + + case IOR: + case XOR: + case AND: + cost_logic: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + if (CONST_INT_P (op1) + && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x))) + { + *cost += rtx_cost (op0, AND, 0, speed); + } + else + { + if (GET_CODE (op0) == NOT) + op0 = XEXP (op0, 0); + op0 = aarch64_strip_shift (op0); + *cost += (rtx_cost (op0, AND, 0, speed) + + rtx_cost (op1, AND, 1, speed)); + } + return true; + } + return false; + + case ZERO_EXTEND: + if ((GET_MODE (x) == DImode + && GET_MODE (XEXP (x, 0)) == SImode) + || GET_CODE (XEXP (x, 0)) == MEM) + { + *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); + return true; + } + return false; + + case SIGN_EXTEND: + if (GET_CODE (XEXP (x, 0)) == MEM) + { + *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed); + return true; + } + return false; + + case ROTATE: + if (!CONST_INT_P (XEXP (x, 1))) + *cost += COSTS_N_INSNS (2); + /* Fall through. */ + case ROTATERT: + case LSHIFTRT: + case ASHIFT: + case ASHIFTRT: + + /* Shifting by a register often takes an extra cycle. */ + if (speed && !CONST_INT_P (XEXP (x, 1))) + *cost += extra_cost->alu.arith_shift_reg; + + *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed); + return true; + + case HIGH: + if (!CONSTANT_P (XEXP (x, 0))) + *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed); + return true; + + case LO_SUM: + if (!CONSTANT_P (XEXP (x, 1))) + *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed); + *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed); + return true; + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed); + return true; + + case MULT: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + *cost = COSTS_N_INSNS (1); + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + if (CONST_INT_P (op1) + && exact_log2 (INTVAL (op1)) > 0) + { + *cost += rtx_cost (op0, ASHIFT, 0, speed); + return true; + } + + if ((GET_CODE (op0) == ZERO_EXTEND + && GET_CODE (op1) == ZERO_EXTEND) + || (GET_CODE (op0) == SIGN_EXTEND + && GET_CODE (op1) == SIGN_EXTEND)) + { + *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) + + rtx_cost (XEXP (op1, 0), MULT, 1, speed)); + if (speed) + *cost += extra_cost->mult[GET_MODE (x) == DImode].extend; + return true; + } + + if (speed) + *cost += extra_cost->mult[GET_MODE (x) == DImode].simple; + } + else if (speed) + { + if (GET_MODE (x) == DFmode) + *cost += extra_cost->fp[1].mult; + else if (GET_MODE (x) == SFmode) + *cost += extra_cost->fp[0].mult; + } + + return false; /* All arguments need to be in registers. */ + + case MOD: + case UMOD: + *cost = COSTS_N_INSNS (2); + if (speed) + { + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + *cost += (extra_cost->mult[GET_MODE (x) == DImode].add + + extra_cost->mult[GET_MODE (x) == DImode].idiv); + else if (GET_MODE (x) == DFmode) + *cost += (extra_cost->fp[1].mult + + extra_cost->fp[1].div); + else if (GET_MODE (x) == SFmode) + *cost += (extra_cost->fp[0].mult + + extra_cost->fp[0].div); + } + return false; /* All arguments need to be in registers. */ + + case DIV: + case UDIV: + *cost = COSTS_N_INSNS (1); + if (speed) + { + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv; + else if (GET_MODE (x) == DFmode) + *cost += extra_cost->fp[1].div; + else if (GET_MODE (x) == SFmode) + *cost += extra_cost->fp[0].div; + } + return false; /* All arguments need to be in registers. 
*/ + + default: + break; + } + return false; +} + +static int +aarch64_address_cost (rtx x ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) +{ + enum rtx_code c = GET_CODE (x); + const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; + + if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) + return addr_cost->pre_modify; + + if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) + return addr_cost->post_modify; + + if (c == PLUS) + { + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + return addr_cost->imm_offset; + else if (GET_CODE (XEXP (x, 0)) == MULT + || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + return addr_cost->register_extend; + + return addr_cost->register_offset; + } + else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) + return addr_cost->imm_offset; + + return 0; +} + +static int +aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + const struct cpu_regmove_cost *regmove_cost + = aarch64_tune_params->regmove_cost; + + /* Moving between GPR and stack cost is the same as GP2GP. */ + if ((from == GENERAL_REGS && to == STACK_REG) + || (to == GENERAL_REGS && from == STACK_REG)) + return regmove_cost->GP2GP; + + /* To/From the stack register, we move via the gprs. */ + if (to == STACK_REG || from == STACK_REG) + return aarch64_register_move_cost (mode, from, GENERAL_REGS) + + aarch64_register_move_cost (mode, GENERAL_REGS, to); + + if (from == GENERAL_REGS && to == GENERAL_REGS) + return regmove_cost->GP2GP; + else if (from == GENERAL_REGS) + return regmove_cost->GP2FP; + else if (to == GENERAL_REGS) + return regmove_cost->FP2GP; + + /* When AdvSIMD instructions are disabled it is not possible to move + a 128-bit value directly between Q registers. This is handled in + secondary reload. A general register is used as a scratch to move + the upper DI value and the lower DI value is moved directly, + hence the cost is the sum of three moves. */ + + if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128) + return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; + + return regmove_cost->FP2FP; +} + +static int +aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return aarch64_tune_params->memmov_cost; +} + +/* Return the number of instructions that can be issued per cycle. */ +static int +aarch64_sched_issue_rate (void) +{ + return aarch64_tune_params->issue_rate; +} + +/* Vectorizer cost model target hooks. */ + +/* Implement targetm.vectorize.builtin_vectorization_cost. 
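+ Most cases simply return the corresponding entry of the current tuning's vec_costs table.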
*/ +static int +aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return aarch64_tune_params->vec_costs->scalar_stmt_cost; + + case scalar_load: + return aarch64_tune_params->vec_costs->scalar_load_cost; + + case scalar_store: + return aarch64_tune_params->vec_costs->scalar_store_cost; + + case vector_stmt: + return aarch64_tune_params->vec_costs->vec_stmt_cost; + + case vector_load: + return aarch64_tune_params->vec_costs->vec_align_load_cost; + + case vector_store: + return aarch64_tune_params->vec_costs->vec_store_cost; + + case vec_to_scalar: + return aarch64_tune_params->vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: + return aarch64_tune_params->vec_costs->scalar_to_vec_cost; + + case unaligned_load: + return aarch64_tune_params->vec_costs->vec_unalign_load_cost; + + case unaligned_store: + return aarch64_tune_params->vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: + return aarch64_tune_params->vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: + return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: + return aarch64_tune_params->vec_costs->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); + return elements / 2 + 1; + + default: + gcc_unreachable (); + } +} + +/* Implement targetm.vectorize.add_stmt_cost. */ +static unsigned +aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int misalign, + enum vect_cost_model_location where) +{ + unsigned *cost = (unsigned *) data; + unsigned retval = 0; + + if (flag_vect_cost_model) + { + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = + aarch64_builtin_vectorization_cost (kind, vectype, misalign); + + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + a function (linear for now) of the loop nest level. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + { + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_info); + unsigned nest_level = loop_depth (loop); + + count *= nest_level; + } + + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; + } + + return retval; +} + +static void initialize_aarch64_code_model (void); + +/* Parse the architecture extension string. */ + +static void +aarch64_parse_extension (char *str) +{ + /* The extension string is parsed left to right. */ + const struct aarch64_option_extension *opt = NULL; + + /* Flag to say whether we are adding or removing an extension. */ + int adding_ext = -1; + + while (str != NULL && *str != 0) + { + char *ext; + size_t len; + + str++; + ext = strchr (str, '+'); + + if (ext != NULL) + len = ext - str; + else + len = strlen (str); + + if (len >= 2 && strncmp (str, "no", 2) == 0) + { + adding_ext = 0; + len -= 2; + str += 2; + } + else if (len > 0) + adding_ext = 1; + + if (len == 0) + { + error ("missing feature modifier after %qs", "+no"); + return; + } + + /* Scan over the extensions table trying to find an exact match. */ + for (opt = all_extensions; opt->name != NULL; opt++) + { + if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0) + { + /* Add or remove the extension. 
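+ ADDING_EXT is 1 if the modifier enables the extension and 0 if it was prefixed with "no".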
*/ + if (adding_ext) + aarch64_isa_flags |= opt->flags_on; + else + aarch64_isa_flags &= ~(opt->flags_off); + break; + } + } + + if (opt->name == NULL) + { + /* Extension not found in list. */ + error ("unknown feature modifier %qs", str); + return; + } + + str = ext; + }; + + return; +} + +/* Parse the ARCH string. */ + +static void +aarch64_parse_arch (void) +{ + char *ext; + const struct processor *arch; + char *str = (char *) alloca (strlen (aarch64_arch_string) + 1); + size_t len; + + strcpy (str, aarch64_arch_string); + + ext = strchr (str, '+'); + + if (ext != NULL) + len = ext - str; + else + len = strlen (str); + + if (len == 0) + { + error ("missing arch name in -march=%qs", str); + return; + } + + /* Loop through the list of supported ARCHs to find a match. */ + for (arch = all_architectures; arch->name != NULL; arch++) + { + if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0) + { + selected_arch = arch; + aarch64_isa_flags = selected_arch->flags; + + if (!selected_cpu) + selected_cpu = &all_cores[selected_arch->core]; + + if (ext != NULL) + { + /* ARCH string contains at least one extension. */ + aarch64_parse_extension (ext); + } + + if (strcmp (selected_arch->arch, selected_cpu->arch)) + { + warning (0, "switch -mcpu=%s conflicts with -march=%s switch", + selected_cpu->name, selected_arch->name); + } + + return; + } + } + + /* ARCH name not found in list. */ + error ("unknown value %qs for -march", str); + return; +} + +/* Parse the CPU string. */ + +static void +aarch64_parse_cpu (void) +{ + char *ext; + const struct processor *cpu; + char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1); + size_t len; + + strcpy (str, aarch64_cpu_string); + + ext = strchr (str, '+'); + + if (ext != NULL) + len = ext - str; + else + len = strlen (str); + + if (len == 0) + { + error ("missing cpu name in -mcpu=%qs", str); + return; + } + + /* Loop through the list of supported CPUs to find a match. */ + for (cpu = all_cores; cpu->name != NULL; cpu++) + { + if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0) + { + selected_cpu = cpu; + selected_tune = cpu; + aarch64_isa_flags = selected_cpu->flags; + + if (ext != NULL) + { + /* CPU string contains at least one extension. */ + aarch64_parse_extension (ext); + } + + return; + } + } + + /* CPU name not found in list. */ + error ("unknown value %qs for -mcpu", str); + return; +} + +/* Parse the TUNE string. */ + +static void +aarch64_parse_tune (void) +{ + const struct processor *cpu; + char *str = (char *) alloca (strlen (aarch64_tune_string) + 1); + strcpy (str, aarch64_tune_string); + + /* Loop through the list of supported CPUs to find a match. */ + for (cpu = all_cores; cpu->name != NULL; cpu++) + { + if (strcmp (cpu->name, str) == 0) + { + selected_tune = cpu; + return; + } + } + + /* CPU name not found in list. */ + error ("unknown value %qs for -mtune", str); + return; +} + + +/* Implement TARGET_OPTION_OVERRIDE. */ + +static void +aarch64_override_options (void) +{ + /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU. + If either of -march or -mtune is given, they override their + respective component of -mcpu. + + So, first parse AARCH64_CPU_STRING, then the others, be careful + with -march as, if -mcpu is not present on the command line, march + must set a sensible default CPU. 
*/ + if (aarch64_cpu_string) + { + aarch64_parse_cpu (); + } + + if (aarch64_arch_string) + { + aarch64_parse_arch (); + } + + if (aarch64_tune_string) + { + aarch64_parse_tune (); + } + +#ifndef HAVE_AS_MABI_OPTION + /* The compiler may have been configured with 2.23.* binutils, which does + not have support for ILP32. */ + if (TARGET_ILP32) + error ("Assembler does not support -mabi=ilp32"); +#endif + + initialize_aarch64_code_model (); + + aarch64_build_bitmask_table (); + + /* This target defaults to strict volatile bitfields. */ + if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) + flag_strict_volatile_bitfields = 1; + + /* If the user did not specify a processor, choose the default + one for them. This will be the CPU set during configuration using + --with-cpu, otherwise it is "generic". */ + if (!selected_cpu) + { + selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f]; + aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6; + } + + gcc_assert (selected_cpu); + + /* The selected cpu may be an architecture, so lookup tuning by core ID. */ + if (!selected_tune) + selected_tune = &all_cores[selected_cpu->core]; + + aarch64_tune_flags = selected_tune->flags; + aarch64_tune = selected_tune->core; + aarch64_tune_params = selected_tune->tune; + + aarch64_override_options_after_change (); +} + +/* Implement targetm.override_options_after_change. */ + +static void +aarch64_override_options_after_change (void) +{ + faked_omit_frame_pointer = false; + + /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so + that aarch64_frame_pointer_required will be called. We need to remember + whether flag_omit_frame_pointer was turned on normally or just faked. */ + + if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer) + { + flag_omit_frame_pointer = true; + faked_omit_frame_pointer = true; + } +} + +static struct machine_function * +aarch64_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_alloc_cleared_machine_function (); + return machine; +} + +void +aarch64_init_expanders (void) +{ + init_machine_status = aarch64_init_machine_status; +} + +/* A checking mechanism for the implementation of the various code models. */ +static void +initialize_aarch64_code_model (void) +{ + if (flag_pic) + { + switch (aarch64_cmodel_var) + { + case AARCH64_CMODEL_TINY: + aarch64_cmodel = AARCH64_CMODEL_TINY_PIC; + break; + case AARCH64_CMODEL_SMALL: + aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; + break; + case AARCH64_CMODEL_LARGE: + sorry ("code model %qs with -f%s", "large", + flag_pic > 1 ? "PIC" : "pic"); + default: + gcc_unreachable (); + } + } + else + aarch64_cmodel = aarch64_cmodel_var; +} + +/* Return true if SYMBOL_REF X binds locally. */ + +static bool +aarch64_symbol_binds_local_p (const_rtx x) +{ + return (SYMBOL_REF_DECL (x) + ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) + : SYMBOL_REF_LOCAL_P (x)); +} + +/* Return true if SYMBOL_REF X is thread local */ +static bool +aarch64_tls_symbol_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + if (GET_CODE (x) != SYMBOL_REF) + return false; + + return SYMBOL_REF_TLS_MODEL (x) != 0; +} + +/* Classify a TLS symbol into one of the TLS kinds. */ +enum aarch64_symbol_type +aarch64_classify_tls_symbol (rtx x) +{ + enum tls_model tls_kind = tls_symbolic_operand_type (x); + + switch (tls_kind) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + case TLS_MODEL_LOCAL_DYNAMIC: + return TARGET_TLS_DESC ? 
SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD; + + case TLS_MODEL_INITIAL_EXEC: + return SYMBOL_SMALL_GOTTPREL; + + case TLS_MODEL_LOCAL_EXEC: + return SYMBOL_SMALL_TPREL; + + case TLS_MODEL_EMULATED: + case TLS_MODEL_NONE: + return SYMBOL_FORCE_TO_MEM; + + default: + gcc_unreachable (); + } +} + +/* Return the method that should be used to access SYMBOL_REF or + LABEL_REF X in context CONTEXT. */ + +enum aarch64_symbol_type +aarch64_classify_symbol (rtx x, + enum aarch64_symbol_context context ATTRIBUTE_UNUSED) +{ + if (GET_CODE (x) == LABEL_REF) + { + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_LARGE: + return SYMBOL_FORCE_TO_MEM; + + case AARCH64_CMODEL_TINY_PIC: + case AARCH64_CMODEL_TINY: + return SYMBOL_TINY_ABSOLUTE; + + case AARCH64_CMODEL_SMALL_PIC: + case AARCH64_CMODEL_SMALL: + return SYMBOL_SMALL_ABSOLUTE; + + default: + gcc_unreachable (); + } + } + + if (GET_CODE (x) == SYMBOL_REF) + { + if (aarch64_cmodel == AARCH64_CMODEL_LARGE) + return SYMBOL_FORCE_TO_MEM; + + if (aarch64_tls_symbol_p (x)) + return aarch64_classify_tls_symbol (x); + + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_TINY: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_TINY_ABSOLUTE; + + case AARCH64_CMODEL_SMALL: + if (SYMBOL_REF_WEAK (x)) + return SYMBOL_FORCE_TO_MEM; + return SYMBOL_SMALL_ABSOLUTE; + + case AARCH64_CMODEL_TINY_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_TINY_GOT; + return SYMBOL_TINY_ABSOLUTE; + + case AARCH64_CMODEL_SMALL_PIC: + if (!aarch64_symbol_binds_local_p (x)) + return SYMBOL_SMALL_GOT; + return SYMBOL_SMALL_ABSOLUTE; + + default: + gcc_unreachable (); + } + } + + /* By default push everything into the constant pool. */ + return SYMBOL_FORCE_TO_MEM; +} + +bool +aarch64_constant_address_p (rtx x) +{ + return (CONSTANT_P (x) && memory_address_p (DImode, x)); +} + +bool +aarch64_legitimate_pic_operand_p (rtx x) +{ + if (GET_CODE (x) == SYMBOL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) + return false; + + return true; +} + +/* Return true if X holds either a quarter-precision or + floating-point +0.0 constant. */ +static bool +aarch64_valid_floating_const (enum machine_mode mode, rtx x) +{ + if (!CONST_DOUBLE_P (x)) + return false; + + /* TODO: We could handle moving 0.0 to a TFmode register, + but first we would like to refactor the movtf_aarch64 + to be more amicable to split moves properly and + correctly gate on TARGET_SIMD. For now - reject all + constants which are not to SFmode or DFmode registers. */ + if (!(mode == SFmode || mode == DFmode)) + return false; + + if (aarch64_float_const_zero_rtx_p (x)) + return true; + return aarch64_float_const_representable_p (x); +} + +static bool +aarch64_legitimate_constant_p (enum machine_mode mode, rtx x) +{ + /* Do not allow vector struct mode constants. We could support + 0 and -1 easily, but they need support in aarch64-simd.md. */ + if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode)) + return false; + + /* This could probably go away because + we now decompose CONST_INTs according to expand_mov_immediate. 
*/ + if ((GET_CODE (x) == CONST_VECTOR + && aarch64_simd_valid_immediate (x, mode, false, NULL)) + || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) + return !targetm.cannot_force_const_mem (mode, x); + + if (GET_CODE (x) == HIGH + && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) + return true; + + return aarch64_constant_address_p (x); +} + +rtx +aarch64_load_tp (rtx target) +{ + if (!target + || GET_MODE (target) != Pmode + || !register_operand (target, Pmode)) + target = gen_reg_rtx (Pmode); + + /* Can return in any reg. */ + emit_insn (gen_aarch64_load_tp_hard (target)); + return target; +} + +/* On AAPCS systems, this is the "struct __va_list". */ +static GTY(()) tree va_list_type; + +/* Implement TARGET_BUILD_BUILTIN_VA_LIST. + Return the type to use as __builtin_va_list. + + AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as: + + struct __va_list + { + void *__stack; + void *__gr_top; + void *__vr_top; + int __gr_offs; + int __vr_offs; + }; */ + +static tree +aarch64_build_builtin_va_list (void) +{ + tree va_list_name; + tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; + + /* Create the type. */ + va_list_type = lang_hooks.types.make_type (RECORD_TYPE); + /* Give it the required name. */ + va_list_name = build_decl (BUILTINS_LOCATION, + TYPE_DECL, + get_identifier ("__va_list"), + va_list_type); + DECL_ARTIFICIAL (va_list_name) = 1; + TYPE_NAME (va_list_type) = va_list_name; + TYPE_STUB_DECL (va_list_type) = va_list_name; + + /* Create the fields. */ + f_stack = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__stack"), + ptr_type_node); + f_grtop = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__gr_top"), + ptr_type_node); + f_vrtop = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__vr_top"), + ptr_type_node); + f_groff = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__gr_offs"), + integer_type_node); + f_vroff = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__vr_offs"), + integer_type_node); + + DECL_ARTIFICIAL (f_stack) = 1; + DECL_ARTIFICIAL (f_grtop) = 1; + DECL_ARTIFICIAL (f_vrtop) = 1; + DECL_ARTIFICIAL (f_groff) = 1; + DECL_ARTIFICIAL (f_vroff) = 1; + + DECL_FIELD_CONTEXT (f_stack) = va_list_type; + DECL_FIELD_CONTEXT (f_grtop) = va_list_type; + DECL_FIELD_CONTEXT (f_vrtop) = va_list_type; + DECL_FIELD_CONTEXT (f_groff) = va_list_type; + DECL_FIELD_CONTEXT (f_vroff) = va_list_type; + + TYPE_FIELDS (va_list_type) = f_stack; + DECL_CHAIN (f_stack) = f_grtop; + DECL_CHAIN (f_grtop) = f_vrtop; + DECL_CHAIN (f_vrtop) = f_groff; + DECL_CHAIN (f_groff) = f_vroff; + + /* Compute its layout. */ + layout_type (va_list_type); + + return va_list_type; +} + +/* Implement TARGET_EXPAND_BUILTIN_VA_START. 
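+ As a worked illustration of the code below: for a hypothetical variadic
+ function int f (int n, ...) whose named arguments occupy one general
+ register and no FP/SIMD registers, va_start leaves __gr_offs at -56 (seven
+ unsaved 8-byte GP argument registers) and __vr_offs at -128 (eight 16-byte
+ vector registers), with __gr_top and __vr_top pointing just past the
+ corresponding register save areas and __stack at the next stacked argument.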
*/ +static void +aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + const CUMULATIVE_ARGS *cum; + tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; + tree stack, grtop, vrtop, groff, vroff; + tree t; + int gr_save_area_size; + int vr_save_area_size; + int vr_offset; + + cum = &crtl->args.info; + gr_save_area_size + = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD; + vr_save_area_size + = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG; + + if (TARGET_GENERAL_REGS_ONLY) + { + if (cum->aapcs_nvrn > 0) + sorry ("%qs and floating point or vector arguments", + "-mgeneral-regs-only"); + vr_save_area_size = 0; + } + + f_stack = TYPE_FIELDS (va_list_type_node); + f_grtop = DECL_CHAIN (f_stack); + f_vrtop = DECL_CHAIN (f_grtop); + f_groff = DECL_CHAIN (f_vrtop); + f_vroff = DECL_CHAIN (f_groff); + + stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack, + NULL_TREE); + grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop, + NULL_TREE); + vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop, + NULL_TREE); + groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff, + NULL_TREE); + vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff, + NULL_TREE); + + /* Emit code to initialize STACK, which points to the next varargs stack + argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used + by named arguments. STACK is 8-byte aligned. */ + t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx); + if (cum->aapcs_stack_size > 0) + t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Emit code to initialize GRTOP, the top of the GR save area. + virtual_incoming_args_rtx should have been 16 byte aligned. */ + t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx); + t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Emit code to initialize VRTOP, the top of the VR save area. + This address is gr_save_area_bytes below GRTOP, rounded + down to the next 16-byte boundary. */ + t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx); + vr_offset = AARCH64_ROUND_UP (gr_save_area_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + if (vr_offset) + t = fold_build_pointer_plus_hwi (t, -vr_offset); + t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Emit code to initialize GROFF, the offset from GRTOP of the + next GPR argument. */ + t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff, + build_int_cst (TREE_TYPE (groff), -gr_save_area_size)); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + /* Likewise emit code to initialize VROFF, the offset from FTOP + of the next VR argument. */ + t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff, + build_int_cst (TREE_TYPE (vroff), -vr_save_area_size)); + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +} + +/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ + +static tree +aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p ATTRIBUTE_UNUSED) +{ + tree addr; + bool indirect_p; + bool is_ha; /* is HFA or HVA. */ + bool dw_align; /* double-word align. 
*/ + enum machine_mode ag_mode = VOIDmode; + int nregs; + enum machine_mode mode; + + tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; + tree stack, f_top, f_off, off, arg, roundup, on_stack; + HOST_WIDE_INT size, rsize, adjust, align; + tree t, u, cond1, cond2; + + indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect_p) + type = build_pointer_type (type); + + mode = TYPE_MODE (type); + + f_stack = TYPE_FIELDS (va_list_type_node); + f_grtop = DECL_CHAIN (f_stack); + f_vrtop = DECL_CHAIN (f_grtop); + f_groff = DECL_CHAIN (f_vrtop); + f_vroff = DECL_CHAIN (f_groff); + + stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist), + f_stack, NULL_TREE); + size = int_size_in_bytes (type); + align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT; + + dw_align = false; + adjust = 0; + if (aarch64_vfp_is_call_or_return_candidate (mode, + type, + &ag_mode, + &nregs, + &is_ha)) + { + /* TYPE passed in fp/simd registers. */ + if (TARGET_GENERAL_REGS_ONLY) + sorry ("%qs and floating point or vector arguments", + "-mgeneral-regs-only"); + + f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), + unshare_expr (valist), f_vrtop, NULL_TREE); + f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), + unshare_expr (valist), f_vroff, NULL_TREE); + + rsize = nregs * UNITS_PER_VREG; + + if (is_ha) + { + if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG) + adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode); + } + else if (BLOCK_REG_PADDING (mode, type, 1) == downward + && size < UNITS_PER_VREG) + { + adjust = UNITS_PER_VREG - size; + } + } + else + { + /* TYPE passed in general registers. */ + f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), + unshare_expr (valist), f_grtop, NULL_TREE); + f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff), + unshare_expr (valist), f_groff, NULL_TREE); + rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; + nregs = rsize / UNITS_PER_WORD; + + if (align > 8) + dw_align = true; + + if (BLOCK_REG_PADDING (mode, type, 1) == downward + && size < UNITS_PER_WORD) + { + adjust = UNITS_PER_WORD - size; + } + } + + /* Get a local temporary for the field value. */ + off = get_initialized_tmp_var (f_off, pre_p, NULL); + + /* Emit code to branch if off >= 0. */ + t = build2 (GE_EXPR, boolean_type_node, off, + build_int_cst (TREE_TYPE (off), 0)); + cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE); + + if (dw_align) + { + /* Emit: offs = (offs + 15) & -16. */ + t = build2 (PLUS_EXPR, TREE_TYPE (off), off, + build_int_cst (TREE_TYPE (off), 15)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t, + build_int_cst (TREE_TYPE (off), -16)); + roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t); + } + else + roundup = NULL; + + /* Update ap.__[g|v]r_offs */ + t = build2 (PLUS_EXPR, TREE_TYPE (off), off, + build_int_cst (TREE_TYPE (off), rsize)); + t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t); + + /* String up. */ + if (roundup) + t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); + + /* [cond2] if (ap.__[g|v]r_offs > 0) */ + u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), + build_int_cst (TREE_TYPE (f_off), 0)); + cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE); + + /* String up: make sure the assignment happens before the use. */ + t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2); + COND_EXPR_ELSE (cond1) = t; + + /* Prepare the trees handling the argument that is passed on the stack; + the top level node will store in ON_STACK. 
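+ Schematically the trees built below amount to: arg = ap.__stack; if the
+ type needs 16-byte alignment, arg = (arg + 15) & -16; then
+ ap.__stack = (arg + size + 7) & -8; ON_STACK evaluates to ARG, adjusted
+ upwards on big-endian targets when the value is smaller than a word.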
*/ + arg = get_initialized_tmp_var (stack, pre_p, NULL); + if (align > 8) + { + /* if (alignof(type) > 8) (arg = arg + 15) & -16; */ + t = fold_convert (intDI_type_node, arg); + t = build2 (PLUS_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), 15)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -16)); + t = fold_convert (TREE_TYPE (arg), t); + roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t); + } + else + roundup = NULL; + /* Advance ap.__stack */ + t = fold_convert (intDI_type_node, arg); + t = build2 (PLUS_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), size + 7)); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -8)); + t = fold_convert (TREE_TYPE (arg), t); + t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t); + /* String up roundup and advance. */ + if (roundup) + t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); + /* String up with arg */ + on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg); + /* Big-endianness related address adjustment. */ + if (BLOCK_REG_PADDING (mode, type, 1) == downward + && size < UNITS_PER_WORD) + { + t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg, + size_int (UNITS_PER_WORD - size)); + on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t); + } + + COND_EXPR_THEN (cond1) = unshare_expr (on_stack); + COND_EXPR_THEN (cond2) = unshare_expr (on_stack); + + /* Adjustment to OFFSET in the case of BIG_ENDIAN. */ + t = off; + if (adjust) + t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off, + build_int_cst (TREE_TYPE (off), adjust)); + + t = fold_convert (sizetype, t); + t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t); + + if (is_ha) + { + /* type ha; // treat as "struct {ftype field[n];}" + ... [computing offs] + for (i = 0; i 0) + sorry ("%qs and floating point or vector arguments", + "-mgeneral-regs-only"); + vr_saved = 0; + } + + if (!no_rtl) + { + if (gr_saved > 0) + { + rtx ptr, mem; + + /* virtual_incoming_args_rtx should have been 16-byte aligned. */ + ptr = plus_constant (Pmode, virtual_incoming_args_rtx, + - gr_saved * UNITS_PER_WORD); + mem = gen_frame_mem (BLKmode, ptr); + set_mem_alias_set (mem, get_varargs_alias_set ()); + + move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM, + mem, gr_saved); + } + if (vr_saved > 0) + { + /* We can't use move_block_from_reg, because it will use + the wrong mode, storing D regs only. */ + enum machine_mode mode = TImode; + int off, i; + + /* Set OFF to the offset from virtual_incoming_args_rtx of + the first vector register. The VR save area lies below + the GR one, and is aligned to 16 bytes. */ + off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, + STACK_BOUNDARY / BITS_PER_UNIT); + off -= vr_saved * UNITS_PER_VREG; + + for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i) + { + rtx ptr, mem; + + ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off); + mem = gen_frame_mem (mode, ptr); + set_mem_alias_set (mem, get_varargs_alias_set ()); + aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i)); + off += UNITS_PER_VREG; + } + } + } + + /* We don't save the size into *PRETEND_SIZE because we want to avoid + any complication of having crtl->args.pretend_args_size changed. 
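+ The combined size of the two dump areas is recorded below in
+ cfun->machine->saved_varargs_size instead; e.g. with one general register
+ and no FP/SIMD registers used by named arguments this is
+ AARCH64_ROUND_UP (7 * 8, 16) + 8 * 16 = 192 bytes.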
*/ + cfun->machine->saved_varargs_size + = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, + STACK_BOUNDARY / BITS_PER_UNIT) + + vr_saved * UNITS_PER_VREG); +} + +static void +aarch64_conditional_register_usage (void) +{ + int i; + if (!TARGET_FLOAT) + { + for (i = V0_REGNUM; i <= V31_REGNUM; i++) + { + fixed_regs[i] = 1; + call_used_regs[i] = 1; + } + } +} + +/* Walk down the type tree of TYPE counting consecutive base elements. + If *MODEP is VOIDmode, then set it to the first valid floating point + type. If a non-floating point type is found, or if a floating point + type that doesn't match a non-VOIDmode *MODEP is found, then return -1, + otherwise return the count in the sub-tree. */ +static int +aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) +{ + enum machine_mode mode; + HOST_WIDE_INT size; + + switch (TREE_CODE (type)) + { + case REAL_TYPE: + mode = TYPE_MODE (type); + if (mode != DFmode && mode != SFmode && mode != TFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 1; + + break; + + case COMPLEX_TYPE: + mode = TYPE_MODE (TREE_TYPE (type)); + if (mode != DFmode && mode != SFmode && mode != TFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 2; + + break; + + case VECTOR_TYPE: + /* Use V2SImode and V4SImode as representatives of all 64-bit + and 128-bit vector types. */ + size = int_size_in_bytes (type); + switch (size) + { + case 8: + mode = V2SImode; + break; + case 16: + mode = V4SImode; + break; + default: + return -1; + } + + if (*modep == VOIDmode) + *modep = mode; + + /* Vector modes are considered to be opaque: two vectors are + equivalent for the purposes of being homogeneous aggregates + if they are the same size. */ + if (*modep == mode) + return 1; + + break; + + case ARRAY_TYPE: + { + int count; + tree index = TYPE_DOMAIN (type); + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); + if (count == -1 + || !index + || !TYPE_MAX_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) + || !TYPE_MIN_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) + || count < 0) + return -1; + + count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) + - tree_to_uhwi (TYPE_MIN_VALUE (index))); + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case RECORD_TYPE: + { + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count += sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + /* These aren't very interesting except in a degenerate case. */ + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. 
*/ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count = count > sub_count ? count : sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + default: + break; + } + + return -1; +} + +/* Return true if we use LRA instead of reload pass. */ +static bool +aarch64_lra_p (void) +{ + return aarch64_lra_flag; +} + +/* Return TRUE if the type, as described by TYPE and MODE, is a composite + type as described in AAPCS64 \S 4.3. This includes aggregate, union and + array types. The C99 floating-point complex types are also considered + as composite types, according to AAPCS64 \S 7.1.1. The complex integer + types, which are GCC extensions and out of the scope of AAPCS64, are + treated as composite types here as well. + + Note that MODE itself is not sufficient in determining whether a type + is such a composite type or not. This is because + stor-layout.c:compute_record_mode may have already changed the MODE + (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a + structure with only one field may have its MODE set to the mode of the + field. Also an integer mode whose size matches the size of the + RECORD_TYPE type may be used to substitute the original mode + (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be + solely relied on. */ + +static bool +aarch64_composite_type_p (const_tree type, + enum machine_mode mode) +{ + if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)) + return true; + + if (mode == BLKmode + || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT + || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) + return true; + + return false; +} + +/* Return TRUE if the type, as described by TYPE and MODE, is a short vector + type as described in AAPCS64 \S 4.1.2. + + See the comment above aarch64_composite_type_p for the notes on MODE. */ + +static bool +aarch64_short_vector_p (const_tree type, + enum machine_mode mode) +{ + HOST_WIDE_INT size = -1; + + if (type && TREE_CODE (type) == VECTOR_TYPE) + size = int_size_in_bytes (type); + else if (!aarch64_composite_type_p (type, mode) + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)) + size = GET_MODE_SIZE (mode); + + return (size == 8 || size == 16) ? true : false; +} + +/* Return TRUE if an argument, whose type is described by TYPE and MODE, + shall be passed or returned in simd/fp register(s) (providing these + parameter passing registers are available). + + Upon successful return, *COUNT returns the number of needed registers, + *BASE_MODE returns the mode of the individual register and when IS_HAF + is not NULL, *IS_HA indicates whether or not the argument is a homogeneous + floating-point aggregate or a homogeneous short-vector aggregate. 
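+ For example, a struct of three floats is a homogeneous floating-point
+ aggregate under AAPCS64: on success *COUNT is 3, *BASE_MODE is SFmode and
+ *IS_HA is true, so the value may be passed in three consecutive FP/SIMD
+ registers; a struct mixing a float and a double is rejected.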
*/ + +static bool +aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode, + const_tree type, + enum machine_mode *base_mode, + int *count, + bool *is_ha) +{ + enum machine_mode new_mode = VOIDmode; + bool composite_p = aarch64_composite_type_p (type, mode); + + if (is_ha != NULL) *is_ha = false; + + if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) + || aarch64_short_vector_p (type, mode)) + { + *count = 1; + new_mode = mode; + } + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + { + if (is_ha != NULL) *is_ha = true; + *count = 2; + new_mode = GET_MODE_INNER (mode); + } + else if (type && composite_p) + { + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); + + if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS) + { + if (is_ha != NULL) *is_ha = true; + *count = ag_count; + } + else + return false; + } + else + return false; + + *base_mode = new_mode; + return true; +} + +/* Implement TARGET_STRUCT_VALUE_RTX. */ + +static rtx +aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM); +} + +/* Implements target hook vector_mode_supported_p. */ +static bool +aarch64_vector_mode_supported_p (enum machine_mode mode) +{ + if (TARGET_SIMD + && (mode == V4SImode || mode == V8HImode + || mode == V16QImode || mode == V2DImode + || mode == V2SImode || mode == V4HImode + || mode == V8QImode || mode == V2SFmode + || mode == V4SFmode || mode == V2DFmode)) + return true; + + return false; +} + +/* Return appropriate SIMD container + for MODE within a vector of WIDTH bits. */ +static enum machine_mode +aarch64_simd_container_mode (enum machine_mode mode, unsigned width) +{ + gcc_assert (width == 64 || width == 128); + if (TARGET_SIMD) + { + if (width == 128) + switch (mode) + { + case DFmode: + return V2DFmode; + case SFmode: + return V4SFmode; + case SImode: + return V4SImode; + case HImode: + return V8HImode; + case QImode: + return V16QImode; + case DImode: + return V2DImode; + default: + break; + } + else + switch (mode) + { + case SFmode: + return V2SFmode; + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + default: + break; + } + } + return word_mode; +} + +/* Return 128-bit container as the preferred SIMD mode for MODE. */ +static enum machine_mode +aarch64_preferred_simd_mode (enum machine_mode mode) +{ + return aarch64_simd_container_mode (mode, 128); +} + +/* Return the bitmask of possible vector sizes for the vectorizer + to iterate over. */ +static unsigned int +aarch64_autovectorize_vector_sizes (void) +{ + return (16 | 8); +} + +/* A table to help perform AArch64-specific name mangling for AdvSIMD + vector types in order to conform to the AAPCS64 (see "Procedure + Call Standard for the ARM 64-bit Architecture", Appendix A). To + qualify for emission with the mangled names defined in that document, + a vector type must not only be of the correct mode but also be + composed of AdvSIMD vector element types (e.g. + _builtin_aarch64_simd_qi); these types are registered by + aarch64_init_simd_builtins (). In other words, vector types defined + in other ways e.g. via vector_size attribute will get default + mangled names. */ +typedef struct +{ + enum machine_mode mode; + const char *element_type_name; + const char *mangled_name; +} aarch64_simd_mangle_map_entry; + +static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = { + /* 64-bit containerized types. 
*/ + { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" }, + { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" }, + { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" }, + { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" }, + { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" }, + { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" }, + { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" }, + { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" }, + { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" }, + /* 128-bit containerized types. */ + { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" }, + { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" }, + { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" }, + { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" }, + { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" }, + { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" }, + { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" }, + { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" }, + { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" }, + { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" }, + { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" }, + { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" }, + { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" }, + { VOIDmode, NULL, NULL } +}; + +/* Implement TARGET_MANGLE_TYPE. */ + +static const char * +aarch64_mangle_type (const_tree type) +{ + /* The AArch64 ABI documents say that "__va_list" has to be + managled as if it is in the "std" namespace. */ + if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) + return "St9__va_list"; + + /* Check the mode of the vector type, and the name of the vector + element type, against the table. */ + if (TREE_CODE (type) == VECTOR_TYPE) + { + aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map; + + while (pos->mode != VOIDmode) + { + tree elt_type = TREE_TYPE (type); + + if (pos->mode == TYPE_MODE (type) + && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL + && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))), + pos->element_type_name)) + return pos->mangled_name; + + pos++; + } + } + + /* Use the default mangling. */ + return NULL; +} + +/* Return the equivalent letter for size. */ +static char +sizetochar (int size) +{ + switch (size) + { + case 64: return 'd'; + case 32: return 's'; + case 16: return 'h'; + case 8 : return 'b'; + default: gcc_unreachable (); + } +} + +/* Return true iff x is a uniform vector of floating-point + constants, and the constant can be represented in + quarter-precision form. Note, as aarch64_float_const_representable + rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */ +static bool +aarch64_vect_float_const_representable_p (rtx x) +{ + int i = 0; + REAL_VALUE_TYPE r0, ri; + rtx x0, xi; + + if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT) + return false; + + x0 = CONST_VECTOR_ELT (x, 0); + if (!CONST_DOUBLE_P (x0)) + return false; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, x0); + + for (i = 1; i < CONST_VECTOR_NUNITS (x); i++) + { + xi = CONST_VECTOR_ELT (x, i); + if (!CONST_DOUBLE_P (xi)) + return false; + + REAL_VALUE_FROM_CONST_DOUBLE (ri, xi); + if (!REAL_VALUES_EQUAL (r0, ri)) + return false; + } + + return aarch64_float_const_representable_p (x0); +} + +/* Return true for valid and false for invalid. 
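+ For example, a V4SImode vector with every element equal to 0x45 is accepted
+ (the unshifted 32-bit MOVI form), whereas 0x12345678 replicated across a
+ V4SImode vector matches none of the patterns and is rejected. When INFO is
+ non-null it is filled in with the element width, shift and MVN/MSL flags
+ needed to emit the instruction.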
*/ +bool +aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse, + struct simd_immediate_info *info) +{ +#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ + matches = 1; \ + for (i = 0; i < idx; i += (STRIDE)) \ + if (!(TEST)) \ + matches = 0; \ + if (matches) \ + { \ + immtype = (CLASS); \ + elsize = (ELSIZE); \ + eshift = (SHIFT); \ + emvn = (NEG); \ + break; \ + } + + unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); + unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); + unsigned char bytes[16]; + int immtype = -1, matches; + unsigned int invmask = inverse ? 0xff : 0; + int eshift, emvn; + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + if (! (aarch64_simd_imm_zero_p (op, mode) + || aarch64_vect_float_const_representable_p (op))) + return false; + + if (info) + { + info->value = CONST_VECTOR_ELT (op, 0); + info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value)); + info->mvn = false; + info->shift = 0; + } + + return true; + } + + /* Splat vector constant out into a byte vector. */ + for (i = 0; i < n_elts; i++) + { + rtx el = CONST_VECTOR_ELT (op, i); + unsigned HOST_WIDE_INT elpart; + unsigned int part, parts; + + if (GET_CODE (el) == CONST_INT) + { + elpart = INTVAL (el); + parts = 1; + } + else if (GET_CODE (el) == CONST_DOUBLE) + { + elpart = CONST_DOUBLE_LOW (el); + parts = 2; + } + else + gcc_unreachable (); + + for (part = 0; part < parts; part++) + { + unsigned int byte; + for (byte = 0; byte < innersize; byte++) + { + bytes[idx++] = (elpart & 0xff) ^ invmask; + elpart >>= BITS_PER_UNIT; + } + if (GET_CODE (el) == CONST_DOUBLE) + elpart = CONST_DOUBLE_HIGH (el); + } + } + + /* Sanity check. */ + gcc_assert (idx == GET_MODE_SIZE (mode)); + + do + { + CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); + + CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); + + CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); + + CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0); + + CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0); + + CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0); + + CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); + + CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); + + CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); + + CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1); + + CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1); + + CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); + + CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); + + CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); + + CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); + + CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); + + CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); + + 
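+ /* The 64-bit case below is the byte-mask form: every byte of the element
+ must be either 0x00 or 0xff (e.g. 0xff00ff00ff00ff00), as used by MOVI
+ with a 64-bit element; the immtype 17 handling further down reconstructs
+ the immediate from those per-byte flags. */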
CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) + && bytes[i] == bytes[(i + 8) % idx], 0, 0); + } + while (0); + + if (immtype == -1) + return false; + + if (info) + { + info->element_width = elsize; + info->mvn = emvn != 0; + info->shift = eshift; + + unsigned HOST_WIDE_INT imm = 0; + + if (immtype >= 12 && immtype <= 15) + info->msl = true; + + /* Un-invert bytes of recognized vector, if necessary. */ + if (invmask != 0) + for (i = 0; i < idx; i++) + bytes[i] ^= invmask; + + if (immtype == 17) + { + /* FIXME: Broken on 32-bit H_W_I hosts. */ + gcc_assert (sizeof (HOST_WIDE_INT) == 8); + + for (i = 0; i < 8; i++) + imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) + << (i * BITS_PER_UNIT); + + + info->value = GEN_INT (imm); + } + else + { + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + + /* Construct 'abcdefgh' because the assembler cannot handle + generic constants. */ + if (info->mvn) + imm = ~imm; + imm = (imm >> info->shift) & 0xff; + info->value = GEN_INT (imm); + } + } + + return true; +#undef CHECK +} + +static bool +aarch64_const_vec_all_same_int_p (rtx x, + HOST_WIDE_INT minval, + HOST_WIDE_INT maxval) +{ + HOST_WIDE_INT firstval; + int count, i; + + if (GET_CODE (x) != CONST_VECTOR + || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) + return false; + + firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); + if (firstval < minval || firstval > maxval) + return false; + + count = CONST_VECTOR_NUNITS (x); + for (i = 1; i < count; i++) + if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) + return false; + + return true; +} + +/* Check of immediate shift constants are within range. */ +bool +aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left) +{ + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + if (left) + return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1); + else + return aarch64_const_vec_all_same_int_p (x, 1, bit_width); +} + +/* Return true if X is a uniform vector where all elements + are either the floating-point constant 0.0 or the + integer constant 0. */ +bool +aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode) +{ + return x == CONST0_RTX (mode); +} + +bool +aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT imm = INTVAL (x); + int i; + + for (i = 0; i < 8; i++) + { + unsigned int byte = imm & 0xff; + if (byte != 0xff && byte != 0) + return false; + imm >>= 8; + } + + return true; +} + +bool +aarch64_mov_operand_p (rtx x, + enum aarch64_symbol_context context, + enum machine_mode mode) +{ + if (GET_CODE (x) == HIGH + && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) + return true; + + if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode)) + return true; + + if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) + return true; + + return aarch64_classify_symbolic_expression (x, context) + == SYMBOL_TINY_ABSOLUTE; +} + +/* Return a const_int vector of VAL. */ +rtx +aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val) +{ + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits); + int i; + + for (i=0; i < nunits; i++) + RTVEC_ELT (v, i) = GEN_INT (val); + + return gen_rtx_CONST_VECTOR (mode, v); +} + +/* Check OP is a legal scalar immediate for the MOVI instruction. 
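+ The scalar is broadcast into the preferred vector container for its mode
+ and the result is checked with aarch64_simd_valid_immediate; e.g. 0x2a in
+ QImode is accepted because a V16QImode vector of 0x2a is a valid MOVI
+ immediate.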
*/ + +bool +aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_preferred_simd_mode (mode); + rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op)); + return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); +} + +/* Construct and return a PARALLEL RTX vector. */ +rtx +aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high) +{ + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits / 2); + int base = high ? nunits / 2 : 0; + rtx t1; + int i; + + for (i=0; i < nunits / 2; i++) + RTVEC_ELT (v, i) = GEN_INT (base + i); + + t1 = gen_rtx_PARALLEL (mode, v); + return t1; +} + +/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and + HIGH (exclusive). */ +void +aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + HOST_WIDE_INT lane; + gcc_assert (GET_CODE (operand) == CONST_INT); + lane = INTVAL (operand); + + if (lane < low || lane >= high) + error ("lane out of range"); +} + +void +aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + gcc_assert (GET_CODE (operand) == CONST_INT); + HOST_WIDE_INT lane = INTVAL (operand); + + if (lane < low || lane >= high) + error ("constant out of range"); +} + +/* Emit code to reinterpret one AdvSIMD type as another, + without altering bits. */ +void +aarch64_simd_reinterpret (rtx dest, rtx src) +{ + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); +} + +/* Emit code to place a AdvSIMD pair result in memory locations (with equal + registers). */ +void +aarch64_simd_emit_pair_result_insn (enum machine_mode mode, + rtx (*intfn) (rtx, rtx, rtx), rtx destaddr, + rtx op1) +{ + rtx mem = gen_rtx_MEM (mode, destaddr); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + + emit_insn (intfn (tmp1, op1, tmp2)); + + emit_move_insn (mem, tmp1); + mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); + emit_move_insn (mem, tmp2); +} + +/* Return TRUE if OP is a valid vector addressing mode. */ +bool +aarch64_simd_mem_operand_p (rtx op) +{ + return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC + || GET_CODE (XEXP (op, 0)) == REG); +} + +/* Set up OPERANDS for a register copy from SRC to DEST, taking care + not to early-clobber SRC registers in the process. + + We assume that the operands described by SRC and DEST represent a + decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the + number of components into which the copy has been decomposed. */ +void +aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest, + rtx *src, unsigned int count) +{ + unsigned int i; + + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) < REGNO (operands[1])) + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[i]; + operands[2 * i + 1] = src[i]; + } + } + else + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[count - i - 1]; + operands[2 * i + 1] = src[count - i - 1]; + } + } +} + +/* Compute and return the length of aarch64_simd_mov, where is + one of VSTRUCT modes: OI, CI or XI. 
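+ A register-to-register copy needs one 4-byte instruction per 128-bit
+ vector register, giving 8, 12 and 16 bytes for OImode, CImode and XImode
+ respectively; every other alternative is a single 4-byte instruction.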
*/ +int +aarch64_simd_attr_length_move (rtx insn) +{ + enum machine_mode mode; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) + { + mode = GET_MODE (recog_data.operand[0]); + switch (mode) + { + case OImode: + return 8; + case CImode: + return 12; + case XImode: + return 16; + default: + gcc_unreachable (); + } + } + return 4; +} + +/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum + alignment of a vector to 128 bits. */ +static HOST_WIDE_INT +aarch64_simd_vector_alignment (const_tree type) +{ + HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type)); + return MIN (align, 128); +} + +/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */ +static bool +aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed) +{ + if (is_packed) + return false; + + /* We guarantee alignment for vectors up to 128-bits. */ + if (tree_int_cst_compare (TYPE_SIZE (type), + bitsize_int (BIGGEST_ALIGNMENT)) > 0) + return false; + + /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */ + return true; +} + +/* If VALS is a vector constant that can be loaded into a register + using DUP, generate instructions to do so and return an RTX to + assign to the register. Otherwise return NULL_RTX. */ +static rtx +aarch64_simd_dup_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + bool all_same = true; + rtx x; + int i; + + if (GET_CODE (vals) != CONST_VECTOR) + return NULL_RTX; + + for (i = 1; i < n_elts; ++i) + { + x = CONST_VECTOR_ELT (vals, i); + if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0))) + all_same = false; + } + + if (!all_same) + return NULL_RTX; + + /* We can load this constant by using DUP and a constant in a + single ARM register. This will be cheaper than a vector + load. */ + x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0)); + return gen_rtx_VEC_DUPLICATE (mode, x); +} + + +/* Generate code to load VALS, which is a PARALLEL containing only + constants (for vec_init) or CONST_VECTOR, efficiently into a + register. Returns an RTX to copy into the register, or NULL_RTX + for a PARALLEL that can not be converted into a CONST_VECTOR. */ +static rtx +aarch64_simd_make_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + rtx const_dup; + rtx const_vec = NULL_RTX; + int n_elts = GET_MODE_NUNITS (mode); + int n_const = 0; + int i; + + if (GET_CODE (vals) == CONST_VECTOR) + const_vec = vals; + else if (GET_CODE (vals) == PARALLEL) + { + /* A CONST_VECTOR must contain only CONST_INTs and + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). + Only store valid constants in a CONST_VECTOR. */ + for (i = 0; i < n_elts; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + n_const++; + } + if (n_const == n_elts) + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + } + else + gcc_unreachable (); + + if (const_vec != NULL_RTX + && aarch64_simd_valid_immediate (const_vec, mode, false, NULL)) + /* Load using MOVI/MVNI. */ + return const_vec; + else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) + /* Loaded using DUP. */ + return const_dup; + else if (const_vec != NULL_RTX) + /* Load from constant pool. We can not take advantage of single-cycle + LD1 because we need a PC-relative addressing mode. 
*/ + return const_vec; + else + /* A PARALLEL containing something not valid inside CONST_VECTOR. + We can not construct an initializer. */ + return NULL_RTX; +} + +void +aarch64_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true; + rtx x, mem; + int i; + + x = XVECEXP (vals, 0, 0); + if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) + n_var = 1, one_var = 0; + + for (i = 1; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) + ++n_var, one_var = i; + + if (!rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (n_var == 0) + { + rtx constant = aarch64_simd_make_constant (vals); + if (constant != NULL_RTX) + { + emit_move_insn (target, constant); + return; + } + } + + /* Splat a single non-constant element if we can. */ + if (all_same) + { + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x)); + return; + } + + /* One field is non-constant. Load constant then overwrite varying + field. This is more efficient than using the stack. */ + if (n_var == 1) + { + rtx copy = copy_rtx (vals); + rtx index = GEN_INT (one_var); + enum insn_code icode; + + /* Load constant part of vector, substitute neighboring value for + varying element. */ + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1); + aarch64_expand_vector_init (target, copy); + + /* Insert variable. */ + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); + icode = optab_handler (vec_set_optab, mode); + gcc_assert (icode != CODE_FOR_nothing); + emit_insn (GEN_FCN (icode) (target, x, index)); + return; + } + + /* Construct the vector in memory one field at a time + and load the whole vector. */ + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner_mode, + i * GET_MODE_SIZE (inner_mode)), + XVECEXP (vals, 0, i)); + emit_move_insn (target, mem); + +} + +static unsigned HOST_WIDE_INT +aarch64_shift_truncation_mask (enum machine_mode mode) +{ + return + (aarch64_vector_mode_supported_p (mode) + || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1); +} + +#ifndef TLS_SECTION_ASM_FLAG +#define TLS_SECTION_ASM_FLAG 'T' +#endif + +void +aarch64_elf_asm_named_section (const char *name, unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + char flagchars[10], *f = flagchars; + + /* If we have already declared this section, we can use an + abbreviated form to switch back to it -- unless this section is + part of a COMDAT groups, in which case GAS requires the full + declaration every time. 
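+ For example, switching back to an already-declared .text.unlikely section
+ emits just .section .text.unlikely, whereas a COMDAT text section for a
+ hypothetical function foo () is always emitted in full, roughly
+ .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat (the exact type
+ prefix depends on TYPE_OPERAND_FMT).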
*/ + if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) + && (flags & SECTION_DECLARED)) + { + fprintf (asm_out_file, "\t.section\t%s\n", name); + return; + } + + if (!(flags & SECTION_DEBUG)) + *f++ = 'a'; + if (flags & SECTION_WRITE) + *f++ = 'w'; + if (flags & SECTION_CODE) + *f++ = 'x'; + if (flags & SECTION_SMALL) + *f++ = 's'; + if (flags & SECTION_MERGE) + *f++ = 'M'; + if (flags & SECTION_STRINGS) + *f++ = 'S'; + if (flags & SECTION_TLS) + *f++ = TLS_SECTION_ASM_FLAG; + if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) + *f++ = 'G'; + *f = '\0'; + + fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars); + + if (!(flags & SECTION_NOTYPE)) + { + const char *type; + const char *format; + + if (flags & SECTION_BSS) + type = "nobits"; + else + type = "progbits"; + +#ifdef TYPE_OPERAND_FMT + format = "," TYPE_OPERAND_FMT; +#else + format = ",@%s"; +#endif + + fprintf (asm_out_file, format, type); + + if (flags & SECTION_ENTSIZE) + fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE); + if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) + { + if (TREE_CODE (decl) == IDENTIFIER_NODE) + fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl)); + else + fprintf (asm_out_file, ",%s,comdat", + IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl))); + } + } + + putc ('\n', asm_out_file); +} + +/* Select a format to encode pointers in exception handling data. */ +int +aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) +{ + int type; + switch (aarch64_cmodel) + { + case AARCH64_CMODEL_TINY: + case AARCH64_CMODEL_TINY_PIC: + case AARCH64_CMODEL_SMALL: + case AARCH64_CMODEL_SMALL_PIC: + /* text+got+data < 4Gb. 4-byte signed relocs are sufficient + for everything. */ + type = DW_EH_PE_sdata4; + break; + default: + /* No assumptions here. 8-byte relocs required. */ + type = DW_EH_PE_sdata8; + break; + } + return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; +} + +/* Emit load exclusive. */ + +static void +aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval, + rtx mem, rtx model_rtx) +{ + rtx (*gen) (rtx, rtx, rtx); + + switch (mode) + { + case QImode: gen = gen_aarch64_load_exclusiveqi; break; + case HImode: gen = gen_aarch64_load_exclusivehi; break; + case SImode: gen = gen_aarch64_load_exclusivesi; break; + case DImode: gen = gen_aarch64_load_exclusivedi; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem, model_rtx)); +} + +/* Emit store exclusive. */ + +static void +aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval, + rtx rval, rtx mem, rtx model_rtx) +{ + rtx (*gen) (rtx, rtx, rtx, rtx); + + switch (mode) + { + case QImode: gen = gen_aarch64_store_exclusiveqi; break; + case HImode: gen = gen_aarch64_store_exclusivehi; break; + case SImode: gen = gen_aarch64_store_exclusivesi; break; + case DImode: gen = gen_aarch64_store_exclusivedi; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (bval, rval, mem, model_rtx)); +} + +/* Mark the previous jump instruction as unlikely. */ + +static void +aarch64_emit_unlikely_jump (rtx insn) +{ + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + + insn = emit_jump_insn (insn); + add_int_reg_note (insn, REG_BR_PROB, very_unlikely); +} + +/* Expand a compare and swap pattern. 
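+ After the later split, the emitted sequence is roughly: loop: load
+ exclusive RVAL from MEM; compare RVAL with OLDVAL and branch to done if
+ they differ; store exclusive NEWVAL to MEM with the result in SCRATCH; for
+ a strong compare-and-swap, branch back to loop if the store exclusive
+ failed; done: the condition flags then indicate whether the swap succeeded.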
*/ + +void +aarch64_expand_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + enum machine_mode mode, cmp_mode; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + cmp_mode = mode; + + /* Normally the succ memory model must be stronger than fail, but in the + unlikely event of fail being ACQUIRE and succ being RELEASE we need to + promote succ to ACQ_REL so that we don't lose the acquire semantics. */ + + if (INTVAL (mod_f) == MEMMODEL_ACQUIRE + && INTVAL (mod_s) == MEMMODEL_RELEASE) + mod_s = GEN_INT (MEMMODEL_ACQ_REL); + + switch (mode) + { + case QImode: + case HImode: + /* For short modes, we're going to perform the comparison in SImode, + so do the zero-extension now. */ + cmp_mode = SImode; + rval = gen_reg_rtx (SImode); + oldval = convert_modes (SImode, mode, oldval, true); + /* Fall through. */ + + case SImode: + case DImode: + /* Force the value into a register if needed. */ + if (!aarch64_plus_operand (oldval, mode)) + oldval = force_reg (cmp_mode, oldval); + break; + + default: + gcc_unreachable (); + } + + switch (mode) + { + case QImode: gen = gen_atomic_compare_and_swapqi_1; break; + case HImode: gen = gen_atomic_compare_and_swaphi_1; break; + case SImode: gen = gen_atomic_compare_and_swapsi_1; break; + case DImode: gen = gen_atomic_compare_and_swapdi_1; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); + + if (mode == QImode || mode == HImode) + emit_move_insn (operands[1], gen_lowpart (mode, rval)); + + x = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, bval, x)); +} + +/* Split a compare and swap pattern. */ + +void +aarch64_split_compare_and_swap (rtx operands[]) +{ + rtx rval, mem, oldval, newval, scratch; + enum machine_mode mode; + bool is_weak; + rtx label1, label2, x, cond; + + rval = operands[0]; + mem = operands[1]; + oldval = operands[2]; + newval = operands[3]; + is_weak = (operands[4] != const0_rtx); + scratch = operands[7]; + mode = GET_MODE (mem); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_label_rtx (); + emit_label (label1); + } + label2 = gen_label_rtx (); + + aarch64_emit_load_exclusive (mode, rval, mem, operands[5]); + + cond = aarch64_gen_compare_reg (NE, rval, oldval); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + + aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]); + + if (!is_weak) + { + x = gen_rtx_NE (VOIDmode, scratch, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + } + else + { + cond = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + } + + emit_label (label2); +} + +/* Split an atomic operation. */ + +void +aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, + rtx value, rtx model_rtx, rtx cond) +{ + enum machine_mode mode = GET_MODE (mem); + enum machine_mode wmode = (mode == DImode ? 
DImode : SImode); + rtx label, x; + + label = gen_label_rtx (); + emit_label (label); + + if (new_out) + new_out = gen_lowpart (wmode, new_out); + if (old_out) + old_out = gen_lowpart (wmode, old_out); + else + old_out = new_out; + value = simplify_gen_subreg (wmode, value, mode, 0); + + aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx); + + switch (code) + { + case SET: + new_out = value; + break; + + case NOT: + x = gen_rtx_AND (wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + x = gen_rtx_NOT (wmode, new_out); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + + case MINUS: + if (CONST_INT_P (value)) + { + value = GEN_INT (-INTVAL (value)); + code = PLUS; + } + /* Fall through. */ + + default: + x = gen_rtx_fmt_ee (code, wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + } + + aarch64_emit_store_exclusive (mode, cond, mem, + gen_lowpart (mode, new_out), model_rtx); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label), pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); +} + +static void +aarch64_print_extension (void) +{ + const struct aarch64_option_extension *opt = NULL; + + for (opt = all_extensions; opt->name != NULL; opt++) + if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on) + asm_fprintf (asm_out_file, "+%s", opt->name); + + asm_fprintf (asm_out_file, "\n"); +} + +static void +aarch64_start_file (void) +{ + if (selected_arch) + { + asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name); + aarch64_print_extension (); + } + else if (selected_cpu) + { + const char *truncated_name + = aarch64_rewrite_selected_cpu (selected_cpu->name); + asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name); + aarch64_print_extension (); + } + default_file_start(); +} + +/* Target hook for c_mode_for_suffix. */ +static enum machine_mode +aarch64_c_mode_for_suffix (char suffix) +{ + if (suffix == 'q') + return TFmode; + + return VOIDmode; +} + +/* We can only represent floating point constants which will fit in + "quarter-precision" values. These values are characterised by + a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given + by: + + (-1)^s * (n/16) * 2^r + + Where: + 's' is the sign bit. + 'n' is an integer in the range 16 <= n <= 31. + 'r' is an integer in the range -3 <= r <= 4. */ + +/* Return true iff X can be represented by a quarter-precision + floating point immediate operand X. Note, we cannot represent 0.0. */ +bool +aarch64_float_const_representable_p (rtx x) +{ + /* This represents our current view of how many bits + make up the mantissa. */ + int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; + int exponent; + unsigned HOST_WIDE_INT mantissa, mask; + HOST_WIDE_INT m1, m2; + REAL_VALUE_TYPE r, m; + + if (!CONST_DOUBLE_P (x)) + return false; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + + /* We cannot represent infinities, NaNs or +/-zero. We won't + know if we have +zero until we analyse the mantissa, but we + can reject the other invalid values. */ + if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) + || REAL_VALUE_MINUS_ZERO (r)) + return false; + + /* Extract exponent. */ + r = real_value_abs (&r); + exponent = REAL_EXP (&r); + + /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the + highest (sign) bit, with a fixed binary point at bit point_pos. + m1 holds the low part of the mantissa, m2 the high part. 
+ WARNING: If we ever have a representation using more than 2 * H_W_I - 1 + bits for the mantissa, this can fail (low bits will be lost). */ + real_ldexp (&m, &r, point_pos - exponent); + REAL_VALUE_TO_INT (&m1, &m2, m); + + /* If the low part of the mantissa has bits set we cannot represent + the value. */ + if (m1 != 0) + return false; + /* We have rejected the lower HOST_WIDE_INT, so update our + understanding of how many bits lie in the mantissa and + look only at the high HOST_WIDE_INT. */ + mantissa = m2; + point_pos -= HOST_BITS_PER_WIDE_INT; + + /* We can only represent values with a mantissa of the form 1.xxxx. */ + mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; + if ((mantissa & mask) != 0) + return false; + + /* Having filtered unrepresentable values, we may now remove all + but the highest 5 bits. */ + mantissa >>= point_pos - 5; + + /* We cannot represent the value 0.0, so reject it. This is handled + elsewhere. */ + if (mantissa == 0) + return false; + + /* Then, as bit 4 is always set, we can mask it off, leaving + the mantissa in the range [0, 15]. */ + mantissa &= ~(1 << 4); + gcc_assert (mantissa <= 15); + + /* GCC internally does not use IEEE754-like encoding (where normalized + significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c). + Our mantissa values are shifted 4 places to the left relative to + normalized IEEE754 so we must modify the exponent returned by REAL_EXP + by 5 places to correct for GCC's representation. */ + exponent = 5 - exponent; + + return (exponent >= 0 && exponent <= 7); +} + +char* +aarch64_output_simd_mov_immediate (rtx const_vector, + enum machine_mode mode, + unsigned width) +{ + bool is_valid; + static char templ[40]; + const char *mnemonic; + const char *shift_op; + unsigned int lane_count = 0; + char element_char; + + struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false }; + + /* This will return true to show const_vector is legal for use as either + a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will + also update INFO to show how the immediate should be generated. */ + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info); + gcc_assert (is_valid); + + element_char = sizetochar (info.element_width); + lane_count = width / info.element_width; + + mode = GET_MODE_INNER (mode); + if (mode == SFmode || mode == DFmode) + { + gcc_assert (info.shift == 0 && ! info.mvn); + if (aarch64_float_const_zero_rtx_p (info.value)) + info.value = GEN_INT (0); + else + { +#define buf_size 20 + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, info.value); + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode); +#undef buf_size + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); + else + snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", + lane_count, element_char, float_buf); + return templ; + } + } + + mnemonic = info.mvn ? "mvni" : "movi"; + shift_op = info.msl ? 
"msl" : "lsl"; + + if (lane_count == 1) + snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, UINTVAL (info.value)); + else if (info.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX + ", %s %d", mnemonic, lane_count, element_char, + UINTVAL (info.value), shift_op, info.shift); + else + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, + mnemonic, lane_count, element_char, UINTVAL (info.value)); + return templ; +} + +char* +aarch64_output_scalar_simd_mov_immediate (rtx immediate, + enum machine_mode mode) +{ + enum machine_mode vmode; + + gcc_assert (!VECTOR_MODE_P (mode)); + vmode = aarch64_simd_container_mode (mode, 64); + rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); + return aarch64_output_simd_mov_immediate (v_op, vmode, 64); +} + +/* Split operands into moves from op[1] + op[2] into op[0]. */ + +void +aarch64_split_combinev16qi (rtx operands[3]) +{ + unsigned int dest = REGNO (operands[0]); + unsigned int src1 = REGNO (operands[1]); + unsigned int src2 = REGNO (operands[2]); + enum machine_mode halfmode = GET_MODE (operands[1]); + unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode); + rtx destlo, desthi; + + gcc_assert (halfmode == V16QImode); + + if (src1 == dest && src2 == dest + halfregs) + { + /* No-op move. Can't split to nothing; emit something. */ + emit_note (NOTE_INSN_DELETED); + return; + } + + /* Preserve register attributes for variable tracking. */ + destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0); + desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs, + GET_MODE_SIZE (halfmode)); + + /* Special case of reversed high/low parts. */ + if (reg_overlap_mentioned_p (operands[2], destlo) + && reg_overlap_mentioned_p (operands[1], desthi)) + { + emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2])); + emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2])); + emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2])); + } + else if (!reg_overlap_mentioned_p (operands[2], destlo)) + { + /* Try to avoid unnecessary moves if part of the result + is in the right place already. */ + if (src1 != dest) + emit_move_insn (destlo, operands[1]); + if (src2 != dest + halfregs) + emit_move_insn (desthi, operands[2]); + } + else + { + if (src2 != dest + halfregs) + emit_move_insn (desthi, operands[2]); + if (src1 != dest) + emit_move_insn (destlo, operands[1]); + } +} + +/* vec_perm support. */ + +#define MAX_VECT_LEN 16 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + enum machine_mode vmode; + unsigned char nelt; + bool one_vector_p; + bool testing_p; +}; + +/* Generate a variable permutation. */ + +static void +aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) +{ + enum machine_mode vmode = GET_MODE (target); + bool one_vector_p = rtx_equal_p (op0, op1); + + gcc_checking_assert (vmode == V8QImode || vmode == V16QImode); + gcc_checking_assert (GET_MODE (op0) == vmode); + gcc_checking_assert (GET_MODE (op1) == vmode); + gcc_checking_assert (GET_MODE (sel) == vmode); + gcc_checking_assert (TARGET_SIMD); + + if (one_vector_p) + { + if (vmode == V8QImode) + { + /* Expand the argument to a V16QI mode by duplicating it. 
*/ + rtx pair = gen_reg_rtx (V16QImode); + emit_insn (gen_aarch64_combinev8qi (pair, op0, op0)); + emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); + } + else + { + emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel)); + } + } + else + { + rtx pair; + + if (vmode == V8QImode) + { + pair = gen_reg_rtx (V16QImode); + emit_insn (gen_aarch64_combinev8qi (pair, op0, op1)); + emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); + } + else + { + pair = gen_reg_rtx (OImode); + emit_insn (gen_aarch64_combinev16qi (pair, op0, op1)); + emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel)); + } + } +} + +void +aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) +{ + enum machine_mode vmode = GET_MODE (target); + unsigned int i, nelt = GET_MODE_NUNITS (vmode); + bool one_vector_p = rtx_equal_p (op0, op1); + rtx rmask[MAX_VECT_LEN], mask; + + gcc_checking_assert (!BYTES_BIG_ENDIAN); + + /* The TBL instruction does not use a modulo index, so we must take care + of that ourselves. */ + mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1); + for (i = 0; i < nelt; ++i) + rmask[i] = mask; + mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask)); + sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); + + aarch64_expand_vec_perm_1 (target, op0, op1, sel); +} + +/* Recognize patterns suitable for the TRN instructions. */ +static bool +aarch64_evpc_trn (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i += 2) + { + if (d->perm[i] != i + odd) + return false; + if (d->perm[i + 1] != ((i + nelt + odd) & mask)) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + out = d->target; + + if (odd) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_trn2v16qi; break; + case V8QImode: gen = gen_aarch64_trn2v8qi; break; + case V8HImode: gen = gen_aarch64_trn2v8hi; break; + case V4HImode: gen = gen_aarch64_trn2v4hi; break; + case V4SImode: gen = gen_aarch64_trn2v4si; break; + case V2SImode: gen = gen_aarch64_trn2v2si; break; + case V2DImode: gen = gen_aarch64_trn2v2di; break; + case V4SFmode: gen = gen_aarch64_trn2v4sf; break; + case V2SFmode: gen = gen_aarch64_trn2v2sf; break; + case V2DFmode: gen = gen_aarch64_trn2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_trn1v16qi; break; + case V8QImode: gen = gen_aarch64_trn1v8qi; break; + case V8HImode: gen = gen_aarch64_trn1v8hi; break; + case V4HImode: gen = gen_aarch64_trn1v4hi; break; + case V4SImode: gen = gen_aarch64_trn1v4si; break; + case V2SImode: gen = gen_aarch64_trn1v2si; break; + case V2DImode: gen = gen_aarch64_trn1v2di; break; + case V4SFmode: gen = gen_aarch64_trn1v4sf; break; + case V2SFmode: gen = gen_aarch64_trn1v2sf; break; + case V2DFmode: gen = gen_aarch64_trn1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + +/* Recognize patterns suitable for the UZP instructions. 
*/ +static bool +aarch64_evpc_uzp (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i++) + { + unsigned elt = (i * 2 + odd) & mask; + if (d->perm[i] != elt) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + out = d->target; + + if (odd) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_uzp2v16qi; break; + case V8QImode: gen = gen_aarch64_uzp2v8qi; break; + case V8HImode: gen = gen_aarch64_uzp2v8hi; break; + case V4HImode: gen = gen_aarch64_uzp2v4hi; break; + case V4SImode: gen = gen_aarch64_uzp2v4si; break; + case V2SImode: gen = gen_aarch64_uzp2v2si; break; + case V2DImode: gen = gen_aarch64_uzp2v2di; break; + case V4SFmode: gen = gen_aarch64_uzp2v4sf; break; + case V2SFmode: gen = gen_aarch64_uzp2v2sf; break; + case V2DFmode: gen = gen_aarch64_uzp2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_uzp1v16qi; break; + case V8QImode: gen = gen_aarch64_uzp1v8qi; break; + case V8HImode: gen = gen_aarch64_uzp1v8hi; break; + case V4HImode: gen = gen_aarch64_uzp1v4hi; break; + case V4SImode: gen = gen_aarch64_uzp1v4si; break; + case V2SImode: gen = gen_aarch64_uzp1v2si; break; + case V2DImode: gen = gen_aarch64_uzp1v2di; break; + case V4SFmode: gen = gen_aarch64_uzp1v4sf; break; + case V2SFmode: gen = gen_aarch64_uzp1v2sf; break; + case V2DFmode: gen = gen_aarch64_uzp1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + +/* Recognize patterns suitable for the ZIP instructions. */ +static bool +aarch64_evpc_zip (struct expand_vec_perm_d *d) +{ + unsigned int i, high, mask, nelt = d->nelt; + rtx out, in0, in1, x; + rtx (*gen) (rtx, rtx, rtx); + enum machine_mode vmode = d->vmode; + + if (GET_MODE_UNIT_SIZE (vmode) > 8) + return false; + + /* Note that these are little-endian tests. + We correct for big-endian later. */ + high = nelt / 2; + if (d->perm[0] == high) + /* Do Nothing. */ + ; + else if (d->perm[0] == 0) + high = 0; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt / 2; i++) + { + unsigned elt = (i + high) & mask; + if (d->perm[i * 2] != elt) + return false; + elt = (elt + nelt) & mask; + if (d->perm[i * 2 + 1] != elt) + return false; + } + + /* Success! 
*/ + if (d->testing_p) + return true; + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + high = !high; + } + out = d->target; + + if (high) + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_zip2v16qi; break; + case V8QImode: gen = gen_aarch64_zip2v8qi; break; + case V8HImode: gen = gen_aarch64_zip2v8hi; break; + case V4HImode: gen = gen_aarch64_zip2v4hi; break; + case V4SImode: gen = gen_aarch64_zip2v4si; break; + case V2SImode: gen = gen_aarch64_zip2v2si; break; + case V2DImode: gen = gen_aarch64_zip2v2di; break; + case V4SFmode: gen = gen_aarch64_zip2v4sf; break; + case V2SFmode: gen = gen_aarch64_zip2v2sf; break; + case V2DFmode: gen = gen_aarch64_zip2v2df; break; + default: + return false; + } + } + else + { + switch (vmode) + { + case V16QImode: gen = gen_aarch64_zip1v16qi; break; + case V8QImode: gen = gen_aarch64_zip1v8qi; break; + case V8HImode: gen = gen_aarch64_zip1v8hi; break; + case V4HImode: gen = gen_aarch64_zip1v4hi; break; + case V4SImode: gen = gen_aarch64_zip1v4si; break; + case V2SImode: gen = gen_aarch64_zip1v2si; break; + case V2DImode: gen = gen_aarch64_zip1v2di; break; + case V4SFmode: gen = gen_aarch64_zip1v4sf; break; + case V2SFmode: gen = gen_aarch64_zip1v2sf; break; + case V2DFmode: gen = gen_aarch64_zip1v2df; break; + default: + return false; + } + } + + emit_insn (gen (out, in0, in1)); + return true; +} + +static bool +aarch64_evpc_dup (struct expand_vec_perm_d *d) +{ + rtx (*gen) (rtx, rtx, rtx); + rtx out = d->target; + rtx in0; + enum machine_mode vmode = d->vmode; + unsigned int i, elt, nelt = d->nelt; + rtx lane; + + /* TODO: This may not be big-endian safe. */ + if (BYTES_BIG_ENDIAN) + return false; + + elt = d->perm[0]; + for (i = 1; i < nelt; i++) + { + if (elt != d->perm[i]) + return false; + } + + /* The generic preparation in aarch64_expand_vec_perm_const_1 + swaps the operand order and the permute indices if it finds + d->perm[0] to be in the second operand. Thus, we can always + use d->op0 and need not do any extra arithmetic to get the + correct lane number. */ + in0 = d->op0; + lane = GEN_INT (elt); + + switch (vmode) + { + case V16QImode: gen = gen_aarch64_dup_lanev16qi; break; + case V8QImode: gen = gen_aarch64_dup_lanev8qi; break; + case V8HImode: gen = gen_aarch64_dup_lanev8hi; break; + case V4HImode: gen = gen_aarch64_dup_lanev4hi; break; + case V4SImode: gen = gen_aarch64_dup_lanev4si; break; + case V2SImode: gen = gen_aarch64_dup_lanev2si; break; + case V2DImode: gen = gen_aarch64_dup_lanev2di; break; + case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break; + case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break; + case V2DFmode: gen = gen_aarch64_dup_lanev2df; break; + default: + return false; + } + + emit_insn (gen (out, in0, lane)); + return true; +} + +static bool +aarch64_evpc_tbl (struct expand_vec_perm_d *d) +{ + rtx rperm[MAX_VECT_LEN], sel; + enum machine_mode vmode = d->vmode; + unsigned int i, nelt = d->nelt; + + /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's + numbering of elements for big-endian, we must reverse the order. */ + if (BYTES_BIG_ENDIAN) + return false; + + if (d->testing_p) + return true; + + /* Generic code will try constant permutation twice. Once with the + original mode and again with the elements lowered to QImode. + So wait and don't do the selector expansion ourselves. 
*/ + if (vmode != V8QImode && vmode != V16QImode) + return false; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (d->perm[i]); + sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); + sel = force_reg (vmode, sel); + + aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel); + return true; +} + +static bool +aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +{ + /* The pattern matching functions above are written to look for a small + number to begin the sequence (0, 1, N/2). If we begin with an index + from the second operand, we can swap the operands. */ + if (d->perm[0] >= d->nelt) + { + unsigned i, nelt = d->nelt; + rtx x; + + for (i = 0; i < nelt; ++i) + d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1); + + x = d->op0; + d->op0 = d->op1; + d->op1 = x; + } + + if (TARGET_SIMD) + { + if (aarch64_evpc_zip (d)) + return true; + else if (aarch64_evpc_uzp (d)) + return true; + else if (aarch64_evpc_trn (d)) + return true; + else if (aarch64_evpc_dup (d)) + return true; + return aarch64_evpc_tbl (d); + } + return false; +} + +/* Expand a vec_perm_const pattern. */ + +bool +aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) +{ + struct expand_vec_perm_d d; + int i, nelt, which; + + d.target = target; + d.op0 = op0; + d.op1 = op1; + + d.vmode = GET_MODE (target); + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = false; + + for (i = which = 0; i < nelt; ++i) + { + rtx e = XVECEXP (sel, 0, i); + int ei = INTVAL (e) & (2 * nelt - 1); + which |= (ei < nelt ? 1 : 2); + d.perm[i] = ei; + } + + switch (which) + { + default: + gcc_unreachable (); + + case 3: + d.one_vector_p = false; + if (!rtx_equal_p (op0, op1)) + break; + + /* The elements of PERM do not suggest that only the first operand + is used, but both operands are identical. Allow easier matching + of the permutation by folding the permutation into the single + input vector. */ + /* Fall Through. */ + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] &= nelt - 1; + d.op0 = op1; + d.one_vector_p = true; + break; + + case 1: + d.op1 = op0; + d.one_vector_p = true; + break; + } + + return aarch64_expand_vec_perm_const_1 (&d); +} + +static bool +aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel) +{ + struct expand_vec_perm_d d; + unsigned int i, nelt, which; + bool ret; + + d.vmode = vmode; + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = true; + memcpy (d.perm, sel, nelt); + + /* Calculate whether all elements are in one vector. */ + for (i = which = 0; i < nelt; ++i) + { + unsigned char e = d.perm[i]; + gcc_assert (e < 2 * nelt); + which |= (e < nelt ? 1 : 2); + } + + /* If all elements are from the second vector, reindex as if from the + first vector. */ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + + /* Check whether the mask can be applied to a single vector. */ + d.one_vector_p = (which != 3); + + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ret = aarch64_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; +} + +/* Implement target hook CANNOT_CHANGE_MODE_CLASS. 
*/ +bool +aarch64_cannot_change_mode_class (enum machine_mode from, + enum machine_mode to, + enum reg_class rclass) +{ + /* Full-reg subregs are allowed on general regs or any class if they are + the same size. */ + if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to) + || !reg_classes_intersect_p (FP_REGS, rclass)) + return false; + + /* Limited combinations of subregs are safe on FPREGs. Particularly, + 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed. + 2. Scalar to Scalar for integer modes or same size float modes. + 3. Vector to Vector modes. */ + if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to)) + { + if (aarch64_vector_mode_supported_p (from) + && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to)) + return false; + + if (GET_MODE_NUNITS (from) == 1 + && GET_MODE_NUNITS (to) == 1 + && (GET_MODE_CLASS (from) == MODE_INT + || from == to)) + return false; + + if (aarch64_vector_mode_supported_p (from) + && aarch64_vector_mode_supported_p (to)) + return false; + } + + return true; +} + +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST aarch64_address_cost + +/* This hook will determines whether unnamed bitfields affect the alignment + of the containing structure. The hook returns true if the structure + should inherit the alignment requirements of an unnamed bitfield's + type. */ +#undef TARGET_ALIGN_ANON_BITFIELD +#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true + +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ + hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START aarch64_start_file + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list + +#undef TARGET_CALLEE_COPIES +#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE aarch64_can_eliminate + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage + +/* Only the least significant bit is used for initialization guard + variables. 
*/ +#undef TARGET_CXX_GUARD_MASK_BIT +#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true + +#undef TARGET_C_MODE_FOR_SUFFIX +#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix + +#ifdef TARGET_BIG_ENDIAN_DEFAULT +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END) +#endif + +#undef TARGET_CLASS_MAX_NREGS +#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL aarch64_builtin_decl + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start + +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN aarch64_fold_builtin + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG aarch64_function_arg + +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance + +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE aarch64_function_value + +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required + +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin + +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS aarch64_init_builtins + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p + +#undef TARGET_LIBGCC_CMP_RETURN_MODE +#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode + +#undef TARGET_LRA_P +#define TARGET_LRA_P aarch64_lra_p + +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE aarch64_mangle_type + +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size + +/* This target hook should return true if accesses to volatile bitfields + should use the narrowest mode possible. It should return false if these + accesses should use the bitfield container type. 
*/ +#undef TARGET_NARROW_VOLATILE_BITFIELD +#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE aarch64_override_options + +#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE +#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \ + aarch64_override_options_after_change + +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload + +#undef TARGET_SHIFT_TRUNCATION_MASK +#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory + +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB aarch64_return_in_msb + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS aarch64_rtx_costs + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate + +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init + +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p + +#undef TARGET_ARRAY_MODE_SUPPORTED_P +#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p + +#undef TARGET_VECTORIZE_ADD_STMT_COST +#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + aarch64_builtin_vectorization_cost + +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode + +#undef TARGET_VECTORIZE_BUILTINS +#define TARGET_VECTORIZE_BUILTINS + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + aarch64_builtin_vectorized_function + +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + aarch64_autovectorize_vector_sizes + +/* Section anchor support. */ + +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -256 + +/* Limit the maximum anchor offset to 4k-1, since that's the limit for a + byte offset; we can do much more for larger data types, but have no way + to determine the size of the access. We assume accesses are aligned. */ +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 4095 + +#undef TARGET_VECTOR_ALIGNMENT +#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment + +#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + aarch64_simd_vector_alignment_reachable + +/* vec_perm support. 
*/ + +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ + aarch64_vectorize_vec_perm_const_ok + + +#undef TARGET_FIXED_CONDITION_CODE_REGS +#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-aarch64.h" diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.h b/gcc-4.9/gcc/config/aarch64/aarch64.h new file mode 100644 index 000000000..7962aa472 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.h @@ -0,0 +1,873 @@ +/* Machine description for AArch64 architecture. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#ifndef GCC_AARCH64_H +#define GCC_AARCH64_H + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__aarch64__"); \ + if (TARGET_BIG_END) \ + builtin_define ("__AARCH64EB__"); \ + else \ + builtin_define ("__AARCH64EL__"); \ + \ + if (!TARGET_GENERAL_REGS_ONLY) \ + builtin_define ("__ARM_NEON"); \ + \ + switch (aarch64_cmodel) \ + { \ + case AARCH64_CMODEL_TINY: \ + case AARCH64_CMODEL_TINY_PIC: \ + builtin_define ("__AARCH64_CMODEL_TINY__"); \ + break; \ + case AARCH64_CMODEL_SMALL: \ + case AARCH64_CMODEL_SMALL_PIC: \ + builtin_define ("__AARCH64_CMODEL_SMALL__");\ + break; \ + case AARCH64_CMODEL_LARGE: \ + builtin_define ("__AARCH64_CMODEL_LARGE__"); \ + break; \ + default: \ + break; \ + } \ + \ + if (TARGET_ILP32) \ + { \ + cpp_define (parse_in, "_ILP32"); \ + cpp_define (parse_in, "__ILP32__"); \ + } \ + if (TARGET_CRYPTO) \ + builtin_define ("__ARM_FEATURE_CRYPTO"); \ + } while (0) + + + +/* Target machine storage layout. */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + { \ + if (MODE == QImode || MODE == HImode) \ + { \ + MODE = SImode; \ + } \ + } + +/* Bits are always numbered from the LSBit. */ +#define BITS_BIG_ENDIAN 0 + +/* Big/little-endian flavour. */ +#define BYTES_BIG_ENDIAN (TARGET_BIG_END != 0) +#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN) + +/* AdvSIMD is supported in the default configuration, unless disabled by + -mgeneral-regs-only. */ +#define TARGET_SIMD !TARGET_GENERAL_REGS_ONLY +#define TARGET_FLOAT !TARGET_GENERAL_REGS_ONLY + +#define UNITS_PER_WORD 8 + +#define UNITS_PER_VREG 16 + +#define PARM_BOUNDARY 64 + +#define STACK_BOUNDARY 128 + +#define FUNCTION_BOUNDARY 32 + +#define EMPTY_FIELD_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT 128 + +#define SHORT_TYPE_SIZE 16 + +#define INT_TYPE_SIZE 32 + +#define LONG_TYPE_SIZE (TARGET_ILP32 ? 32 : 64) + +#define POINTER_SIZE (TARGET_ILP32 ? 
32 : 64) + +#define LONG_LONG_TYPE_SIZE 64 + +#define FLOAT_TYPE_SIZE 32 + +#define DOUBLE_TYPE_SIZE 64 + +#define LONG_DOUBLE_TYPE_SIZE 128 + +/* The architecture reserves all bits of the address for hardware use, + so the vbit must go into the delta field of pointers to member + functions. This is the same config as that in the AArch32 + port. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +/* Make strings word-aligned so that strcpy from constants will be + faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && !optimize_size \ + && (ALIGN) < BITS_PER_WORD) \ + ? BITS_PER_WORD : ALIGN) + +#define DATA_ALIGNMENT(EXP, ALIGN) \ + ((((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (EXP) == ARRAY_TYPE \ + || TREE_CODE (EXP) == UNION_TYPE \ + || TREE_CODE (EXP) == RECORD_TYPE)) \ + ? BITS_PER_WORD : (ALIGN)) + +#define LOCAL_ALIGNMENT(EXP, ALIGN) DATA_ALIGNMENT(EXP, ALIGN) + +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* Defined by the ABI */ +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 + +/* Using long long breaks -ansi and -std=c90, so these will need to be + made conditional for an LLP64 ABI. */ + +#define SIZE_TYPE "long unsigned int" + +#define PTRDIFF_TYPE "long int" + +#define PCC_BITFIELD_TYPE_MATTERS 1 + + +/* Instruction tuning/selection flags. */ + +/* Bit values used to identify processor capabilities. */ +#define AARCH64_FL_SIMD (1 << 0) /* Has SIMD instructions. */ +#define AARCH64_FL_FP (1 << 1) /* Has FP. */ +#define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ +#define AARCH64_FL_SLOWMUL (1 << 3) /* A slow multiply core. */ +#define AARCH64_FL_CRC (1 << 4) /* Has CRC. */ + +/* Has FP and SIMD. */ +#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) + +/* Has FP without SIMD. */ +#define AARCH64_FL_FPQ16 (AARCH64_FL_FP & ~AARCH64_FL_SIMD) + +/* Architecture flags that effect instruction selection. */ +#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) + +/* Macros to test ISA flags. */ +extern unsigned long aarch64_isa_flags; +#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) +#define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) +#define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) +#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) + +/* Macros to test tuning flags. */ +extern unsigned long aarch64_tune_flags; +#define AARCH64_TUNE_SLOWMUL (aarch64_tune_flags & AARCH64_FL_SLOWMUL) + +/* Crypto is an optional feature. */ +#define TARGET_CRYPTO AARCH64_ISA_CRYPTO + +/* Standard register usage. */ + +/* 31 64-bit general purpose registers R0-R30: + R30 LR (link register) + R29 FP (frame pointer) + R19-R28 Callee-saved registers + R18 The platform register; use as temporary register. + R17 IP1 The second intra-procedure-call temporary register + (can be used by call veneers and PLT code); otherwise use + as a temporary register + R16 IP0 The first intra-procedure-call temporary register (can + be used by call veneers and PLT code); otherwise use as a + temporary register + R9-R15 Temporary registers + R8 Structure value parameter / temporary register + R0-R7 Parameter/result registers + + SP stack pointer, encoded as X/R31 where permitted. + ZR zero register, encoded as X/R31 elsewhere + + 32 x 128-bit floating-point/vector registers + V16-V31 Caller-saved (temporary) registers + V8-V15 Callee-saved registers + V0-V7 Parameter/result registers + + The vector register V0 holds scalar B0, H0, S0 and D0 in its least + significant bits. 
Unlike AArch32 S1 is not packed into D0, + etc. */ + +/* Note that we don't mark X30 as a call-clobbered register. The idea is + that it's really the call instructions themselves which clobber X30. + We don't care what the called function does with it afterwards. + + This approach makes it easier to implement sibcalls. Unlike normal + calls, sibcalls don't clobber X30, so the register reaches the + called function intact. EPILOGUE_USES says that X30 is useful + to the called function. */ + +#define FIXED_REGISTERS \ + { \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R0 - R7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R8 - R15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* R16 - R23 */ \ + 0, 0, 0, 0, 0, 1, 0, 1, /* R24 - R30, SP */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V0 - V7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V8 - V15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V16 - V23 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V24 - V31 */ \ + 1, 1, 1, /* SFP, AP, CC */ \ + } + +#define CALL_USED_REGISTERS \ + { \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* R8 - R15 */ \ + 1, 1, 1, 0, 0, 0, 0, 0, /* R16 - R23 */ \ + 0, 0, 0, 0, 0, 1, 0, 1, /* R24 - R30, SP */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* V0 - V7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* V8 - V15 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* V16 - V23 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* V24 - V31 */ \ + 1, 1, 1, /* SFP, AP, CC */ \ + } + +#define REGISTER_NAMES \ + { \ + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", \ + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", \ + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", \ + "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp", \ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", \ + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", \ + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", \ + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", \ + "sfp", "ap", "cc", \ + } + +/* Generate the register aliases for core register N */ +#define R_ALIASES(N) {"r" # N, R0_REGNUM + (N)}, \ + {"w" # N, R0_REGNUM + (N)} + +#define V_ALIASES(N) {"q" # N, V0_REGNUM + (N)}, \ + {"d" # N, V0_REGNUM + (N)}, \ + {"s" # N, V0_REGNUM + (N)}, \ + {"h" # N, V0_REGNUM + (N)}, \ + {"b" # N, V0_REGNUM + (N)} + +/* Provide aliases for all of the ISA defined register name forms. + These aliases are convenient for use in the clobber lists of inline + asm statements. */ + +#define ADDITIONAL_REGISTER_NAMES \ + { R_ALIASES(0), R_ALIASES(1), R_ALIASES(2), R_ALIASES(3), \ + R_ALIASES(4), R_ALIASES(5), R_ALIASES(6), R_ALIASES(7), \ + R_ALIASES(8), R_ALIASES(9), R_ALIASES(10), R_ALIASES(11), \ + R_ALIASES(12), R_ALIASES(13), R_ALIASES(14), R_ALIASES(15), \ + R_ALIASES(16), R_ALIASES(17), R_ALIASES(18), R_ALIASES(19), \ + R_ALIASES(20), R_ALIASES(21), R_ALIASES(22), R_ALIASES(23), \ + R_ALIASES(24), R_ALIASES(25), R_ALIASES(26), R_ALIASES(27), \ + R_ALIASES(28), R_ALIASES(29), R_ALIASES(30), {"wsp", R0_REGNUM + 31}, \ + V_ALIASES(0), V_ALIASES(1), V_ALIASES(2), V_ALIASES(3), \ + V_ALIASES(4), V_ALIASES(5), V_ALIASES(6), V_ALIASES(7), \ + V_ALIASES(8), V_ALIASES(9), V_ALIASES(10), V_ALIASES(11), \ + V_ALIASES(12), V_ALIASES(13), V_ALIASES(14), V_ALIASES(15), \ + V_ALIASES(16), V_ALIASES(17), V_ALIASES(18), V_ALIASES(19), \ + V_ALIASES(20), V_ALIASES(21), V_ALIASES(22), V_ALIASES(23), \ + V_ALIASES(24), V_ALIASES(25), V_ALIASES(26), V_ALIASES(27), \ + V_ALIASES(28), V_ALIASES(29), V_ALIASES(30), V_ALIASES(31) \ + } + +/* Say that the epilogue uses the return address register. 
Note that + in the case of sibcalls, the values "used by the epilogue" are + considered live at the start of the called function. */ + +#define EPILOGUE_USES(REGNO) \ + ((REGNO) == LR_REGNUM) + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. */ +#define EXIT_IGNORE_STACK 1 + +#define STATIC_CHAIN_REGNUM R18_REGNUM +#define HARD_FRAME_POINTER_REGNUM R29_REGNUM +#define FRAME_POINTER_REGNUM SFP_REGNUM +#define STACK_POINTER_REGNUM SP_REGNUM +#define ARG_POINTER_REGNUM AP_REGNUM +#define FIRST_PSEUDO_REGISTER 67 + +/* The number of (integer) argument register available. */ +#define NUM_ARG_REGS 8 +#define NUM_FP_ARG_REGS 8 + +/* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most + four members. */ +#define HA_MAX_NUM_FLDS 4 + +/* External dwarf register number scheme. These number are used to + identify registers in dwarf debug information, the values are + defined by the AArch64 ABI. The numbering scheme is independent of + GCC's internal register numbering scheme. */ + +#define AARCH64_DWARF_R0 0 + +/* The number of R registers, note 31! not 32. */ +#define AARCH64_DWARF_NUMBER_R 31 + +#define AARCH64_DWARF_SP 31 +#define AARCH64_DWARF_V0 64 + +/* The number of V registers. */ +#define AARCH64_DWARF_NUMBER_V 32 + +/* For signal frames we need to use an alternative return column. This + value must not correspond to a hard register and must be out of the + range of DWARF_FRAME_REGNUM(). */ +#define DWARF_ALT_FRAME_RETURN_COLUMN \ + (AARCH64_DWARF_V0 + AARCH64_DWARF_NUMBER_V) + +/* We add 1 extra frame register for use as the + DWARF_ALT_FRAME_RETURN_COLUMN. */ +#define DWARF_FRAME_REGISTERS (DWARF_ALT_FRAME_RETURN_COLUMN + 1) + + +#define DBX_REGISTER_NUMBER(REGNO) aarch64_dbx_register_number (REGNO) +/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders + can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same + as the default definition in dwarf2out.c. */ +#undef DWARF_FRAME_REGNUM +#define DWARF_FRAME_REGNUM(REGNO) DBX_REGISTER_NUMBER (REGNO) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNUM) + +#define HARD_REGNO_NREGS(REGNO, MODE) aarch64_hard_regno_nregs (REGNO, MODE) + +#define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE) + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) + +#define DWARF2_UNWIND_INFO 1 + +/* Use R0 through R3 to pass exception handling information. */ +#define EH_RETURN_DATA_REGNO(N) \ + ((N) < 4 ? ((unsigned int) R0_REGNUM + (N)) : INVALID_REGNUM) + +/* Select a format to encode pointers in exception handling data. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + aarch64_asm_preferred_eh_data_format ((CODE), (GLOBAL)) + +/* The register that holds the return address in exception handlers. */ +#define AARCH64_EH_STACKADJ_REGNUM (R0_REGNUM + 4) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM) + +/* Don't use __builtin_setjmp until we've defined it. */ +#undef DONT_USE_BUILTIN_SETJMP +#define DONT_USE_BUILTIN_SETJMP 1 + +/* Register in which the structure value is to be returned. */ +#define AARCH64_STRUCT_VALUE_REGNUM R8_REGNUM + +/* Non-zero if REGNO is part of the Core register set. + + The rather unusual way of expressing this check is to avoid + warnings when building the compiler when R0_REGNUM is 0 and REGNO + is unsigned. 
*/ +#define GP_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - R0_REGNUM)) <= (R30_REGNUM - R0_REGNUM)) + +#define FP_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM)) + +#define FP_LO_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - V0_REGNUM)) <= (V15_REGNUM - V0_REGNUM)) + + +/* Register and constant classes. */ + +enum reg_class +{ + NO_REGS, + CORE_REGS, + GENERAL_REGS, + STACK_REG, + POINTER_REGS, + FP_LO_REGS, + FP_REGS, + ALL_REGS, + LIM_REG_CLASSES /* Last */ +}; + +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "CORE_REGS", \ + "GENERAL_REGS", \ + "STACK_REG", \ + "POINTER_REGS", \ + "FP_LO_REGS", \ + "FP_REGS", \ + "ALL_REGS" \ +} + +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x7fffffff, 0x00000000, 0x00000003 }, /* CORE_REGS */ \ + { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ + { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ + { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ + { 0x00000000, 0x0000ffff, 0x00000000 }, /* FP_LO_REGS */ \ + { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ + { 0xffffffff, 0xffffffff, 0x00000007 } /* ALL_REGS */ \ +} + +#define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) + +#define INDEX_REG_CLASS CORE_REGS +#define BASE_REG_CLASS POINTER_REGS + +/* Register pairs used to eliminate unneeded registers that point into + the stack frame. */ +#define ELIMINABLE_REGS \ +{ \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \ +} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = aarch64_initial_elimination_offset (FROM, TO) + +/* CPU/ARCH option handling. */ +#include "config/aarch64/aarch64-opts.h" + +enum target_cpus +{ +#define AARCH64_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + TARGET_CPU_##INTERNAL_IDENT, +#include "aarch64-cores.def" +#undef AARCH64_CORE + TARGET_CPU_generic +}; + +/* If there is no CPU defined at configure, use generic as default. */ +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT \ + (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6)) +#endif + +/* The processor for which instructions should be scheduled. */ +extern enum aarch64_processor aarch64_tune; + +/* RTL generation support. */ +#define INIT_EXPANDERS aarch64_init_expanders () + + +/* Stack layout; function entry, exit and calling. */ +#define STACK_GROWS_DOWNWARD 1 + +#define FRAME_GROWS_DOWNWARD 1 + +#define STARTING_FRAME_OFFSET 0 + +#define ACCUMULATE_OUTGOING_ARGS 1 + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Fix for VFP */ +#define LIBCALL_VALUE(MODE) \ + gen_rtx_REG (MODE, FLOAT_MODE_P (MODE) ? V0_REGNUM : R0_REGNUM) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define AARCH64_ROUND_UP(X, ALIGNMENT) \ + (((X) + ((ALIGNMENT) - 1)) & ~((ALIGNMENT) - 1)) + +#define AARCH64_ROUND_DOWN(X, ALIGNMENT) \ + ((X) & ~((ALIGNMENT) - 1)) + +#ifdef HOST_WIDE_INT +struct GTY (()) aarch64_frame +{ + HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; + HOST_WIDE_INT saved_regs_size; + /* Padding if needed after the all the callee save registers have + been saved. 
*/ + HOST_WIDE_INT padding0; + HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */ + HOST_WIDE_INT fp_lr_offset; /* Space needed for saving fp and/or lr */ + + bool laid_out; +}; + +typedef struct GTY (()) machine_function +{ + struct aarch64_frame frame; + + /* The number of extra stack bytes taken up by register varargs. + This area is allocated by the callee at the very top of the frame. */ + HOST_WIDE_INT saved_varargs_size; + +} machine_function; +#endif + +/* Which ABI to use. */ +enum aarch64_abi_type +{ + AARCH64_ABI_LP64 = 0, + AARCH64_ABI_ILP32 = 1 +}; + +#ifndef AARCH64_ABI_DEFAULT +#define AARCH64_ABI_DEFAULT AARCH64_ABI_LP64 +#endif + +#define TARGET_ILP32 (aarch64_abi & AARCH64_ABI_ILP32) + +enum arm_pcs +{ + ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ + ARM_PCS_UNKNOWN +}; + + +extern enum arm_pcs arm_pcs_variant; + +#ifndef ARM_DEFAULT_PCS +#define ARM_DEFAULT_PCS ARM_PCS_AAPCS64 +#endif + +/* We can't use enum machine_mode inside a generator file because it + hasn't been created yet; we shouldn't be using any code that + needs the real definition though, so this ought to be safe. */ +#ifdef GENERATOR_FILE +#define MACHMODE int +#else +#include "insn-modes.h" +#define MACHMODE enum machine_mode +#endif + + +/* AAPCS related state tracking. */ +typedef struct +{ + enum arm_pcs pcs_variant; + int aapcs_arg_processed; /* No need to lay out this argument again. */ + int aapcs_ncrn; /* Next Core register number. */ + int aapcs_nextncrn; /* Next next core register number. */ + int aapcs_nvrn; /* Next Vector register number. */ + int aapcs_nextnvrn; /* Next Next Vector register number. */ + rtx aapcs_reg; /* Register assigned to this argument. This + is NULL_RTX if this parameter goes on + the stack. */ + MACHMODE aapcs_vfp_rmode; + int aapcs_stack_words; /* If the argument is passed on the stack, this + is the number of words needed, after rounding + up. Only meaningful when + aapcs_reg == NULL_RTX. */ + int aapcs_stack_size; /* The total size (in words, per 8 byte) of the + stack arg area so far. */ +} CUMULATIVE_ARGS; + +#define FUNCTION_ARG_PADDING(MODE, TYPE) \ + (aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward) + +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (aarch64_pad_reg_upward (MODE, TYPE, FIRST) ? upward : downward) + +#define PAD_VARARGS_DOWN 0 + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + aarch64_init_cumulative_args (&(CUM), FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) + +#define FUNCTION_ARG_REGNO_P(REGNO) \ + aarch64_function_arg_regno_p(REGNO) + + +/* ISA Features. */ + +/* Addressing modes, etc. */ +#define HAVE_POST_INCREMENT 1 +#define HAVE_PRE_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_DECREMENT 1 +#define HAVE_POST_MODIFY_DISP 1 +#define HAVE_PRE_MODIFY_DISP 1 + +#define MAX_REGS_PER_ADDRESS 2 + +#define CONSTANT_ADDRESS_P(X) aarch64_constant_address_p(X) + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. 
*/ + +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ +do { \ + rtx new_x = aarch64_legitimize_reload_address (&(X), MODE, OPNUM, TYPE, \ + IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ +} while (0) + +#define REGNO_OK_FOR_BASE_P(REGNO) \ + aarch64_regno_ok_for_base_p (REGNO, true) + +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + aarch64_regno_ok_for_index_p (REGNO, true) + +#define LEGITIMATE_PIC_OPERAND_P(X) \ + aarch64_legitimate_pic_operand_p (X) + +#define CASE_VECTOR_MODE Pmode + +#define DEFAULT_SIGNED_CHAR 0 + +/* An integer expression for the size in bits of the largest integer machine + mode that should actually be used. We allow pairs of registers. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* Maximum bytes moved by a single instruction (load/store pair). */ +#define MOVE_MAX (UNITS_PER_WORD * 2) + +/* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */ +#define AARCH64_CALL_RATIO 8 + +/* When optimizing for size, give a better estimate of the length of a memcpy + call, but use the default otherwise. But move_by_pieces_ninsns() counts + memory-to-memory moves, and we'll have to generate a load & store for each, + so halve the value to take that into account. */ +#define MOVE_RATIO(speed) \ + (((speed) ? 15 : AARCH64_CALL_RATIO) / 2) + +/* For CLEAR_RATIO, when optimizing for size, give a better estimate + of the length of a memset call, but use the default otherwise. */ +#define CLEAR_RATIO(speed) \ + ((speed) ? 15 : AARCH64_CALL_RATIO) + +/* SET_RATIO is similar to CLEAR_RATIO, but for a non-zero constant, so when + optimizing for size adjust the ratio to account for the overhead of loading + the constant. */ +#define SET_RATIO(speed) \ + ((speed) ? 15 : AARCH64_CALL_RATIO - 2) + +/* STORE_BY_PIECES_P can be used when copying a constant string, but + in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR). + For now we always fail this and let the move_by_pieces code copy + the string from read-only memory. */ +#define STORE_BY_PIECES_P(SIZE, ALIGN) 0 + +/* Disable auto-increment in move_by_pieces et al. Use of auto-increment is + rarely a good idea in straight-line code since it adds an extra address + dependency between each instruction. Better to use incrementing offsets. */ +#define USE_LOAD_POST_INCREMENT(MODE) 0 +#define USE_LOAD_POST_DECREMENT(MODE) 0 +#define USE_LOAD_PRE_INCREMENT(MODE) 0 +#define USE_LOAD_PRE_DECREMENT(MODE) 0 +#define USE_STORE_POST_INCREMENT(MODE) 0 +#define USE_STORE_POST_DECREMENT(MODE) 0 +#define USE_STORE_PRE_INCREMENT(MODE) 0 +#define USE_STORE_PRE_DECREMENT(MODE) 0 + +/* ?? #define WORD_REGISTER_OPERATIONS */ + +/* Define if loading from memory in MODE, an integral mode narrower than + BITS_PER_WORD will either zero-extend or sign-extend. The value of this + macro should be the code that says which one of the two operations is + implicitly done, or UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Define this macro to be non-zero if instructions will fail to work + if given data not on the nominal alignment. */ +#define STRICT_ALIGNMENT TARGET_STRICT_ALIGN + +/* Define this macro to be non-zero if accessing less than a word of + memory is no faster than accessing a word of memory, i.e., if such + accesses require more than one instruction or if there is no + difference in cost. 
+ Although there's no difference in instruction count or cycles, + in AArch64 we don't want to expand to a sub-word to a 64-bit access + if we don't have to, for power-saving reasons. */ +#define SLOW_BYTE_ACCESS 0 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define NO_FUNCTION_CSE 1 + +/* Specify the machine mode that the hardware addresses have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode DImode + +/* A C expression whose value is zero if pointers that need to be extended + from being `POINTER_SIZE' bits wide to `Pmode' are sign-extended and + greater then zero if they are zero-extended and less then zero if the + ptr_extend instruction should be used. */ +#define POINTERS_EXTEND_UNSIGNED 1 + +/* Mode of a function address in a call instruction (for indexing purposes). */ +#define FUNCTION_MODE Pmode + +#define SELECT_CC_MODE(OP, X, Y) aarch64_select_cc_mode (OP, X, Y) + +#define REVERSIBLE_CC_MODE(MODE) 1 + +#define REVERSE_CONDITION(CODE, MODE) \ + (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ + ? reverse_condition_maybe_unordered (CODE) \ + : reverse_condition (CODE)) + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE)) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = ((MODE) == SImode ? 32 : 64), 2) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM) + +#define RETURN_ADDR_RTX aarch64_return_addr + +/* 3 insns + padding + 2 pointer-sized entries. */ +#define TRAMPOLINE_SIZE (TARGET_ILP32 ? 24 : 32) + +/* Trampolines contain dwords, so must be dword aligned. */ +#define TRAMPOLINE_ALIGNMENT 64 + +/* Put trampolines in the text section so that mapping symbols work + correctly. */ +#define TRAMPOLINE_SECTION text_section + +/* To start with. */ +#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2 + + +/* Assembly output. */ + +/* For now we'll make all jump tables pc-relative. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ + ((min < -0x1fff0 || max > 0x1fff0) ? SImode \ + : (min < -0x1f0 || max > 0x1f0) ? HImode \ + : QImode) + +/* Jump table alignment is explicit in ASM_OUTPUT_CASE_LABEL. */ +#define ADDR_VEC_ALIGN(JUMPTABLE) 0 + +#define PRINT_OPERAND(STREAM, X, CODE) aarch64_print_operand (STREAM, X, CODE) + +#define PRINT_OPERAND_ADDRESS(STREAM, X) \ + aarch64_print_operand_address (STREAM, X) + +#define MCOUNT_NAME "_mcount" + +#define NO_PROFILE_COUNTERS 1 + +/* Emit rtl for profiling. Output assembler code to FILE + to call "_mcount" for profiling a function entry. */ +#define PROFILE_HOOK(LABEL) \ + { \ + rtx fun, lr; \ + lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \ + fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ + emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \ + } + +/* All the work done in PROFILE_HOOK, but still required. */ +#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) + +/* For some reason, the Linux headers think they know how to define + these macros. They don't!!! */ +#undef ASM_APP_ON +#undef ASM_APP_OFF +#define ASM_APP_ON "\t" ASM_COMMENT_START " Start of user assembly\n" +#define ASM_APP_OFF "\t" ASM_COMMENT_START " End of user assembly\n" + +#define CONSTANT_POOL_BEFORE_FUNCTION 0 + +/* This definition should be relocated to aarch64-elf-raw.h. 
This macro + should be undefined in aarch64-linux.h and a clear_cache pattern + implmented to emit either the call to __aarch64_sync_cache_range() + directly or preferably the appropriate sycall or cache clear + instructions inline. */ +#define CLEAR_INSN_CACHE(beg, end) \ + extern void __aarch64_sync_cache_range (void *, void *); \ + __aarch64_sync_cache_range (beg, end) + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + aarch64_cannot_change_mode_class (FROM, TO, CLASS) + +#define SHIFT_COUNT_TRUNCATED !TARGET_SIMD + +/* Callee only saves lower 64-bits of a 128-bit register. Tell the + compiler the callee clobbers the top 64-bits when restoring the + bottom 64-bits. */ +#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ + (FP_REGNUM_P (REGNO) && GET_MODE_SIZE (MODE) > 8) + +/* Check TLS Descriptors mechanism is selected. */ +#define TARGET_TLS_DESC (aarch64_tls_dialect == TLS_DESCRIPTORS) + +extern enum aarch64_code_model aarch64_cmodel; + +/* When using the tiny addressing model conditional and unconditional branches + can span the whole of the available address space (1MB). */ +#define HAS_LONG_COND_BRANCH \ + (aarch64_cmodel == AARCH64_CMODEL_TINY \ + || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) + +#define HAS_LONG_UNCOND_BRANCH \ + (aarch64_cmodel == AARCH64_CMODEL_TINY \ + || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) + +/* Modes valid for AdvSIMD Q registers. */ +#define AARCH64_VALID_SIMD_QREG_MODE(MODE) \ + ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \ + || (MODE) == V4SFmode || (MODE) == V2DImode || mode == V2DFmode) + +#define ENDIAN_LANE_N(mode, n) \ + (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n) + +#define BIG_LITTLE_SPEC \ + " %{mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})}" + +extern const char *aarch64_rewrite_mcpu (int argc, const char **argv); +#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ + { "rewrite_mcpu", aarch64_rewrite_mcpu }, + +#define ASM_CPU_SPEC \ + BIG_LITTLE_SPEC + +#define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS + +#define EXTRA_SPECS \ + { "asm_cpu_spec", ASM_CPU_SPEC } + +#endif /* GCC_AARCH64_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md new file mode 100644 index 000000000..c86a29d8e --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -0,0 +1,3642 @@ +;; Machine description for AArch64 architecture. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +;; Register numbers +(define_constants + [ + (R0_REGNUM 0) + (R1_REGNUM 1) + (R2_REGNUM 2) + (R3_REGNUM 3) + (R4_REGNUM 4) + (R5_REGNUM 5) + (R6_REGNUM 6) + (R7_REGNUM 7) + (R8_REGNUM 8) + (R9_REGNUM 9) + (R10_REGNUM 10) + (R11_REGNUM 11) + (R12_REGNUM 12) + (R13_REGNUM 13) + (R14_REGNUM 14) + (R15_REGNUM 15) + (R16_REGNUM 16) + (IP0_REGNUM 16) + (R17_REGNUM 17) + (IP1_REGNUM 17) + (R18_REGNUM 18) + (R19_REGNUM 19) + (R20_REGNUM 20) + (R21_REGNUM 21) + (R22_REGNUM 22) + (R23_REGNUM 23) + (R24_REGNUM 24) + (R25_REGNUM 25) + (R26_REGNUM 26) + (R27_REGNUM 27) + (R28_REGNUM 28) + (R29_REGNUM 29) + (R30_REGNUM 30) + (LR_REGNUM 30) + (SP_REGNUM 31) + (V0_REGNUM 32) + (V15_REGNUM 47) + (V31_REGNUM 63) + (SFP_REGNUM 64) + (AP_REGNUM 65) + (CC_REGNUM 66) + ] +) + +(define_c_enum "unspec" [ + UNSPEC_CASESI + UNSPEC_CLS + UNSPEC_FRECPE + UNSPEC_FRECPS + UNSPEC_FRECPX + UNSPEC_FRINTA + UNSPEC_FRINTI + UNSPEC_FRINTM + UNSPEC_FRINTN + UNSPEC_FRINTP + UNSPEC_FRINTX + UNSPEC_FRINTZ + UNSPEC_GOTSMALLPIC + UNSPEC_GOTSMALLTLS + UNSPEC_GOTTINYPIC + UNSPEC_LD1 + UNSPEC_LD2 + UNSPEC_LD3 + UNSPEC_LD4 + UNSPEC_MB + UNSPEC_NOP + UNSPEC_PRLG_STK + UNSPEC_RBIT + UNSPEC_SISD_NEG + UNSPEC_SISD_SSHL + UNSPEC_SISD_USHL + UNSPEC_SSHL_2S + UNSPEC_SSHR64 + UNSPEC_ST1 + UNSPEC_ST2 + UNSPEC_ST3 + UNSPEC_ST4 + UNSPEC_TLS + UNSPEC_TLSDESC + UNSPEC_USHL_2S + UNSPEC_USHR64 + UNSPEC_VSTRUCTDUMMY +]) + +(define_c_enum "unspecv" [ + UNSPECV_EH_RETURN ; Represent EH_RETURN + ] +) + +;; If further include files are added the defintion of MD_INCLUDES +;; must be updated. + +(include "constraints.md") +(include "predicates.md") +(include "iterators.md") + +;; ------------------------------------------------------------------- +;; Instruction types and attributes +;; ------------------------------------------------------------------- + +; The "type" attribute is is included here from AArch32 backend to be able +; to share pipeline descriptions. +(include "../arm/types.md") + +;; Attribute that specifies whether or not the instruction touches fp +;; registers. +(define_attr "fp" "no,yes" (const_string "no")) + +;; Attribute that specifies whether or not the instruction touches simd +;; registers. +(define_attr "simd" "no,yes" (const_string "no")) + +(define_attr "length" "" + (const_int 4)) + +;; Attribute that controls whether an alternative is enabled or not. +;; Currently it is only used to disable alternatives which touch fp or simd +;; registers when -mgeneral-regs-only is specified. +(define_attr "enabled" "no,yes" + (cond [(ior + (and (eq_attr "fp" "yes") + (eq (symbol_ref "TARGET_FLOAT") (const_int 0))) + (and (eq_attr "simd" "yes") + (eq (symbol_ref "TARGET_SIMD") (const_int 0)))) + (const_string "no") + ] (const_string "yes"))) + +;; ------------------------------------------------------------------- +;; Pipeline descriptions and scheduling +;; ------------------------------------------------------------------- + +;; Processor types. +(include "aarch64-tune.md") + +;; True if the generic scheduling description should be used. 
+ +(define_attr "generic_sched" "yes,no" + (const (if_then_else + (eq_attr "tune" "cortexa53,cortexa15") + (const_string "no") + (const_string "yes")))) + +;; Scheduling +(include "../arm/cortex-a53.md") +(include "../arm/cortex-a15.md") + +;; ------------------------------------------------------------------- +;; Jumps and other miscellaneous insns +;; ------------------------------------------------------------------- + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "r"))] + "" + "br\\t%0" + [(set_attr "type" "branch")] +) + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "b\\t%l0" + [(set_attr "type" "branch")] +) + +(define_expand "cbranch4" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPI 1 "register_operand" "") + (match_operand:GPI 2 "aarch64_plus_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + " + operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], + operands[2]); + operands[2] = const0_rtx; + " +) + +(define_expand "cbranch4" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand:GPF 1 "register_operand" "") + (match_operand:GPF 2 "aarch64_reg_or_zero" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + " + operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1], + operands[2]); + operands[2] = const0_rtx; + " +) + +(define_insn "*condjump" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "b%m0\\t%l2" + [(set_attr "type" "branch")] +) + +(define_expand "casesi" + [(match_operand:SI 0 "register_operand" "") ; Index + (match_operand:SI 1 "const_int_operand" "") ; Lower bound + (match_operand:SI 2 "const_int_operand" "") ; Total range + (match_operand:DI 3 "" "") ; Table label + (match_operand:DI 4 "" "")] ; Out of range label + "" + { + if (operands[1] != const0_rtx) + { + rtx reg = gen_reg_rtx (SImode); + + /* Canonical RTL says that if you have: + + (minus (X) (CONST)) + + then this should be emitted as: + + (plus (X) (-CONST)) + + The use of trunc_int_for_mode ensures that the resulting + constant can be represented in SImode, this is important + for the corner case where operand[1] is INT_MIN. 
*/ + + operands[1] = GEN_INT (trunc_int_for_mode (-INTVAL (operands[1]), SImode)); + + if (!(*insn_data[CODE_FOR_addsi3].operand[2].predicate) + (operands[1], SImode)) + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_addsi3 (reg, operands[0], operands[1])); + operands[0] = reg; + } + + if (!aarch64_plus_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + emit_jump_insn (gen_cbranchsi4 (gen_rtx_GTU (SImode, const0_rtx, + const0_rtx), + operands[0], operands[2], operands[4])); + + operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (VOIDmode, operands[3])); + emit_jump_insn (gen_casesi_dispatch (operands[2], operands[0], + operands[3])); + DONE; + } +) + +(define_insn "casesi_dispatch" + [(parallel + [(set (pc) + (mem:DI (unspec [(match_operand:DI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_CASESI))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:DI 3 "=r")) + (clobber (match_scratch:DI 4 "=r")) + (use (label_ref (match_operand 2 "" "")))])] + "" + "* + return aarch64_output_casesi (operands); + " + [(set_attr "length" "16") + (set_attr "type" "branch")] +) + +(define_insn "nop" + [(unspec[(const_int 0)] UNSPEC_NOP)] + "" + "nop" + [(set_attr "type" "no_insn")] +) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 8))] + "" + "brk #1000" + [(set_attr "type" "trap")]) + +(define_expand "prologue" + [(clobber (const_int 0))] + "" + " + aarch64_expand_prologue (); + DONE; + " +) + +(define_expand "epilogue" + [(clobber (const_int 0))] + "" + " + aarch64_expand_epilogue (false); + DONE; + " +) + +(define_expand "sibcall_epilogue" + [(clobber (const_int 0))] + "" + " + aarch64_expand_epilogue (true); + DONE; + " +) + +(define_insn "*do_return" + [(return)] + "" + "ret" + [(set_attr "type" "branch")] +) + +(define_insn "eh_return" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")] + UNSPECV_EH_RETURN)] + "" + "#" + [(set_attr "type" "branch")] + +) + +(define_split + [(unspec_volatile [(match_operand:DI 0 "register_operand" "")] + UNSPECV_EH_RETURN)] + "reload_completed" + [(set (match_dup 1) (match_dup 0))] + { + operands[1] = aarch64_final_eh_return_addr (); + } +) + +(define_insn "*cb1" + [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + "\\t%0, %l1" + [(set_attr "type" "branch")] + +) + +(define_insn "*tb1" + [(set (pc) (if_then_else + (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r") + (const_int 1) + (match_operand 1 "const_int_operand" "n")) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) + (clobber (match_scratch:DI 3 "=r"))] + "" + "* + if (get_attr_length (insn) == 8) + return \"ubfx\\t%3, %0, %1, #1\;\\t%3, %l2\"; + return \"\\t%0, %1, %l2\"; + " + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) + (lt (minus (match_dup 2) (pc)) (const_int 32764))) + (const_int 4) + (const_int 8)))] +) + +(define_insn "*cb1" + [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (clobber (match_scratch:DI 2 "=r"))] + "" + "* + if (get_attr_length (insn) == 8) + return \"ubfx\\t%2, %0, , #1\;\\t%2, %l1\"; + return \"\\t%0, , %l1\"; + " + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) + (lt (minus (match_dup 1) (pc)) 
(const_int 32764))) + (const_int 4) + (const_int 8)))] +) + +;; ------------------------------------------------------------------- +;; Subroutine calls and sibcalls +;; ------------------------------------------------------------------- + +(define_expand "call" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (use (match_operand 2 "" "")) + (clobber (reg:DI LR_REGNUM))])] + "" + " + { + rtx callee; + + /* In an untyped call, we can get NULL for operand 2. */ + if (operands[2] == NULL) + operands[2] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 64-bit address of the callee into a register before performing + the branch-and-link. */ + callee = XEXP (operands[0], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? aarch64_is_long_call_p (callee) + : !REG_P (callee)) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + }" +) + +(define_insn "*call_reg" + [(call (mem:DI (match_operand:DI 0 "register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:DI LR_REGNUM))] + "" + "blr\\t%0" + [(set_attr "type" "call")] +) + +(define_insn "*call_symbol" + [(call (mem:DI (match_operand:DI 0 "" "")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:DI LR_REGNUM))] + "GET_CODE (operands[0]) == SYMBOL_REF + && !aarch64_is_long_call_p (operands[0])" + "bl\\t%a0" + [(set_attr "type" "call")] +) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:DI LR_REGNUM))])] + "" + " + { + rtx callee; + + /* In an untyped call, we can get NULL for operand 3. */ + if (operands[3] == NULL) + operands[3] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 64-bit address of the callee into a register before performing + the branch-and-link. */ + callee = XEXP (operands[1], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? 
aarch64_is_long_call_p (callee) + : !REG_P (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + }" +) + +(define_insn "*call_value_reg" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "register_operand" "r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:DI LR_REGNUM))] + "" + "blr\\t%1" + [(set_attr "type" "call")] + +) + +(define_insn "*call_value_symbol" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "" "")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:DI LR_REGNUM))] + "GET_CODE (operands[1]) == SYMBOL_REF + && !aarch64_is_long_call_p (operands[1])" + "bl\\t%a1" + [(set_attr "type" "call")] +) + +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (return) + (use (match_operand 2 "" ""))])] + "" + { + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + } +) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (return) + (use (match_operand 3 "" ""))])] + "" + { + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + } +) + +(define_insn "*sibcall_insn" + [(call (mem:DI (match_operand:DI 0 "" "X")) + (match_operand 1 "" "")) + (return) + (use (match_operand 2 "" ""))] + "GET_CODE (operands[0]) == SYMBOL_REF" + "b\\t%a0" + [(set_attr "type" "branch")] + +) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand 1 "" "X")) + (match_operand 2 "" ""))) + (return) + (use (match_operand 3 "" ""))] + "GET_CODE (operands[1]) == SYMBOL_REF" + "b\\t%a1" + [(set_attr "type" "branch")] +) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "") + (const_int 0)) + (match_operand 1 "") + (match_operand 2 "")])] + "" +{ + int i; + + emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. 
*/ + emit_insn (gen_blockage ()); + DONE; +}) + +;; ------------------------------------------------------------------- +;; Moves +;; ------------------------------------------------------------------- + +(define_expand "mov" + [(set (match_operand:SHORT 0 "nonimmediate_operand" "") + (match_operand:SHORT 1 "general_operand" ""))] + "" + " + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) + operands[1] = force_reg (mode, operands[1]); + " +) + +(define_insn "*mov_aarch64" + [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w") + (match_operand:SHORT 1 "general_operand" " r,M,D,m, m,rZ,*w,*w, r,*w"))] + "(register_operand (operands[0], mode) + || aarch64_reg_or_zero (operands[1], mode))" +{ + switch (which_alternative) + { + case 0: + return "mov\t%w0, %w1"; + case 1: + return "mov\t%w0, %1"; + case 2: + return aarch64_output_scalar_simd_mov_immediate (operands[1], + mode); + case 3: + return "ldr\t%w0, %1"; + case 4: + return "ldr\t%0, %1"; + case 5: + return "str\t%w1, %0"; + case 6: + return "str\t%1, %0"; + case 7: + return "umov\t%w0, %1.[0]"; + case 8: + return "dup\t%0., %w1"; + case 9: + return "dup\t%0, %1.[0]"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\ + neon_from_gp,neon_from_gp, neon_dup") + (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")] +) + +(define_expand "mov" + [(set (match_operand:GPI 0 "nonimmediate_operand" "") + (match_operand:GPI 1 "general_operand" ""))] + "" + " + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) + operands[1] = force_reg (mode, operands[1]); + + if (CONSTANT_P (operands[1])) + { + aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + } + " +) + +(define_insn "*movsi_aarch64" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r ,*w, r,*w") + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))] + "(register_operand (operands[0], SImode) + || aarch64_reg_or_zero (operands[1], SImode))" + "@ + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %w1 + mov\\t%w0, %1 + ldr\\t%w0, %1 + ldr\\t%s0, %1 + str\\t%w1, %0 + str\\t%s1, %0 + adr\\t%x0, %a1 + adrp\\t%x0, %A1 + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 + fmov\\t%s0, %s1" + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ + adr,adr,fmov,fmov,fmov") + (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] +) + +(define_insn "*movdi_aarch64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w") + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))] + "(register_operand (operands[0], DImode) + || aarch64_reg_or_zero (operands[1], DImode))" + "@ + mov\\t%x0, %x1 + mov\\t%0, %x1 + mov\\t%x0, %1 + mov\\t%x0, %1 + ldr\\t%x0, %1 + ldr\\t%d0, %1 + str\\t%x1, %0 + str\\t%d1, %0 + adr\\t%x0, %a1 + adrp\\t%x0, %A1 + fmov\\t%d0, %x1 + fmov\\t%x0, %d1 + fmov\\t%d0, %d1 + movi\\t%d0, %1" + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ + adr,adr,fmov,fmov,fmov,fmov") + (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") + (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] +) + +(define_insn "insv_imm" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (const_int 16) + (match_operand:GPI 1 "const_int_operand" "n")) + (match_operand:GPI 2 "const_int_operand" "n"))] + "UINTVAL (operands[1]) < GET_MODE_BITSIZE (mode) + && UINTVAL (operands[1]) % 16 == 0" + "movk\\t%0, %X2, lsl %1" + 
[(set_attr "type" "mov_imm")] +) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "" + " + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) + operands[1] = force_reg (TImode, operands[1]); + " +) + +(define_insn "*movti_aarch64" + [(set (match_operand:TI 0 + "nonimmediate_operand" "=r, *w,r ,*w,r ,Ump,Ump,*w,m") + (match_operand:TI 1 + "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r ,Z , m,*w"))] + "(register_operand (operands[0], TImode) + || aarch64_reg_or_zero (operands[1], TImode))" + "@ + # + # + # + orr\\t%0.16b, %1.16b, %1.16b + ldp\\t%0, %H0, %1 + stp\\t%1, %H1, %0 + stp\\txzr, xzr, %0 + ldr\\t%q0, %1 + str\\t%q1, %0" + [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ + load2,store2,store2,f_loadd,f_stored") + (set_attr "length" "8,8,8,4,4,4,4,4,4") + (set_attr "simd" "*,*,*,yes,*,*,*,*,*") + (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")] +) + +;; Split a TImode register-register or register-immediate move into +;; its component DImode pieces, taking care to handle overlapping +;; source and dest registers. +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "aarch64_reg_or_imm" ""))] + "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" + [(const_int 0)] +{ + aarch64_split_128bit_move (operands[0], operands[1]); + DONE; +}) + +(define_expand "mov" + [(set (match_operand:GPF 0 "nonimmediate_operand" "") + (match_operand:GPF 1 "general_operand" ""))] + "" + " + if (!TARGET_FLOAT) + { + sorry (\"%qs and floating point code\", \"-mgeneral-regs-only\"); + FAIL; + } + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); + " +) + +(define_insn "*movsf_aarch64" + [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") + (match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode))" + "@ + fmov\\t%s0, %w1 + fmov\\t%w0, %s1 + fmov\\t%s0, %s1 + fmov\\t%s0, %1 + ldr\\t%s0, %1 + str\\t%s1, %0 + ldr\\t%w0, %1 + str\\t%w1, %0 + mov\\t%w0, %w1" + [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ + f_loads,f_stores,f_loads,f_stores,fmov")] +) + +(define_insn "*movdf_aarch64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") + (match_operand:DF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "@ + fmov\\t%d0, %x1 + fmov\\t%x0, %d1 + fmov\\t%d0, %d1 + fmov\\t%d0, %1 + ldr\\t%d0, %1 + str\\t%d1, %0 + ldr\\t%x0, %1 + str\\t%x1, %0 + mov\\t%x0, %x1" + [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\ + f_loadd,f_stored,f_loadd,f_stored,mov_reg")] +) + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "" + " + if (!TARGET_FLOAT) + { + sorry (\"%qs and floating point code\", \"-mgeneral-regs-only\"); + FAIL; + } + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (TFmode, operands[1]); + " +) + +(define_insn "*movtf_aarch64" + [(set (match_operand:TF 0 + "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump") + (match_operand:TF 1 + "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,Ump,?rY"))] + "TARGET_FLOAT && (register_operand (operands[0], TFmode) + || register_operand (operands[1], TFmode))" + "@ + orr\\t%0.16b, %1.16b, %1.16b + # + # + # + movi\\t%0.2d, #0 + fmov\\t%s0, wzr + ldr\\t%q0, %1 
+ str\\t%q1, %0 + ldp\\t%0, %H0, %1 + stp\\t%1, %H1, %0" + [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,fconstd,fconstd,\ + f_loadd,f_stored,neon_load1_2reg,neon_store1_2reg") + (set_attr "length" "4,8,8,8,4,4,4,4,4,4") + (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*") + (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")] +) + +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (match_operand:TF 1 "aarch64_reg_or_imm" ""))] + "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" + [(const_int 0)] + { + aarch64_split_128bit_move (operands[0], operands[1]); + DONE; + } +) + +;; Operands 1 and 3 are tied together by the final condition; so we allow +;; fairly lax checking on the second memory operation. +(define_insn "load_pair" + [(set (match_operand:GPI 0 "register_operand" "=r") + (match_operand:GPI 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:GPI 2 "register_operand" "=r") + (match_operand:GPI 3 "memory_operand" "m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (mode)))" + "ldp\\t%0, %2, %1" + [(set_attr "type" "load2")] +) + +;; Operands 0 and 2 are tied together by the final condition; so we allow +;; fairly lax checking on the second memory operation. +(define_insn "store_pair" + [(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:GPI 1 "register_operand" "r")) + (set (match_operand:GPI 2 "memory_operand" "=m") + (match_operand:GPI 3 "register_operand" "r"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (mode)))" + "stp\\t%1, %3, %0" + [(set_attr "type" "store2")] +) + +;; Operands 1 and 3 are tied together by the final condition; so we allow +;; fairly lax checking on the second memory operation. +(define_insn "load_pair" + [(set (match_operand:GPF 0 "register_operand" "=w") + (match_operand:GPF 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:GPF 2 "register_operand" "=w") + (match_operand:GPF 3 "memory_operand" "m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (mode)))" + "ldp\\t%0, %2, %1" + [(set_attr "type" "neon_load1_2reg")] +) + +;; Operands 0 and 2 are tied together by the final condition; so we allow +;; fairly lax checking on the second memory operation. +(define_insn "store_pair" + [(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:GPF 1 "register_operand" "w")) + (set (match_operand:GPF 2 "memory_operand" "=m") + (match_operand:GPF 3 "register_operand" "w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (mode)))" + "stp\\t%1, %3, %0" + [(set_attr "type" "neon_store1_2reg")] +) + +;; Load pair with writeback. This is primarily used in function epilogues +;; when restoring [fp,lr] +(define_insn "loadwb_pair_" + [(parallel + [(set (match_operand:P 0 "register_operand" "=k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "const_int_operand" "n"))) + (set (match_operand:GPI 2 "register_operand" "=r") + (mem:GPI (plus:P (match_dup 1) + (match_dup 4)))) + (set (match_operand:GPI 3 "register_operand" "=r") + (mem:GPI (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))])] + "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" + "ldp\\t%2, %3, [%1], %4" + [(set_attr "type" "load2")] +) + +;; Store pair with writeback. 
This is primarily used in function prologues +;; when saving [fp,lr] +(define_insn "storewb_pair_" + [(parallel + [(set (match_operand:P 0 "register_operand" "=&k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "const_int_operand" "n"))) + (set (mem:GPI (plus:P (match_dup 0) + (match_dup 4))) + (match_operand:GPI 2 "register_operand" "r")) + (set (mem:GPI (plus:P (match_dup 0) + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:GPI 3 "register_operand" "r"))])] + "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" + "stp\\t%2, %3, [%0, %4]!" + [(set_attr "type" "store2")] +) + +;; ------------------------------------------------------------------- +;; Sign/Zero extension +;; ------------------------------------------------------------------- + +(define_expand "sidi2" + [(set (match_operand:DI 0 "register_operand") + (ANY_EXTEND:DI (match_operand:SI 1 "nonimmediate_operand")))] + "" +) + +(define_insn "*extendsidi2_aarch64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + sxtw\t%0, %w1 + ldrsw\t%0, %1" + [(set_attr "type" "extend,load1")] +) + +(define_insn "*zero_extendsidi2_aarch64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + uxtw\t%0, %w1 + ldr\t%w0, %1" + [(set_attr "type" "extend,load1")] +) + +(define_expand "2" + [(set (match_operand:GPI 0 "register_operand") + (ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))] + "" +) + +(define_insn "*extend2_aarch64" + [(set (match_operand:GPI 0 "register_operand" "=r,r") + (sign_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m")))] + "" + "@ + sxt\t%0, %w1 + ldrs\t%0, %1" + [(set_attr "type" "extend,load1")] +) + +(define_insn "*zero_extend2_aarch64" + [(set (match_operand:GPI 0 "register_operand" "=r,r,*w") + (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))] + "" + "@ + uxt\t%0, %w1 + ldr\t%w0, %1 + ldr\t%0, %1" + [(set_attr "type" "extend,load1,load1")] +) + +(define_expand "qihi2" + [(set (match_operand:HI 0 "register_operand") + (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand")))] + "" +) + +(define_insn "*qihi2_aarch64" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "" + "@ + xtb\t%w0, %w1 + b\t%w0, %1" + [(set_attr "type" "extend,load1")] +) + +;; ------------------------------------------------------------------- +;; Simple arithmetic +;; ------------------------------------------------------------------- + +(define_expand "add3" + [(set + (match_operand:GPI 0 "register_operand" "") + (plus:GPI (match_operand:GPI 1 "register_operand" "") + (match_operand:GPI 2 "aarch64_pluslong_operand" "")))] + "" + " + if (! aarch64_plus_operand (operands[2], VOIDmode)) + { + rtx subtarget = ((optimize && can_create_pseudo_p ()) + ? 
gen_reg_rtx (mode) : operands[0]); + HOST_WIDE_INT imm = INTVAL (operands[2]); + + if (imm < 0) + imm = -(-imm & ~0xfff); + else + imm &= ~0xfff; + + emit_insn (gen_add3 (subtarget, operands[1], GEN_INT (imm))); + operands[1] = subtarget; + operands[2] = GEN_INT (INTVAL (operands[2]) - imm); + } + " +) + +(define_insn "*addsi3_aarch64" + [(set + (match_operand:SI 0 "register_operand" "=rk,rk,rk") + (plus:SI + (match_operand:SI 1 "register_operand" "%rk,rk,rk") + (match_operand:SI 2 "aarch64_plus_operand" "I,r,J")))] + "" + "@ + add\\t%w0, %w1, %2 + add\\t%w0, %w1, %w2 + sub\\t%w0, %w1, #%n2" + [(set_attr "type" "alu_imm,alu_reg,alu_imm")] +) + +;; zero_extend version of above +(define_insn "*addsi3_aarch64_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=rk,rk,rk") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk") + (match_operand:SI 2 "aarch64_plus_operand" "I,r,J"))))] + "" + "@ + add\\t%w0, %w1, %2 + add\\t%w0, %w1, %w2 + sub\\t%w0, %w1, #%n2" + [(set_attr "type" "alu_imm,alu_reg,alu_imm")] +) + +(define_insn "*adddi3_aarch64" + [(set + (match_operand:DI 0 "register_operand" "=rk,rk,rk,!w") + (plus:DI + (match_operand:DI 1 "register_operand" "%rk,rk,rk,!w") + (match_operand:DI 2 "aarch64_plus_operand" "I,r,J,!w")))] + "" + "@ + add\\t%x0, %x1, %2 + add\\t%x0, %x1, %x2 + sub\\t%x0, %x1, #%n2 + add\\t%d0, %d1, %d2" + [(set_attr "type" "alu_imm,alu_reg,alu_imm,alu_reg") + (set_attr "simd" "*,*,*,yes")] +) + +(define_insn "*add3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") + (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ + adds\\t%0, %1, %2 + adds\\t%0, %1, %2 + subs\\t%0, %1, #%n2" + [(set_attr "type" "alus_reg,alus_imm,alus_imm")] +) + +;; zero_extend version of above +(define_insn "*addsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:SI (match_operand:SI 1 "register_operand" "%r,r,r") + (match_operand:SI 2 "aarch64_plus_operand" "r,I,J")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r,r,r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "" + "@ + adds\\t%w0, %w1, %w2 + adds\\t%w0, %w1, %w2 + subs\\t%w0, %w1, #%n2" + [(set_attr "type" "alus_reg,alus_imm,alus_imm")] +) + +(define_insn "*adds_mul_imm_" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (mult:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_" "n")) + (match_operand:GPI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3)))] + "" + "adds\\t%0, %3, %1, lsl %p2" + [(set_attr "type" "alus_shift_imm")] +) + +(define_insn "*subs_mul_imm_" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (mult:GPI + (match_operand:GPI 2 "register_operand" "r") + (match_operand:QI 3 "aarch64_pwr_2_" "n"))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) + (mult:GPI (match_dup 2) (match_dup 3))))] + "" + "subs\\t%0, %1, %2, lsl %p3" + [(set_attr "type" "alus_shift_imm")] +) + +(define_insn "*adds__" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI + (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0))) 
+ (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))] + "" + "adds\\t%0, %2, %1, xt" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*subs__" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r"))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) (ANY_EXTEND:GPI (match_dup 2))))] + "" + "subs\\t%0, %1, %2, xt" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*adds__multp2" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)) + (match_operand:GPI 4 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3) + (const_int 0)) + (match_dup 4)))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "adds\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*subs__multp2" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 4) (ANY_EXTRACT:GPI + (mult:GPI (match_dup 1) (match_dup 2)) + (match_dup 3) + (const_int 0))))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "subs\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*add3nr_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (plus:GPI (match_operand:GPI 0 "register_operand" "%r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")) + (const_int 0)))] + "" + "@ + cmn\\t%0, %1 + cmn\\t%0, %1 + cmp\\t%0, #%n1" + [(set_attr "type" "alus_reg,alus_imm,alus_imm")] +) + +(define_insn "*compare_neg" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (neg:GPI (match_operand:GPI 0 "register_operand" "r")) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "cmn\\t%1, %0" + [(set_attr "type" "alus_reg")] +) + +(define_insn "*add__" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "add\\t%0, %3, %1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*add__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (ASHIFT:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*add_mul_imm_" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_" "n")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "add\\t%0, %3, %1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*add__" + [(set (match_operand:GPI 0 "register_operand" "=rk") + 
(plus:GPI (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "add\\t%0, %2, %1, xt" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ANY_EXTEND:SI (match_operand:SHORT 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r"))))] + "" + "add\\t%w0, %w2, %w1, xt" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*add__shft_" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (ashift:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 1 "register_operand" "r")) + (match_operand 2 "aarch64_imm3" "Ui3")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "add\\t%0, %3, %1, xt %2" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add__shft_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ashift:SI (ANY_EXTEND:SI + (match_operand:SHORT 1 "register_operand" "r")) + (match_operand 2 "aarch64_imm3" "Ui3")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, xt %2" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*add__mult_" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (mult:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 1 "register_operand" "r")) + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "add\\t%0, %3, %1, xt %p2" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add__mult_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI (plus:SI (mult:SI (ANY_EXTEND:SI + (match_operand:SHORT 1 "register_operand" "r")) + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "add\\t%w0, %w3, %w1, xt %p2" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*add__multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)) + (match_operand:GPI 4 "register_operand" "r")))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "add\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add_si_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (ANY_EXTRACT:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)) + (match_operand:SI 4 "register_operand" "r"))))] + "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" + "add\\t%w0, %w4, %w1, xt%e3 %p2" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*add3_carryin" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) + (plus:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r"))))] + "" + "adc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of above +(define_insn "*addsi3_carryin_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 
"register_operand" "r")))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add3_carryin_alt1" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (plus:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (geu:GPI (reg:CC CC_REGNUM) (const_int 0))))] + "" + "adc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt1_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add3_carryin_alt2" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (plus:GPI + (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "adc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt2_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r"))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add3_carryin_alt3" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (plus:GPI + (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "adc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of above +(define_insn "*addsi3_carryin_alt3_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI + (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 1 "register_operand" "r"))))] + "" + "adc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (plus:GPI (and:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n")) + (match_operand:GPI 4 "register_operand" "r")))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"add\t%0, %4, %1, uxt%e3 %p2\";" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*add_uxtsi_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (plus:SI (and:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n")) + (match_operand:SI 4 "register_operand" "r"))))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";" + [(set_attr "type" "alu_ext")] +) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=rk") + (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 
"register_operand" "r")))] + "" + "sub\\t%w0, %w1, %w2" + [(set_attr "type" "alu_reg")] +) + +;; zero_extend version of above +(define_insn "*subsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "sub\\t%w0, %w1, %w2" + [(set_attr "type" "alu_reg")] +) + +(define_insn "subdi3" + [(set (match_operand:DI 0 "register_operand" "=rk,!w") + (minus:DI (match_operand:DI 1 "register_operand" "r,!w") + (match_operand:DI 2 "register_operand" "r,!w")))] + "" + "@ + sub\\t%x0, %x1, %x2 + sub\\t%d0, %d1, %d2" + [(set_attr "type" "alu_reg, neon_sub") + (set_attr "simd" "*,yes")] +) + + +(define_insn "*sub3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "" + "subs\\t%0, %1, %2" + [(set_attr "type" "alus_reg")] +) + +;; zero_extend version of above +(define_insn "*subsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))] + "" + "subs\\t%w0, %w1, %w2" + [(set_attr "type" "alus_reg")] +) + +(define_insn "*sub__" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_operand:GPI 3 "register_operand" "r") + (ASHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))))] + "" + "sub\\t%0, %3, %1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*sub__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (ASHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))] + "" + "sub\\t%w0, %w3, %w1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*sub_mul_imm_" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_operand:GPI 3 "register_operand" "r") + (mult:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_" "n"))))] + "" + "sub\\t%0, %3, %1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*sub_mul_imm_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (mult:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))] + "" + "sub\\t%w0, %w3, %w1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*sub__" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r"))))] + "" + "sub\\t%0, %1, %2, xt" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*sub__si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (ANY_EXTEND:SI + (match_operand:SHORT 2 "register_operand" "r")))))] + "" + "sub\\t%w0, %w1, %w2, xt" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*sub__shft_" + 
[(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (ashift:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 2 "register_operand" "r")) + (match_operand 3 "aarch64_imm3" "Ui3"))))] + "" + "sub\\t%0, %1, %2, xt %3" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*sub__shft_si_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "r") + (ashift:SI (ANY_EXTEND:SI + (match_operand:SHORT 2 "register_operand" "r")) + (match_operand 3 "aarch64_imm3" "Ui3")))))] + "" + "sub\\t%w0, %w1, %w2, xt %3" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*sub__multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (ANY_EXTRACT:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0))))] + "aarch64_is_extend_from_extract (mode, operands[2], operands[3])" + "sub\\t%0, %4, %1, xt%e3 %p2" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*sub_si_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 4 "register_operand" "r") + (ANY_EXTRACT:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n") + (const_int 0)))))] + "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])" + "sub\\t%w0, %w4, %w1, xt%e3 %p2" + [(set_attr "type" "alu_ext")] +) + +(define_insn "*sub3_carryin" + [(set + (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (minus:GPI + (match_operand:GPI 1 "register_operand" "r") + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "sbc\\t%0, %1, %2" + [(set_attr "type" "adc_reg")] +) + +;; zero_extend version of the above +(define_insn "*subsi3_carryin_uxtw" + [(set + (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (minus:SI + (match_operand:SI 1 "register_operand" "r") + (ltu:SI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:SI 2 "register_operand" "r"))))] + "" + "sbc\\t%w0, %w1, %w2" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub_uxt_multp2" + [(set (match_operand:GPI 0 "register_operand" "=rk") + (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (and:GPI + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n"))))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"sub\t%0, %4, %1, uxt%e3 %p2\";" + [(set_attr "type" "alu_ext")] +) + +;; zero_extend version of above +(define_insn "*sub_uxtsi_multp2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=rk") + (zero_extend:DI + (minus:SI (match_operand:SI 4 "register_operand" "r") + (and:SI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "aarch64_pwr_imm3" "Up3")) + (match_operand 3 "const_int_operand" "n")))))] + "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0" + "* + operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), + INTVAL (operands[3]))); + return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";" 
+ [(set_attr "type" "alu_ext")] +) + +(define_insn_and_split "absdi2" + [(set (match_operand:DI 0 "register_operand" "=r,w") + (abs:DI (match_operand:DI 1 "register_operand" "r,w"))) + (clobber (match_scratch:DI 2 "=&r,X"))] + "" + "@ + # + abs\\t%d0, %d1" + "reload_completed + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" + [(const_int 0)] + { + emit_insn (gen_rtx_SET (VOIDmode, operands[2], + gen_rtx_XOR (DImode, + gen_rtx_ASHIFTRT (DImode, + operands[1], + GEN_INT (63)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (DImode, + operands[2], + gen_rtx_ASHIFTRT (DImode, + operands[1], + GEN_INT (63))))); + DONE; + } + [(set_attr "type" "alu_reg")] +) + +(define_insn "neg2" + [(set (match_operand:GPI 0 "register_operand" "=r,w") + (neg:GPI (match_operand:GPI 1 "register_operand" "r,w")))] + "" + "@ + neg\\t%0, %1 + neg\\t%0, %1" + [(set_attr "type" "alu_reg, neon_neg") + (set_attr "simd" "*,yes")] +) + +;; zero_extend version of above +(define_insn "*negsi2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))] + "" + "neg\\t%w0, %w1" + [(set_attr "type" "alu_reg")] +) + +(define_insn "*ngc" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "ngc\\t%0, %1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*ngcsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0))) + (match_operand:SI 1 "register_operand" "r"))))] + "" + "ngc\\t%w0, %w1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*neg2_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_dup 1)))] + "" + "negs\\t%0, %1" + [(set_attr "type" "alus_reg")] +) + +;; zero_extend version of above +(define_insn "*negsi2_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ (neg:SI (match_operand:SI 1 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (neg:SI (match_dup 1))))] + "" + "negs\\t%w0, %w1" + [(set_attr "type" "alus_reg")] +) + +(define_insn "*neg_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (neg:GPI (ASHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (ASHIFT:GPI (match_dup 1) (match_dup 2))))] + "" + "negs\\t%0, %1, %2" + [(set_attr "type" "alus_shift_imm")] +) + +(define_insn "*neg__2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (ASHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))))] + "" + "neg\\t%0, %1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*neg__si2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (neg:SI (ASHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))] + "" + "neg\\t%w0, %w1, %2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "*neg_mul_imm_2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (mult:GPI + (match_operand:GPI 1 "register_operand" "r") + 
(match_operand:QI 2 "aarch64_pwr_2_" "n"))))] + "" + "neg\\t%0, %1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*neg_mul_imm_si2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (neg:SI (mult:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))] + "" + "neg\\t%w0, %w1, lsl %p2" + [(set_attr "type" "alu_shift_imm")] +) + +(define_insn "mul3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")))] + "" + "mul\\t%0, %1, %2" + [(set_attr "type" "mul")] +) + +;; zero_extend version of above +(define_insn "*mulsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "mul\\t%w0, %w1, %w2" + [(set_attr "type" "mul")] +) + +(define_insn "*madd" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "madd\\t%0, %1, %2, %3" + [(set_attr "type" "mla")] +) + +;; zero_extend version of above +(define_insn "*maddsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "madd\\t%w0, %w1, %w2, %w3" + [(set_attr "type" "mla")] +) + +(define_insn "*msub" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_operand:GPI 3 "register_operand" "r") + (mult:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r"))))] + + "" + "msub\\t%0, %1, %2, %3" + [(set_attr "type" "mla")] +) + +;; zero_extend version of above +(define_insn "*msubsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 3 "register_operand" "r") + (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")))))] + + "" + "msub\\t%w0, %w1, %w2, %w3" + [(set_attr "type" "mla")] +) + +(define_insn "*mul_neg" + [(set (match_operand:GPI 0 "register_operand" "=r") + (mult:GPI (neg:GPI (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")))] + + "" + "mneg\\t%0, %1, %2" + [(set_attr "type" "mul")] +) + +;; zero_extend version of above +(define_insn "*mulsi_neg_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (mult:SI (neg:SI (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r"))))] + + "" + "mneg\\t%w0, %w1, %w2" + [(set_attr "type" "mul")] +) + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r"))))] + "" + "mull\\t%0, %w1, %w2" + [(set_attr "type" "mull")] +) + +(define_insn "maddsidi4" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r"))) + (match_operand:DI 3 "register_operand" "r")))] + "" + "maddl\\t%0, %w1, %w2, %3" + [(set_attr "type" "mlal")] +) + +(define_insn "msubsidi4" + 
[(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (match_operand:DI 3 "register_operand" "r") + (mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r")) + (ANY_EXTEND:DI + (match_operand:SI 2 "register_operand" "r")))))] + "" + "msubl\\t%0, %w1, %w2, %3" + [(set_attr "type" "mlal")] +) + +(define_insn "*mulsidi_neg" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI (neg:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r"))) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand" "r"))))] + "" + "mnegl\\t%0, %w1, %w2" + [(set_attr "type" "mull")] +) + +(define_insn "muldi3_highpart" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI + (ANY_EXTEND:TI (match_operand:DI 1 "register_operand" "r")) + (ANY_EXTEND:TI (match_operand:DI 2 "register_operand" "r"))) + (const_int 64))))] + "" + "mulh\\t%0, %1, %2" + [(set_attr "type" "mull")] +) + +(define_insn "div3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ANY_DIV:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")))] + "" + "div\\t%0, %1, %2" + [(set_attr "type" "div")] +) + +;; zero_extend version of above +(define_insn "*divsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (ANY_DIV:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))))] + "" + "div\\t%w0, %w1, %w2" + [(set_attr "type" "div")] +) + +;; ------------------------------------------------------------------- +;; Comparison insns +;; ------------------------------------------------------------------- + +(define_insn "*cmp" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))] + "" + "@ + cmp\\t%0, %1 + cmp\\t%0, %1 + cmn\\t%0, #%n1" + [(set_attr "type" "alus_reg,alus_imm,alus_imm")] +) + +(define_insn "*cmp" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:GPF 0 "register_operand" "w,w") + (match_operand:GPF 1 "aarch64_fp_compare_operand" "Y,w")))] + "TARGET_FLOAT" + "@ + fcmp\\t%0, #0.0 + fcmp\\t%0, %1" + [(set_attr "type" "fcmp")] +) + +(define_insn "*cmpe" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:GPF 0 "register_operand" "w,w") + (match_operand:GPF 1 "aarch64_fp_compare_operand" "Y,w")))] + "TARGET_FLOAT" + "@ + fcmpe\\t%0, #0.0 + fcmpe\\t%0, %1" + [(set_attr "type" "fcmp")] +) + +(define_insn "*cmp_swp__reg" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (ASHIFT:GPI + (match_operand:GPI 0 "register_operand" "r") + (match_operand:QI 1 "aarch64_shift_imm_" "n")) + (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))] + "" + "cmp\\t%2, %0, %1" + [(set_attr "type" "alus_shift_imm")] +) + +(define_insn "*cmp_swp__reg" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (ANY_EXTEND:GPI + (match_operand:ALLX 0 "register_operand" "r")) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "cmp\\t%1, %0, xt" + [(set_attr "type" "alus_ext")] +) + +(define_insn "*cmp_swp__shft_" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (ashift:GPI + (ANY_EXTEND:GPI + (match_operand:ALLX 0 "register_operand" "r")) + (match_operand 1 "aarch64_imm3" "Ui3")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "cmp\\t%2, %0, xt %1" + [(set_attr "type" "alus_ext")] +) + +;; ------------------------------------------------------------------- +;; Store-flag and conditional select insns +;; ------------------------------------------------------------------- + 
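Editor's illustrative aside, not part of the original patch: the cstore and cmov patterns that follow are what let a branchless C comparison or ternary lower onto CSET/CSEL-family instructions instead of a compare-and-branch. A minimal C sketch follows; the function names and register choices are assumptions, and the assembly shown in the comments is only the roughly expected output for this backend.

/* Hedged sketch: names and exact register allocation are illustrative
   assumptions, not taken from the patch.  */

int is_less (int a, int b)
{
  /* Store-flag (cstore) pattern: roughly  cmp w0, w1 ; cset w0, lt  */
  return a < b;
}

long select_nonzero (long c, long x, long y)
{
  /* Conditional-select (cmov) pattern: roughly  cmp x0, 0 ; csel x0, x1, x2, ne  */
  return c ? x : y;
}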
+(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand:GPI 2 "register_operand" "") + (match_operand:GPI 3 "aarch64_plus_operand" "")]))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_expand "cstore4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand:GPF 2 "register_operand" "") + (match_operand:GPF 3 "register_operand" "")]))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_insn "*cstore_insn" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (match_operator:ALLI 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))] + "" + "cset\\t%0, %m1" + [(set_attr "type" "csel")] +) + +;; zero_extend version of the above +(define_insn "*cstoresi_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "" + "cset\\t%w0, %m1" + [(set_attr "type" "csel")] +) + +(define_insn "cstore_neg" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "" + "csetm\\t%0, %m1" + [(set_attr "type" "csel")] +) + +;; zero_extend version of the above +(define_insn "*cstoresi_neg_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (neg:SI (match_operator:SI 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))))] + "" + "csetm\\t%w0, %m1" + [(set_attr "type" "csel")] +) + +(define_expand "cmov6" + [(set (match_operand:GPI 0 "register_operand" "") + (if_then_else:GPI + (match_operator 1 "aarch64_comparison_operator" + [(match_operand:GPI 2 "register_operand" "") + (match_operand:GPI 3 "aarch64_plus_operand" "")]) + (match_operand:GPI 4 "register_operand" "") + (match_operand:GPI 5 "register_operand" "")))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_expand "cmov6" + [(set (match_operand:GPF 0 "register_operand" "") + (if_then_else:GPF + (match_operator 1 "aarch64_comparison_operator" + [(match_operand:GPF 2 "register_operand" "") + (match_operand:GPF 3 "register_operand" "")]) + (match_operand:GPF 4 "register_operand" "") + (match_operand:GPF 5 "register_operand" "")))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], + operands[3]); + operands[3] = const0_rtx; + " +) + +(define_insn "*cmov_insn" + [(set (match_operand:ALLI 0 "register_operand" "=r,r,r,r,r,r,r") + (if_then_else:ALLI + (match_operator 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:ALLI 3 "aarch64_reg_zero_or_m1_or_1" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1") + (match_operand:ALLI 4 "aarch64_reg_zero_or_m1_or_1" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1")))] + "!((operands[3] == const1_rtx && operands[4] == constm1_rtx) + || (operands[3] == constm1_rtx && operands[4] == const1_rtx))" + ;; Final two alternatives should be unreachable, but included for completeness + "@ + csel\\t%0, %3, %4, %m1 + csinv\\t%0, %3, zr, %m1 + csinv\\t%0, %4, zr, %M1 + csinc\\t%0, %3, zr, %m1 + 
csinc\\t%0, %4, zr, %M1 + mov\\t%0, -1 + mov\\t%0, 1" + [(set_attr "type" "csel")] +) + +;; zero_extend version of above +(define_insn "*cmovsi_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r") + (zero_extend:DI + (if_then_else:SI + (match_operator 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "aarch64_reg_zero_or_m1_or_1" "rZ,rZ,UsM,rZ,Ui1,UsM,Ui1") + (match_operand:SI 4 "aarch64_reg_zero_or_m1_or_1" "rZ,UsM,rZ,Ui1,rZ,UsM,Ui1"))))] + "!((operands[3] == const1_rtx && operands[4] == constm1_rtx) + || (operands[3] == constm1_rtx && operands[4] == const1_rtx))" + ;; Final two alternatives should be unreachable, but included for completeness + "@ + csel\\t%w0, %w3, %w4, %m1 + csinv\\t%w0, %w3, wzr, %m1 + csinv\\t%w0, %w4, wzr, %M1 + csinc\\t%w0, %w3, wzr, %m1 + csinc\\t%w0, %w4, wzr, %M1 + mov\\t%w0, -1 + mov\\t%w0, 1" + [(set_attr "type" "csel")] +) + +(define_insn "*cmov_insn" + [(set (match_operand:GPF 0 "register_operand" "=w") + (if_then_else:GPF + (match_operator 1 "aarch64_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:GPF 3 "register_operand" "w") + (match_operand:GPF 4 "register_operand" "w")))] + "TARGET_FLOAT" + "fcsel\\t%0, %3, %4, %m1" + [(set_attr "type" "fcsel")] +) + +(define_expand "movcc" + [(set (match_operand:ALLI 0 "register_operand" "") + (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator" "") + (match_operand:ALLI 2 "register_operand" "") + (match_operand:ALLI 3 "register_operand" "")))] + "" + { + rtx ccreg; + enum rtx_code code = GET_CODE (operands[1]); + + if (code == UNEQ || code == LTGT) + FAIL; + + ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + } +) + +(define_expand "movcc" + [(set (match_operand:GPI 0 "register_operand" "") + (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") + (match_operand:GPF 2 "register_operand" "") + (match_operand:GPF 3 "register_operand" "")))] + "" + { + rtx ccreg; + enum rtx_code code = GET_CODE (operands[1]); + + if (code == UNEQ || code == LTGT) + FAIL; + + ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + } +) + +(define_insn "*csinc2_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator" + [(match_operand:CC 3 "cc_register" "") (const_int 0)]) + (match_operand:GPI 1 "register_operand" "r")))] + "" + "csinc\\t%0, %1, %1, %M2" + [(set_attr "type" "csel")] +) + +(define_insn "csinc3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (if_then_else:GPI + (match_operator:GPI 1 "aarch64_comparison_operator" + [(match_operand:CC 2 "cc_register" "") (const_int 0)]) + (plus:GPI (match_operand:GPI 3 "register_operand" "r") + (const_int 1)) + (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] + "" + "csinc\\t%0, %4, %3, %M1" + [(set_attr "type" "csel")] +) + +(define_insn "*csinv3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (if_then_else:GPI + (match_operator:GPI 1 "aarch64_comparison_operator" + [(match_operand:CC 2 "cc_register" "") (const_int 0)]) + (not:GPI (match_operand:GPI 3 "register_operand" "r")) + (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] + "" + "csinv\\t%0, %4, %3, %M1" + [(set_attr "type" "csel")] +) + +(define_insn "*csneg3_insn" + [(set 
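Editor's note, not part of the patch: the csinc/csinv/csneg patterns above catch conditional increment, invert and negate idioms; an illustrative sketch of the source shapes involved, with no guarantee the compiler picks these forms in every case.

long cond_inc (long a, long b, long x, long y)
{
  return a < b ? x + 1 : y;       /* csinc pattern  */
}

long cond_not (long a, long b, long x, long y)
{
  return a < b ? ~x : y;          /* csinv pattern  */
}

long cond_neg (long a, long b, long x, long y)
{
  return a < b ? -x : y;          /* csneg pattern  */
}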
(match_operand:GPI 0 "register_operand" "=r") + (if_then_else:GPI + (match_operator:GPI 1 "aarch64_comparison_operator" + [(match_operand:CC 2 "cc_register" "") (const_int 0)]) + (neg:GPI (match_operand:GPI 3 "register_operand" "r")) + (match_operand:GPI 4 "aarch64_reg_or_zero" "rZ")))] + "" + "csneg\\t%0, %4, %3, %M1" + [(set_attr "type" "csel")] +) + +;; ------------------------------------------------------------------- +;; Logical operations +;; ------------------------------------------------------------------- + +(define_insn "3" + [(set (match_operand:GPI 0 "register_operand" "=r,rk") + (LOGICAL:GPI (match_operand:GPI 1 "register_operand" "%r,r") + (match_operand:GPI 2 "aarch64_logical_operand" "r,")))] + "" + "\\t%0, %1, %2" + [(set_attr "type" "logic_reg,logic_imm")] +) + +;; zero_extend version of above +(define_insn "*si3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r,rk") + (zero_extend:DI + (LOGICAL:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "aarch64_logical_operand" "r,K"))))] + "" + "\\t%w0, %w1, %w2" + [(set_attr "type" "logic_reg,logic_imm")] +) + +(define_insn "*and3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_operand:GPI 1 "register_operand" "%r,r") + (match_operand:GPI 2 "aarch64_logical_operand" "r,")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (and:GPI (match_dup 1) (match_dup 2)))] + "" + "ands\\t%0, %1, %2" + [(set_attr "type" "logics_reg,logics_imm")] +) + +;; zero_extend version of above +(define_insn "*andsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (match_operand:SI 1 "register_operand" "%r,r") + (match_operand:SI 2 "aarch64_logical_operand" "r,K")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] + "" + "ands\\t%w0, %w1, %w2" + [(set_attr "type" "logics_reg,logics_imm")] +) + +(define_insn "*and_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (SHIFT:GPI (match_dup 1) (match_dup 2)) (match_dup 3)))] + "" + "ands\\t%0, %3, %1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*and_si3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (SHIFT:SI (match_dup 1) (match_dup 2)) + (match_dup 3))))] + "" + "ands\\t%w0, %w3, %w1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +(define_insn "*_3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "\\t%0, %3, %1, %2" + [(set_attr "type" "logic_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*_si3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (LOGICAL:SI (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + 
"\\t%w0, %w3, %w1, %2" + [(set_attr "type" "logic_shift_imm")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (not:GPI (match_operand:GPI 1 "register_operand" "r")))] + "" + "mvn\\t%0, %1" + [(set_attr "type" "logic_reg")] +) + +(define_insn "*one_cmpl_2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (not:GPI (SHIFT:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))))] + "" + "mvn\\t%0, %1, %2" + [(set_attr "type" "logic_shift_imm")] +) + +(define_insn "*_one_cmpl3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (not:GPI + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")))] + "" + "\\t%0, %2, %1" + [(set_attr "type" "logic_reg")] +) + +(define_insn "*and_one_cmpl3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (not:GPI + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (not:GPI (match_dup 1)) (match_dup 2)))] + "" + "bics\\t%0, %2, %1" + [(set_attr "type" "logics_reg")] +) + +;; zero_extend version of above +(define_insn "*and_one_cmplsi3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (not:SI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (not:SI (match_dup 1)) (match_dup 2))))] + "" + "bics\\t%w0, %w2, %w1" + [(set_attr "type" "logics_reg")] +) + +(define_insn "*_one_cmpl_3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (not:GPI + (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "\\t%0, %3, %1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +(define_insn "*and_one_cmpl_3_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (not:GPI + (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n"))) + (match_operand:GPI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (not:GPI + (SHIFT:GPI + (match_dup 1) (match_dup 2))) (match_dup 3)))] + "" + "bics\\t%0, %3, %1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +;; zero_extend version of above +(define_insn "*and_one_cmpl_si3_compare0_uxtw" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:SI (not:SI + (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n"))) + (match_operand:SI 3 "register_operand" "r")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI + (not:SI + (SHIFT:SI (match_dup 1) (match_dup 2))) (match_dup 3))))] + "" + "bics\\t%w0, %w3, %w1, %2" + [(set_attr "type" "logics_shift_imm")] +) + +(define_insn "clz2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (clz:GPI (match_operand:GPI 1 "register_operand" "r")))] + "" + "clz\\t%0, %1" + [(set_attr "type" "clz")] +) + +(define_expand "ffs2" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand")] + "" + { + rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); + rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); + + emit_insn (gen_rbit2 (operands[0], operands[1])); + emit_insn (gen_clz2 (operands[0], 
operands[0])); + emit_insn (gen_csinc3_insn (operands[0], x, ccreg, operands[0], const0_rtx)); + DONE; + } +) + +(define_insn "clrsb2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_CLS))] + "" + "cls\\t%0, %1" + [(set_attr "type" "clz")] +) + +(define_insn "rbit2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))] + "" + "rbit\\t%0, %1" + [(set_attr "type" "rbit")] +) + +(define_expand "ctz2" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand")] + "" + { + emit_insn (gen_rbit2 (operands[0], operands[1])); + emit_insn (gen_clz2 (operands[0], operands[0])); + DONE; + } +) + +(define_insn "*and3nr_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (match_operand:GPI 0 "register_operand" "%r,r") + (match_operand:GPI 1 "aarch64_logical_operand" "r,")) + (const_int 0)))] + "" + "tst\\t%0, %1" + [(set_attr "type" "logics_reg")] +) + +(define_insn "*and_3nr_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (and:GPI (SHIFT:GPI + (match_operand:GPI 0 "register_operand" "r") + (match_operand:QI 1 "aarch64_shift_imm_" "n")) + (match_operand:GPI 2 "register_operand" "r")) + (const_int 0)))] + "" + "tst\\t%2, %0, %1" + [(set_attr "type" "logics_shift_imm")] +) + +;; ------------------------------------------------------------------- +;; Shifts +;; ------------------------------------------------------------------- + +(define_expand "3" + [(set (match_operand:GPI 0 "register_operand") + (ASHIFT:GPI (match_operand:GPI 1 "register_operand") + (match_operand:QI 2 "nonmemory_operand")))] + "" + { + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); + + if (operands[2] == const0_rtx) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + } + } +) + +(define_expand "ashl3" + [(set (match_operand:SHORT 0 "register_operand") + (ashift:SHORT (match_operand:SHORT 1 "register_operand") + (match_operand:QI 2 "nonmemory_operand")))] + "" + { + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); + + if (operands[2] == const0_rtx) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + } + } +) + +(define_expand "rotr3" + [(set (match_operand:GPI 0 "register_operand") + (rotatert:GPI (match_operand:GPI 1 "register_operand") + (match_operand:QI 2 "nonmemory_operand")))] + "" + { + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (mode) - 1)); + + if (operands[2] == const0_rtx) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + } + } +) + +(define_expand "rotl3" + [(set (match_operand:GPI 0 "register_operand") + (rotatert:GPI (match_operand:GPI 1 "register_operand") + (match_operand:QI 2 "nonmemory_operand")))] + "" + { + /* (SZ - cnt) % SZ == -cnt % SZ */ + if (CONST_INT_P (operands[2])) + { + operands[2] = GEN_INT ((-INTVAL (operands[2])) + & (GET_MODE_BITSIZE (mode) - 1)); + if (operands[2] == const0_rtx) + { + emit_insn (gen_mov (operands[0], operands[1])); + DONE; + } + } + else + operands[2] = expand_simple_unop (QImode, NEG, operands[2], + NULL_RTX, 1); + } +) + +;; Logical left shift using SISD or Integer instruction +(define_insn "*aarch64_ashl_sisd_or_int_3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashift:GPI + (match_operand:GPI 1 "register_operand" 
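Editor's note, not part of the patch: a sketch of how the count-trailing-zero and rotate expanders above behave at the source level. ctz is synthesized as RBIT + CLZ exactly as the ctz expander does, and ffs additionally uses CSINC to add one and return 0 for a zero input; the shift expanders mask constant counts to the mode width, and rotate-left is rewritten as a negated rotate-right.

#include <stdint.h>

int trailing_zeros (uint64_t x)
{
  return x ? __builtin_ctzll (x) : 64;   /* rbit + clz  */
}

int find_first_set (uint64_t x)
{
  return __builtin_ffsll (x);            /* rbit + clz + csinc; 0 for x == 0  */
}

/* The rotate-left expander relies on (SZ - n) % SZ == -n % SZ, i.e. a left
   rotate is a right rotate by the negated count.  A plain-C equivalent:  */
uint32_t rotl32 (uint32_t x, unsigned n)
{
  n &= 31;
  return (x << n) | (x >> ((32 - n) & 31));
}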
"w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] + "" + "@ + shl\t%0, %1, %2 + ushl\t%0, %1, %2 + lsl\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "type" "neon_shift_imm, neon_shift_reg,shift_reg")] +) + +;; Logical right shift using SISD or Integer instruction +(define_insn "*aarch64_lshr_sisd_or_int_3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (lshiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] + "" + "@ + ushr\t%0, %1, %2 + # + lsr\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (lshiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (lshiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))] + "" +) + +;; Arithmetic right shift using SISD or Integer instruction +(define_insn "*aarch64_ashr_sisd_or_int_3" + [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + (ashiftrt:GPI + (match_operand:GPI 1 "register_operand" "w,w,r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,rUs")))] + "" + "@ + sshr\t%0, %1, %2 + # + asr\t%0, %1, %2" + [(set_attr "simd" "yes,yes,no") + (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] +) + +(define_split + [(set (match_operand:DI 0 "aarch64_simd_register") + (ashiftrt:DI + (match_operand:DI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))] + "" +) + +(define_split + [(set (match_operand:SI 0 "aarch64_simd_register") + (ashiftrt:SI + (match_operand:SI 1 "aarch64_simd_register") + (match_operand:QI 2 "aarch64_simd_register")))] + "TARGET_SIMD && reload_completed" + [(set (match_dup 2) + (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG)) + (set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))] + "" +) + +(define_insn "*aarch64_sisd_ushl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_USHL))] + "TARGET_SIMD" + "ushl\t%d0, %d1, %d2" + [(set_attr "simd" "yes") + (set_attr "type" "neon_shift_reg")] +) + +(define_insn "*aarch64_ushl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_USHL_2S))] + "TARGET_SIMD" + "ushl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "type" "neon_shift_reg")] +) + +(define_insn "*aarch64_sisd_sshl" + [(set (match_operand:DI 0 "register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SISD_SSHL))] + "TARGET_SIMD" + "sshl\t%d0, %d1, 
%d2" + [(set_attr "simd" "yes") + (set_attr "type" "neon_shift_reg")] +) + +(define_insn "*aarch64_sshl_2s" + [(set (match_operand:SI 0 "register_operand" "=w") + (unspec:SI [(match_operand:SI 1 "register_operand" "w") + (match_operand:QI 2 "register_operand" "w")] + UNSPEC_SSHL_2S))] + "TARGET_SIMD" + "sshl\t%0.2s, %1.2s, %2.2s" + [(set_attr "simd" "yes") + (set_attr "type" "neon_shift_reg")] +) + +(define_insn "*aarch64_sisd_neg_qi" + [(set (match_operand:QI 0 "register_operand" "=w") + (unspec:QI [(match_operand:QI 1 "register_operand" "w")] + UNSPEC_SISD_NEG))] + "TARGET_SIMD" + "neg\t%d0, %d1" + [(set_attr "simd" "yes") + (set_attr "type" "neon_neg")] +) + +;; Rotate right +(define_insn "*ror3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (rotatert:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs")))] + "" + "ror\\t%0, %1, %2" + [(set_attr "type" "shift_reg")] +) + +;; zero_extend version of above +(define_insn "*si3_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_si" "rUss"))))] + "" + "\\t%w0, %w1, %w2" + [(set_attr "type" "shift_reg")] +) + +(define_insn "*ashl3_insn" + [(set (match_operand:SHORT 0 "register_operand" "=r") + (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_si" "rUss")))] + "" + "lsl\\t%0, %1, %2" + [(set_attr "type" "shift_reg")] +) + +(define_insn "*3_insn" + [(set (match_operand:SHORT 0 "register_operand" "=r") + (ASHIFT:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n")))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "\t%w0, %w1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +(define_insn "*extr5_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 3 "const_int_operand" "n")) + (lshiftrt:GPI (match_operand:GPI 2 "register_operand" "r") + (match_operand 4 "const_int_operand" "n"))))] + "UINTVAL (operands[3]) < GET_MODE_BITSIZE (mode) && + (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (mode))" + "extr\\t%0, %1, %2, %4" + [(set_attr "type" "shift_imm")] +) + +;; zero_extend version of the above +(define_insn "*extrsi5_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 3 "const_int_operand" "n")) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "r") + (match_operand 4 "const_int_operand" "n")))))] + "UINTVAL (operands[3]) < 32 && + (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" + "extr\\t%w0, %w1, %w2, %4" + [(set_attr "type" "shift_imm")] +) + +(define_insn "*ror3_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (rotate:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n")))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "ror\\t%0, %1, %3"; +} + [(set_attr "type" "shift_imm")] +) + +;; zero_extend version of the above +(define_insn "*rorsi3_insn_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + 
"UINTVAL (operands[2]) < 32" +{ + operands[3] = GEN_INT (32 - UINTVAL (operands[2])); + return "ror\\t%w0, %w1, %3"; +} + [(set_attr "type" "shift_imm")] +) + +(define_insn "*_ashl" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ANY_EXTEND:GPI + (ashift:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "bfiz\t%0, %1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +(define_insn "*zero_extend_lshr" + [(set (match_operand:GPI 0 "register_operand" "=r") + (zero_extend:GPI + (lshiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "ubfx\t%0, %1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +(define_insn "*extend_ashr" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI + (ashiftrt:SHORT (match_operand:SHORT 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + "UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)" +{ + operands[3] = GEN_INT ( - UINTVAL (operands[2])); + return "sbfx\\t%0, %1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +;; ------------------------------------------------------------------- +;; Bitfields +;; ------------------------------------------------------------------- + +(define_expand "" + [(set (match_operand:DI 0 "register_operand" "=r") + (ANY_EXTRACT:DI (match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n") + (match_operand 3 "const_int_operand" "n")))] + "" + "" +) + +(define_insn "*" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n") + (match_operand 3 "const_int_operand" "n")))] + "" + "bfx\\t%0, %1, %3, %2" + [(set_attr "type" "bfm")] +) + +;; Bitfield Insert (insv) +(define_expand "insv" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand") + (match_operand 1 "const_int_operand") + (match_operand 2 "const_int_operand")) + (match_operand:GPI 3 "general_operand"))] + "" +{ + unsigned HOST_WIDE_INT width = UINTVAL (operands[1]); + unsigned HOST_WIDE_INT pos = UINTVAL (operands[2]); + rtx value = operands[3]; + + if (width == 0 || (pos + width) > GET_MODE_BITSIZE (mode)) + FAIL; + + if (CONST_INT_P (value)) + { + unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT)1 << width) - 1; + + /* Prefer AND/OR for inserting all zeros or all ones. */ + if ((UINTVAL (value) & mask) == 0 + || (UINTVAL (value) & mask) == mask) + FAIL; + + /* 16-bit aligned 16-bit wide insert is handled by insv_imm. 
*/ + if (width == 16 && (pos % 16) == 0) + DONE; + } + operands[3] = force_reg (mode, value); +}) + +(define_insn "*insv_reg" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (match_operand:GPI 3 "register_operand" "r"))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[2]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (mode)))" + "bfi\\t%0, %3, %2, %1" + [(set_attr "type" "bfm")] +) + +(define_insn "*extr_insv_lower_reg" + [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") + (match_operand 1 "const_int_operand" "n") + (const_int 0)) + (zero_extract:GPI (match_operand:GPI 2 "register_operand" "+r") + (match_dup 1) + (match_operand 3 "const_int_operand" "n")))] + "!(UINTVAL (operands[1]) == 0 + || (UINTVAL (operands[3]) + UINTVAL (operands[1]) + > GET_MODE_BITSIZE (mode)))" + "bfxil\\t%0, %2, %3, %1" + [(set_attr "type" "bfm")] +) + +(define_insn "*_shft_" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ashift:GPI (ANY_EXTEND:GPI + (match_operand:ALLX 1 "register_operand" "r")) + (match_operand 2 "const_int_operand" "n")))] + "UINTVAL (operands[2]) < " +{ + operands[3] = ( <= ( - UINTVAL (operands[2]))) + ? GEN_INT () + : GEN_INT ( - UINTVAL (operands[2])); + return "bfiz\t%0, %1, %2, %3"; +} + [(set_attr "type" "bfm")] +) + +;; XXX We should match (any_extend (ashift)) here, like (and (ashift)) below + +(define_insn "*andim_ashift_bfiz" + [(set (match_operand:GPI 0 "register_operand" "=r") + (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n")) + (match_operand 3 "const_int_operand" "n")))] + "exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1) >= 0 + && (INTVAL (operands[3]) & ((1 << INTVAL (operands[2])) - 1)) == 0" + "ubfiz\\t%0, %1, %2, %P3" + [(set_attr "type" "bfm")] +) + +(define_insn "bswap2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (bswap:GPI (match_operand:GPI 1 "register_operand" "r")))] + "" + "rev\\t%0, %1" + [(set_attr "type" "rev")] +) + +(define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "" + "rev16\\t%w0, %w1" + [(set_attr "type" "rev")] +) + +;; zero_extend version of above +(define_insn "*bswapsi2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (bswap:SI (match_operand:SI 1 "register_operand" "r"))))] + "" + "rev\\t%w0, %w1" + [(set_attr "type" "rev")] +) + +;; ------------------------------------------------------------------- +;; Floating-point intrinsics +;; ------------------------------------------------------------------- + +;; frint floating-point round to integral standard patterns. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round. + +(define_insn "2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRINT))] + "TARGET_FLOAT" + "frint\\t%0, %1" + [(set_attr "type" "f_rint")] +) + +;; frcvt floating-point round to integer and convert standard patterns. +;; Expands to lbtrunc, lceil, lfloor, lround. 
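Editor's note, not part of the patch: the byte-reverse and floating-point rounding patterns above map onto familiar builtins and C99 functions; the usual mapping is shown in the comments, hedged because the final selection depends on options and optimization level.

#include <stdint.h>
#include <math.h>

uint32_t swap32 (uint32_t x)
{
  return __builtin_bswap32 (x);   /* bswap pattern -> REV  */
}

uint16_t swap16 (uint16_t x)
{
  return __builtin_bswap16 (x);   /* bswaphi2 -> REV16  */
}

/* The frint patterns implement the rounding functions directly.  */
double round_down (double x)
{
  return floor (x);               /* typically FRINTM  */
}

double round_current_mode (double x)
{
  return rint (x);                /* typically FRINTX  */
}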
+(define_insn "l2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (FIXUORS:GPI (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FCVT)))] + "TARGET_FLOAT" + "fcvt\\t%0, %1" + [(set_attr "type" "f_cvtf2i")] +) + +;; fma - no throw + +(define_insn "fma4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (fma:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w")))] + "TARGET_FLOAT" + "fmadd\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +(define_insn "fnma4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) + (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w")))] + "TARGET_FLOAT" + "fmsub\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +(define_insn "fms4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (fma:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w") + (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] + "TARGET_FLOAT" + "fnmsub\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +(define_insn "fnms4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) + (match_operand:GPF 2 "register_operand" "w") + (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] + "TARGET_FLOAT" + "fnmadd\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +;; If signed zeros are ignored, -(a * b + c) = -a * b - c. +(define_insn "*fnmadd4" + [(set (match_operand:GPF 0 "register_operand" "=w") + (neg:GPF (fma:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w"))))] + "!HONOR_SIGNED_ZEROS (mode) && TARGET_FLOAT" + "fnmadd\\t%0, %1, %2, %3" + [(set_attr "type" "fmac")] +) + +;; ------------------------------------------------------------------- +;; Floating-point conversions +;; ------------------------------------------------------------------- + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=w") + (float_extend:DF (match_operand:SF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvt\\t%d0, %s1" + [(set_attr "type" "f_cvt")] +) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=w") + (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvt\\t%s0, %d1" + [(set_attr "type" "f_cvt")] +) + +(define_insn "fix_trunc2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (fix:GPI (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvtzs\\t%0, %1" + [(set_attr "type" "f_cvtf2i")] +) + +(define_insn "fixuns_trunc2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvtzu\\t%0, %1" + [(set_attr "type" "f_cvtf2i")] +) + +(define_insn "float2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (float:GPF (match_operand:GPI 1 "register_operand" "r")))] + "TARGET_FLOAT" + "scvtf\\t%0, %1" + [(set_attr "type" "f_cvti2f")] +) + +(define_insn "floatuns2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unsigned_float:GPF (match_operand:GPI 1 "register_operand" "r")))] + "TARGET_FLOAT" + "ucvtf\\t%0, %1" + [(set_attr "type" "f_cvt")] +) + +;; ------------------------------------------------------------------- +;; Floating-point arithmetic +;; 
------------------------------------------------------------------- + +(define_insn "add3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (plus:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fadd\\t%0, %1, %2" + [(set_attr "type" "fadd")] +) + +(define_insn "sub3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (minus:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fsub\\t%0, %1, %2" + [(set_attr "type" "fadd")] +) + +(define_insn "mul3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (mult:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fmul\\t%0, %1, %2" + [(set_attr "type" "fmul")] +) + +(define_insn "*fnmul3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (mult:GPF + (neg:GPF (match_operand:GPF 1 "register_operand" "w")) + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fnmul\\t%0, %1, %2" + [(set_attr "type" "fmul")] +) + +(define_insn "div3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (div:GPF + (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fdiv\\t%0, %1, %2" + [(set_attr "type" "fdiv")] +) + +(define_insn "neg2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (neg:GPF (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fneg\\t%0, %1" + [(set_attr "type" "ffarith")] +) + +(define_insn "sqrt2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fsqrt\\t%0, %1" + [(set_attr "type" "fsqrt")] +) + +(define_insn "abs2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (abs:GPF (match_operand:GPF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fabs\\t%0, %1" + [(set_attr "type" "ffarith")] +) + +;; Given that smax/smin do not specify the result when either input is NaN, +;; we could use either FMAXNM or FMAX for smax, and either FMINNM or FMIN +;; for smin. + +(define_insn "smax3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (smax:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fmaxnm\\t%0, %1, %2" + [(set_attr "type" "f_minmax")] +) + +(define_insn "smin3" + [(set (match_operand:GPF 0 "register_operand" "=w") + (smin:GPF (match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")))] + "TARGET_FLOAT" + "fminnm\\t%0, %1, %2" + [(set_attr "type" "f_minmax")] +) + +;; ------------------------------------------------------------------- +;; Reload support +;; ------------------------------------------------------------------- + +(define_expand "aarch64_reload_mov" + [(set (match_operand:TX 0 "register_operand" "=w") + (match_operand:TX 1 "register_operand" "w")) + (clobber (match_operand:DI 2 "register_operand" "=&r")) + ] + "" + { + rtx op0 = simplify_gen_subreg (TImode, operands[0], mode, 0); + rtx op1 = simplify_gen_subreg (TImode, operands[1], mode, 0); + gen_aarch64_movtilow_tilow (op0, op1); + gen_aarch64_movdi_tihigh (operands[2], op1); + gen_aarch64_movtihigh_di (op0, operands[2]); + DONE; + } +) + +;; The following secondary reload helpers patterns are invoked +;; after or during reload as we don't want these patterns to start +;; kicking in during the combiner. 
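Editor's note, not part of the patch: the scalar floating-point arithmetic patterns above map one-to-one onto FADD, FSUB, FMUL, FDIV, FNEG, FABS and FSQRT, and smax/smin use FMAXNM/FMINNM; as the in-file comment notes, smax/smin leave NaN behaviour unspecified, so that mapping is a choice rather than a requirement. Illustrative C only.

#include <math.h>

double magnitude (double a, double b)
{
  return sqrt (a * a + b * b);    /* fmul, fadd (possibly fmadd), fsqrt  */
}

double clamp_non_negative (double x)
{
  return x > 0.0 ? x : 0.0;       /* may become fmaxnm under -ffast-math  */
}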
+ +(define_insn "aarch64_movdi_low" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI (match_operand:TX 1 "register_operand" "w")))] + "reload_completed || reload_in_progress" + "fmov\\t%x0, %d1" + [(set_attr "type" "f_mrc") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_movdi_high" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TX (match_operand:TX 1 "register_operand" "w") + (const_int 64))))] + "reload_completed || reload_in_progress" + "fmov\\t%x0, %1.d[1]" + [(set_attr "type" "f_mrc") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_movhigh_di" + [(set (zero_extract:TX (match_operand:TX 0 "register_operand" "+w") + (const_int 64) (const_int 64)) + (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] + "reload_completed || reload_in_progress" + "fmov\\t%0.d[1], %x1" + [(set_attr "type" "f_mcr") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_movlow_di" + [(set (match_operand:TX 0 "register_operand" "=w") + (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] + "reload_completed || reload_in_progress" + "fmov\\t%d0, %x1" + [(set_attr "type" "f_mcr") + (set_attr "length" "4") + ]) + +(define_insn "aarch64_movtilow_tilow" + [(set (match_operand:TI 0 "register_operand" "=w") + (zero_extend:TI + (truncate:DI (match_operand:TI 1 "register_operand" "w"))))] + "reload_completed || reload_in_progress" + "fmov\\t%d0, %d1" + [(set_attr "type" "f_mcr") + (set_attr "length" "4") + ]) + +;; There is a deliberate reason why the parameters of high and lo_sum's +;; don't have modes for ADRP and ADD instructions. This is to allow high +;; and lo_sum's to be used with the labels defining the jump tables in +;; rodata section. + +(define_expand "add_losym" + [(set (match_operand 0 "register_operand" "=r") + (lo_sum (match_operand 1 "register_operand" "r") + (match_operand 2 "aarch64_valid_symref" "S")))] + "" +{ + enum machine_mode mode = GET_MODE (operands[0]); + + emit_insn ((mode == DImode + ? 
gen_add_losym_di + : gen_add_losym_si) (operands[0], + operands[1], + operands[2])); + DONE; +}) + +(define_insn "add_losym_" + [(set (match_operand:P 0 "register_operand" "=r") + (lo_sum:P (match_operand:P 1 "register_operand" "r") + (match_operand 2 "aarch64_valid_symref" "S")))] + "" + "add\\t%0, %1, :lo12:%a2" + [(set_attr "type" "alu_reg")] +) + +(define_insn "ldr_got_small_" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(mem:PTR (lo_sum:PTR + (match_operand:PTR 1 "register_operand" "r") + (match_operand:PTR 2 "aarch64_valid_symref" "S")))] + UNSPEC_GOTSMALLPIC))] + "" + "ldr\\t%0, [%1, #:got_lo12:%a2]" + [(set_attr "type" "load1")] +) + +(define_insn "ldr_got_small_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unspec:SI [(mem:SI (lo_sum:DI + (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "aarch64_valid_symref" "S")))] + UNSPEC_GOTSMALLPIC)))] + "TARGET_ILP32" + "ldr\\t%w0, [%1, #:got_lo12:%a2]" + [(set_attr "type" "load1")] +) + +(define_insn "ldr_got_tiny" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] + UNSPEC_GOTTINYPIC))] + "" + "ldr\\t%0, %L1" + [(set_attr "type" "load1")] +) + +(define_insn "aarch64_load_tp_hard" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_TLS))] + "" + "mrs\\t%0, tpidr_el0" + [(set_attr "type" "mrs")] +) + +;; The TLS ABI specifically requires that the compiler does not schedule +;; instructions in the TLS stubs, in order to enable linker relaxation. +;; Therefore we treat the stubs as an atomic sequence. +(define_expand "tlsgd_small" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (mem:DI (match_dup 2)) (const_int 1))) + (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS) + (clobber (reg:DI LR_REGNUM))])] + "" +{ + operands[2] = aarch64_tls_get_addr (); +}) + +(define_insn "*tlsgd_small" + [(set (match_operand 0 "register_operand" "") + (call (mem:DI (match_operand:DI 2 "" "")) (const_int 1))) + (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) + (clobber (reg:DI LR_REGNUM)) + ] + "" + "adrp\\tx0, %A1\;add\\tx0, x0, %L1\;bl\\t%2\;nop" + [(set_attr "type" "call") + (set_attr "length" "16")]) + +(define_insn "tlsie_small" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")] + UNSPEC_GOTSMALLTLS))] + "" + "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" + [(set_attr "type" "load1") + (set_attr "length" "8")] +) + +(define_insn "tlsle_small" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "aarch64_tls_le_symref" "S")] + UNSPEC_GOTSMALLTLS))] + "" + "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" + [(set_attr "type" "alu_reg") + (set_attr "length" "8")] +) + +(define_insn "tlsdesc_small" + [(set (reg:DI R0_REGNUM) + (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] + UNSPEC_TLSDESC)) + (clobber (reg:DI LR_REGNUM)) + (clobber (match_scratch:DI 1 "=r"))] + "TARGET_TLS_DESC" + "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" + [(set_attr "type" "call") + (set_attr "length" "16")]) + +(define_insn "stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:DI 0 "register_operand" "rk") + (match_operand:DI 1 "register_operand" "rk")] + UNSPEC_PRLG_STK))] + "" + "" + [(set_attr "length" "0")] +) + +;; Named pattern for 
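Editor's note, not part of the patch: in source terms, the symbol and TLS patterns above cover taking the address of a global (ADRP + ADD :lo12:, the add_losym patterns), PIC accesses through the GOT (ldr_got_small), and __thread variables via either TLS descriptors or the initial-/local-exec sequences ending in a read of tpidr_el0. Which sequence is chosen depends on -fpic and -mtls-dialect; the sketch below is illustrative only.

int global_counter;
__thread int per_thread_counter;

int *global_addr (void)
{
  return &global_counter;         /* adrp + add ..., :lo12:global_counter  */
}

int read_tls (void)
{
  return per_thread_counter;      /* tlsdesc or tlsle sequence + mrs tpidr_el0  */
}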
expanding thread pointer reference. +(define_expand "get_thread_pointerdi" + [(match_operand:DI 0 "register_operand" "=r")] + "" +{ + rtx tmp = aarch64_load_tp (operands[0]); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + DONE; +}) + +;; AdvSIMD Stuff +(include "aarch64-simd.md") + +;; Atomic Operations +(include "atomics.md") diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.opt b/gcc-4.9/gcc/config/aarch64/aarch64.opt new file mode 100644 index 000000000..f5a15b729 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/aarch64.opt @@ -0,0 +1,118 @@ +; Machine description for AArch64 architecture. +; Copyright (C) 2009-2014 Free Software Foundation, Inc. +; Contributed by ARM Ltd. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License as published by +; the Free Software Foundation; either version 3, or (at your option) +; any later version. +; +; GCC is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/aarch64/aarch64-opts.h + +; The TLS dialect names to use with -mtls-dialect. + +Enum +Name(tls_type) Type(enum aarch64_tls_type) +The possible TLS dialects: + +EnumValue +Enum(tls_type) String(trad) Value(TLS_TRADITIONAL) + +EnumValue +Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS) + +; The code model option names for -mcmodel. + +Enum +Name(cmodel) Type(enum aarch64_code_model) +The code model option names for -mcmodel: + +EnumValue +Enum(cmodel) String(tiny) Value(AARCH64_CMODEL_TINY) + +EnumValue +Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL) + +EnumValue +Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE) + +; The cpu/arch option names to use in cpu/arch selection. 
+ +Variable +const char *aarch64_arch_string + +Variable +const char *aarch64_cpu_string + +Variable +const char *aarch64_tune_string + +mbig-endian +Target Report RejectNegative Mask(BIG_END) +Assume target CPU is configured as big endian + +mgeneral-regs-only +Target Report RejectNegative Mask(GENERAL_REGS_ONLY) +Generate code which uses only the general registers + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_END) +Assume target CPU is configured as little endian + +mcmodel= +Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) +Specify the code model + +mstrict-align +Target Report RejectNegative Mask(STRICT_ALIGN) +Don't assume that unaligned accesses are handled by the system + +momit-leaf-frame-pointer +Target Report Save Var(flag_omit_leaf_frame_pointer) Init(1) +Omit the frame pointer in leaf functions + +mtls-dialect= +Target RejectNegative Joined Enum(tls_type) Var(aarch64_tls_dialect) Init(TLS_DESCRIPTORS) +Specify TLS dialect + +march= +Target RejectNegative ToLower Joined Var(aarch64_arch_string) +-march=ARCH Use features of architecture ARCH + +mcpu= +Target RejectNegative ToLower Joined Var(aarch64_cpu_string) +-mcpu=CPU Use features of and optimize for CPU + +mtune= +Target RejectNegative ToLower Joined Var(aarch64_tune_string) +-mtune=CPU Optimize for CPU + +mabi= +Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI_DEFAULT) +-mabi=ABI Generate code that conforms to the specified ABI + +mlra +Target Report Var(aarch64_lra_flag) Init(1) Save +Use LRA instead of reload (transitional) + +Enum +Name(aarch64_abi) Type(int) +Known AArch64 ABIs (for use with the -mabi= option): + +EnumValue +Enum(aarch64_abi) String(ilp32) Value(AARCH64_ABI_ILP32) + +EnumValue +Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64) diff --git a/gcc-4.9/gcc/config/aarch64/arm_neon.h b/gcc-4.9/gcc/config/aarch64/arm_neon.h new file mode 100644 index 000000000..747a292ba --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/arm_neon.h @@ -0,0 +1,25403 @@ +/* ARM NEON intrinsics include file. + + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#ifndef _AARCH64_NEON_H_ +#define _AARCH64_NEON_H_ + +#include + +#define __AARCH64_UINT64_C(__C) ((uint64_t) __C) +#define __AARCH64_INT64_C(__C) ((int64_t) __C) + +typedef __builtin_aarch64_simd_qi int8x8_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_hi int16x4_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_si int32x2_t + __attribute__ ((__vector_size__ (8))); +typedef int64_t int64x1_t; +typedef int32_t int32x1_t; +typedef int16_t int16x1_t; +typedef int8_t int8x1_t; +typedef double float64x1_t; +typedef __builtin_aarch64_simd_sf float32x2_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_poly8 poly8x8_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_poly16 poly16x4_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_uqi uint8x8_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_uhi uint16x4_t + __attribute__ ((__vector_size__ (8))); +typedef __builtin_aarch64_simd_usi uint32x2_t + __attribute__ ((__vector_size__ (8))); +typedef uint64_t uint64x1_t; +typedef uint32_t uint32x1_t; +typedef uint16_t uint16x1_t; +typedef uint8_t uint8x1_t; +typedef __builtin_aarch64_simd_qi int8x16_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_hi int16x8_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_si int32x4_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_di int64x2_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_sf float32x4_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_df float64x2_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_poly8 poly8x16_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_poly16 poly16x8_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_poly64 poly64x2_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_uqi uint8x16_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_uhi uint16x8_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_usi uint32x4_t + __attribute__ ((__vector_size__ (16))); +typedef __builtin_aarch64_simd_udi uint64x2_t + __attribute__ ((__vector_size__ (16))); + +typedef float float32_t; +typedef double float64_t; +typedef __builtin_aarch64_simd_poly8 poly8_t; +typedef __builtin_aarch64_simd_poly16 poly16_t; +typedef __builtin_aarch64_simd_poly64 poly64_t; +typedef __builtin_aarch64_simd_poly128 poly128_t; + +typedef struct int8x8x2_t +{ + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t +{ + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t +{ + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t +{ + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t +{ + int32x2_t val[2]; +} int32x2x2_t; + +typedef struct int32x4x2_t +{ + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t +{ + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t +{ + int64x2_t val[2]; +} int64x2x2_t; + +typedef struct uint8x8x2_t +{ + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t +{ + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t +{ + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t +{ + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t +{ + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct 
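Editor's note, not part of the patch: the vector typedefs above are ordinary GCC vector types, so they support direct initialization and arithmetic, and the "xN" aggregate types exist to carry multi-register results (for example from the de-interleaving load intrinsics). Illustrative use, assuming the header is included as <arm_neon.h> on an AArch64 target.

#include <arm_neon.h>

int32x4_t make_vec (void)
{
  int32x4_t v = { 1, 2, 3, 4 };   /* one 128-bit Q register's worth of int32  */
  return v + v;                   /* GCC vector-extension arithmetic  */
}

int8x8x2_t two_d_regs (int8x8_t a, int8x8_t b)
{
  int8x8x2_t pair = { { a, b } }; /* .val[0] and .val[1] each hold one D register  */
  return pair;
}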
uint32x4x2_t +{ + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t +{ + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t +{ + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float32x2x2_t +{ + float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t +{ + float32x4_t val[2]; +} float32x4x2_t; + +typedef struct float64x2x2_t +{ + float64x2_t val[2]; +} float64x2x2_t; + +typedef struct float64x1x2_t +{ + float64x1_t val[2]; +} float64x1x2_t; + +typedef struct poly8x8x2_t +{ + poly8x8_t val[2]; +} poly8x8x2_t; + +typedef struct poly8x16x2_t +{ + poly8x16_t val[2]; +} poly8x16x2_t; + +typedef struct poly16x4x2_t +{ + poly16x4_t val[2]; +} poly16x4x2_t; + +typedef struct poly16x8x2_t +{ + poly16x8_t val[2]; +} poly16x8x2_t; + +typedef struct int8x8x3_t +{ + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t +{ + int8x16_t val[3]; +} int8x16x3_t; + +typedef struct int16x4x3_t +{ + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t +{ + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t +{ + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t +{ + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t +{ + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t +{ + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t +{ + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t +{ + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t +{ + uint16x4_t val[3]; +} uint16x4x3_t; + +typedef struct uint16x8x3_t +{ + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t +{ + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t +{ + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t +{ + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t +{ + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float32x2x3_t +{ + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t +{ + float32x4_t val[3]; +} float32x4x3_t; + +typedef struct float64x2x3_t +{ + float64x2_t val[3]; +} float64x2x3_t; + +typedef struct float64x1x3_t +{ + float64x1_t val[3]; +} float64x1x3_t; + +typedef struct poly8x8x3_t +{ + poly8x8_t val[3]; +} poly8x8x3_t; + +typedef struct poly8x16x3_t +{ + poly8x16_t val[3]; +} poly8x16x3_t; + +typedef struct poly16x4x3_t +{ + poly16x4_t val[3]; +} poly16x4x3_t; + +typedef struct poly16x8x3_t +{ + poly16x8_t val[3]; +} poly16x8x3_t; + +typedef struct int8x8x4_t +{ + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t +{ + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t +{ + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t +{ + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t +{ + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t +{ + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t +{ + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t +{ + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t +{ + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t +{ + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t +{ + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t +{ + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t +{ + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t +{ + uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t +{ + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef 
struct uint64x2x4_t +{ + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float32x2x4_t +{ + float32x2_t val[4]; +} float32x2x4_t; + +typedef struct float32x4x4_t +{ + float32x4_t val[4]; +} float32x4x4_t; + +typedef struct float64x2x4_t +{ + float64x2_t val[4]; +} float64x2x4_t; + +typedef struct float64x1x4_t +{ + float64x1_t val[4]; +} float64x1x4_t; + +typedef struct poly8x8x4_t +{ + poly8x8_t val[4]; +} poly8x8x4_t; + +typedef struct poly8x16x4_t +{ + poly8x16_t val[4]; +} poly8x16x4_t; + +typedef struct poly16x4x4_t +{ + poly16x4_t val[4]; +} poly16x4x4_t; + +typedef struct poly16x8x4_t +{ + poly16x8_t val[4]; +} poly16x8x4_t; + +/* vget_lane internal macros. */ + +#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \ + (__cast_ret \ + __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b)) + +#define __aarch64_vget_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v2sf, , , __a, __b) +#define __aarch64_vget_lane_f64(__a, __b) (__a) + +#define __aarch64_vget_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b) + +#define __aarch64_vget_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, , ,__a, __b) +#define __aarch64_vget_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, , ,__a, __b) +#define __aarch64_vget_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v2si, , ,__a, __b) +#define __aarch64_vget_lane_s64(__a, __b) (__a) + +#define __aarch64_vget_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b) +#define __aarch64_vget_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b) +#define __aarch64_vget_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b) +#define __aarch64_vget_lane_u64(__a, __b) (__a) + +#define __aarch64_vgetq_lane_f32(__a, __b) \ + __aarch64_vget_lane_any (v4sf, , , __a, __b) +#define __aarch64_vgetq_lane_f64(__a, __b) \ + __aarch64_vget_lane_any (v2df, , , __a, __b) + +#define __aarch64_vgetq_lane_p8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_p16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b) + +#define __aarch64_vgetq_lane_s8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, , ,__a, __b) +#define __aarch64_vgetq_lane_s16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, , ,__a, __b) +#define __aarch64_vgetq_lane_s32(__a, __b) \ + __aarch64_vget_lane_any (v4si, , ,__a, __b) +#define __aarch64_vgetq_lane_s64(__a, __b) \ + __aarch64_vget_lane_any (v2di, , ,__a, __b) + +#define __aarch64_vgetq_lane_u8(__a, __b) \ + __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b) +#define __aarch64_vgetq_lane_u16(__a, __b) \ + __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b) +#define __aarch64_vgetq_lane_u32(__a, __b) \ + __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b) +#define __aarch64_vgetq_lane_u64(__a, __b) \ + __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b) + +/* __aarch64_vdup_lane internal macros. 
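Editor's note, not part of the patch: the __aarch64_vget_lane_* macros above back the public vget_lane/vgetq_lane intrinsics, which read a single element out of a vector; the lane index must be a constant expression. Illustrative use only.

#include <arm_neon.h>

float first_lane (float32x2_t v)
{
  return vget_lane_f32 (v, 0);
}

uint8_t last_lane (uint8x16_t v)
{
  return vgetq_lane_u8 (v, 15);
}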
*/ +#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \ + vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b)) + +#define __aarch64_vdup_lane_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, , , __a, __b) +#define __aarch64_vdup_lane_f64(__a, __b) (__a) +#define __aarch64_vdup_lane_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, , , __a, __b) +#define __aarch64_vdup_lane_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, , , __a, __b) +#define __aarch64_vdup_lane_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, , , __a, __b) +#define __aarch64_vdup_lane_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, , , __a, __b) +#define __aarch64_vdup_lane_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, , , __a, __b) +#define __aarch64_vdup_lane_s64(__a, __b) (__a) +#define __aarch64_vdup_lane_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, , , __a, __b) +#define __aarch64_vdup_lane_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, , , __a, __b) +#define __aarch64_vdup_lane_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, , , __a, __b) +#define __aarch64_vdup_lane_u64(__a, __b) (__a) + +/* __aarch64_vdup_laneq internal macros. */ +#define __aarch64_vdup_laneq_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, , q, __a, __b) +#define __aarch64_vdup_laneq_f64(__a, __b) \ + __aarch64_vdup_lane_any (f64, , q, __a, __b) +#define __aarch64_vdup_laneq_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, , q, __a, __b) +#define __aarch64_vdup_laneq_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, , q, __a, __b) +#define __aarch64_vdup_laneq_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, , q, __a, __b) +#define __aarch64_vdup_laneq_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, , q, __a, __b) +#define __aarch64_vdup_laneq_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, , q, __a, __b) +#define __aarch64_vdup_laneq_s64(__a, __b) \ + __aarch64_vdup_lane_any (s64, , q, __a, __b) +#define __aarch64_vdup_laneq_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, , q, __a, __b) +#define __aarch64_vdup_laneq_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, , q, __a, __b) +#define __aarch64_vdup_laneq_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, , q, __a, __b) +#define __aarch64_vdup_laneq_u64(__a, __b) \ + __aarch64_vdup_lane_any (u64, , q, __a, __b) + +/* __aarch64_vdupq_lane internal macros. */ +#define __aarch64_vdupq_lane_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, q, , __a, __b) +#define __aarch64_vdupq_lane_f64(__a, __b) (vdupq_n_f64 (__a)) +#define __aarch64_vdupq_lane_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, q, , __a, __b) +#define __aarch64_vdupq_lane_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, q, , __a, __b) +#define __aarch64_vdupq_lane_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, q, , __a, __b) +#define __aarch64_vdupq_lane_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, q, , __a, __b) +#define __aarch64_vdupq_lane_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, q, , __a, __b) +#define __aarch64_vdupq_lane_s64(__a, __b) (vdupq_n_s64 (__a)) +#define __aarch64_vdupq_lane_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, q, , __a, __b) +#define __aarch64_vdupq_lane_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, q, , __a, __b) +#define __aarch64_vdupq_lane_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, q, , __a, __b) +#define __aarch64_vdupq_lane_u64(__a, __b) (vdupq_n_u64 (__a)) + +/* __aarch64_vdupq_laneq internal macros. 
*/ +#define __aarch64_vdupq_laneq_f32(__a, __b) \ + __aarch64_vdup_lane_any (f32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_f64(__a, __b) \ + __aarch64_vdup_lane_any (f64, q, q, __a, __b) +#define __aarch64_vdupq_laneq_p8(__a, __b) \ + __aarch64_vdup_lane_any (p8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_p16(__a, __b) \ + __aarch64_vdup_lane_any (p16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s8(__a, __b) \ + __aarch64_vdup_lane_any (s8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s16(__a, __b) \ + __aarch64_vdup_lane_any (s16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s32(__a, __b) \ + __aarch64_vdup_lane_any (s32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_s64(__a, __b) \ + __aarch64_vdup_lane_any (s64, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u8(__a, __b) \ + __aarch64_vdup_lane_any (u8, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u16(__a, __b) \ + __aarch64_vdup_lane_any (u16, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u32(__a, __b) \ + __aarch64_vdup_lane_any (u32, q, q, __a, __b) +#define __aarch64_vdupq_laneq_u64(__a, __b) \ + __aarch64_vdup_lane_any (u64, q, q, __a, __b) + +/* vadd */ +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vadd_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vaddq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vaddq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline 
uint8x16_t __attribute__ ((__always_inline__)) +vaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a + __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddl_high_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddl_high_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddl_high_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t) 
__builtin_aarch64_saddwv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddw_high_s8 (int16x8_t __a, int8x16_t __b) +{ + return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddw_high_s16 (int32x4_t __a, int16x8_t __b) +{ + return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddw_high_s32 (int64x2_t __a, int32x4_t __b) +{ + return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b); +} + +__extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) +vhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 
+vaddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vraddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vraddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vraddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a, + (int16x8_t) __b, + (int16x8_t) __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a, + (int32x4_t) __b, + (int32x4_t) __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return (uint32x4_t) __builtin_aarch64_addhn2v2di 
((int32x2_t) __a, + (int64x2_t) __b, + (int64x2_t) __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a, + (int16x8_t) __b, + (int16x8_t) __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a, + (int32x4_t) __b, + (int32x4_t) __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a, + (int64x2_t) __b, + (int64x2_t) __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdiv_f32 (float32x2_t __a, float32x2_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdiv_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdivq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __a / __b; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdivq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __a / __b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmul_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_f32 (float32x2_t __a, float32x2_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmul_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a * __b; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmul_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmulq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a * __b; +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vmulq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmulq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a * __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a * __b; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmulq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vand_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vand_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vand_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vand_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vand_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vand_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vand_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vand_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vandq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vandq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vandq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a & __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vandq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vandq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vandq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vandq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a & __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vandq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return 
__a & __b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorr_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorr_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorr_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorr_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorr_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorr_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vorr_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorr_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vorrq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vorrq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vorrq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vorrq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vorrq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vorrq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vorrq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a | __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vorrq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a | __b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +veor_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +veor_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +veor_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +veor_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +veor_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +veor_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +veor_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +veor_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a ^ __b; +} + 
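The vand/vorr/veor intrinsics above lower to plain bitwise operations on the vector types, so they can be exercised directly from C. A minimal editorial usage sketch (not part of the patch; assumes an AArch64 toolchain with this arm_neon.h installed):

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  uint8x8_t data = vcreate_u8 (0x8877665544332211ULL);  /* eight byte lanes  */
  uint8x8_t mask = vdup_n_u8 (0x0f);                     /* low-nibble mask   */

  uint8x8_t low  = vand_u8 (data, mask);   /* keep the low nibble of each lane   */
  uint8x8_t set  = vorr_u8 (data, mask);   /* force the low nibble on            */
  uint8x8_t flip = veor_u8 (data, mask);   /* toggle the low nibble of each lane */

  printf ("%#x %#x %#x\n", (unsigned) vget_lane_u8 (low, 0),
          (unsigned) vget_lane_u8 (set, 0), (unsigned) vget_lane_u8 (flip, 0));
  return 0;
}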
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +veorq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +veorq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +veorq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +veorq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +veorq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +veorq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +veorq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +veorq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a ^ __b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbic_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbic_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbic_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbic_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbic_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbic_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbic_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbic_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbicq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbicq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbicq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbicq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbicq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbicq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbicq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a & ~__b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbicq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a & ~__b; 
+} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorn_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorn_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorn_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorn_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorn_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorn_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vorn_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorn_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vornq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vornq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vornq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vornq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vornq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vornq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vornq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vornq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a | ~__b; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vsub_f32 (float32x2_t __a, float32x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vsub_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return __a - __b; 
+} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsubq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vsubq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return __a - __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubl_high_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubl_high_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubl_high_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int64x2_t) 
__builtin_aarch64_ssubl2v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubw_high_s8 (int16x8_t __a, int8x16_t __b) +{ + return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubw_high_s16 (int32x4_t __a, int16x8_t __b) +{ + return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubw_high_s32 (int64x2_t __a, int32x4_t __b) +{ + return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b); +} 
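Unlike the plain vadd forms earlier in the file, the vqadd family clamps each lane to the element type's range instead of wrapping. A short editorial sketch of the difference (not part of the patch; assumes an AArch64 toolchain providing this header):

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  int8x8_t big = vdup_n_s8 (100);

  int8x8_t wrapped   = vadd_s8 (big, big);    /* 100 + 100 wraps mod 2^8 to -56 */
  int8x8_t saturated = vqadd_s8 (big, big);   /* clamped to INT8_MAX, i.e. 127  */

  printf ("vadd: %d  vqadd: %d\n",
          vget_lane_s8 (wrapped, 0), vget_lane_s8 (saturated, 0));
  return 0;
}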
+ +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a, + (int64x1_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqsub_s64 (int64x1_t __a, int64x1_t __b) +{ + 
return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a, + (int64x1_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqneg_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqneg_s16 (int16x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqneg_s32 (int32x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqnegq_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqnegq_s16 (int16x8_t __a) +{ + return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqnegq_s32 (int32x4_t __a) +{ + return (int32x4_t) __builtin_aarch64_sqnegv4si 
(__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqabs_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqabs_s16 (int16x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqabs_s32 (int32x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqabsq_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqabsq_s16 (int16x8_t __a) +{ + return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqabsq_s32 (int32x4_t __a) +{ + return (int32x4_t) __builtin_aarch64_sqabsv4si (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcreate_s8 (uint64_t __a) +{ + return (int8x8_t) __a; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcreate_s16 (uint64_t __a) +{ + return (int16x4_t) __a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcreate_s32 (uint64_t __a) +{ + return (int32x2_t) __a; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vcreate_s64 (uint64_t __a) +{ + return (int64x1_t) __a; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcreate_f32 (uint64_t __a) +{ + return (float32x2_t) __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcreate_u8 (uint64_t __a) +{ + return (uint8x8_t) __a; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcreate_u16 (uint64_t __a) +{ + return (uint16x4_t) __a; +} + 
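The vcreate family simply reinterprets the 64-bit scalar's bit pattern as a 64-bit NEON vector, so the lane contents follow from the integer's bits. An editorial sketch (not part of the patch; assumes a little-endian AArch64 target, where lane 0 corresponds to the least significant bits):

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  /* Four 16-bit lanes carved out of one 64-bit literal.  */
  uint16x4_t v = vcreate_u16 (0x0403020104030201ULL);

  printf ("lane0=%#x lane3=%#x\n",
          (unsigned) vget_lane_u16 (v, 0),    /* 0x0201, the low 16 bits  */
          (unsigned) vget_lane_u16 (v, 3));   /* 0x0403, the high 16 bits */
  return 0;
}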
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcreate_u32 (uint64_t __a) +{ + return (uint32x2_t) __a; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcreate_u64 (uint64_t __a) +{ + return (uint64x1_t) __a; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vcreate_f64 (uint64_t __a) +{ + return (float64x1_t) __builtin_aarch64_createdf (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcreate_p8 (uint64_t __a) +{ + return (poly8x8_t) __a; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vcreate_p16 (uint64_t __a) +{ + return (poly16x4_t) __a; +} + +/* vget_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vget_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_f32 (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vget_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vget_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vget_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_p16 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vget_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_s8 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vget_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_s16 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vget_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_s32 (__a, __b); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vget_lane_s64 (int64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vget_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vget_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vget_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vget_lane_u64 (uint64x1_t __a, const int __b) +{ + return __aarch64_vget_lane_u64 (__a, __b); +} + +/* vgetq_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vgetq_lane_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f32 (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vgetq_lane_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vgetq_lane_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vgetq_lane_p16 (poly16x8_t __a, const int __b) +{ + return 
__aarch64_vgetq_lane_p16 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vgetq_lane_s8 (int8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s8 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vgetq_lane_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s16 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vgetq_lane_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s32 (__a, __b); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vgetq_lane_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vgetq_lane_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vgetq_lane_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vgetq_lane_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vgetq_lane_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u64 (__a, __b); +} + +/* vreinterpret */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s8 (int8x8_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s16 (int16x4_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s32 (int32x2_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s64 (int64x1_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_f32 (float32x2_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u8 (uint8x8_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u16 (uint16x4_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u32 (uint32x2_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u64 (uint64x1_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_p16 (poly16x4_t __a) +{ + return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s8 (int8x16_t __a) +{ + return (poly8x16_t) 
__builtin_aarch64_reinterpretv16qiv16qi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s16 (int16x8_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s32 (int32x4_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s64 (int64x2_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_f32 (float32x4_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u8 (uint8x16_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u16 (uint16x8_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u32 (uint32x4_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) + __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u64 (uint64x2_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) + __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p16 (poly16x8_t __a) +{ + return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s8 (int8x8_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s16 (int16x4_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s32 (int32x2_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s64 (int64x1_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_f32 (float32x2_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u8 (uint8x8_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u16 (uint16x4_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u32 (uint32x2_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u64 (uint64x1_t __a) +{ + return 
(poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p8 (poly8x8_t __a) +{ + return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s8 (int8x16_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s16 (int16x8_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s32 (int32x4_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s64 (int64x2_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_f32 (float32x4_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u8 (uint8x16_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u16 (uint16x8_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u32 (uint32x4_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u64 (uint64x2_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p8 (poly8x16_t __a) +{ + return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s8 (int8x8_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s16 (int16x4_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s32 (int32x2_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s64 (int64x1_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u8 (uint8x8_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u16 (uint16x4_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) + __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u32 
(uint32x2_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t) + __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u64 (uint64x1_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p8 (poly8x8_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p16 (poly16x4_t __a) +{ + return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s8 (int8x16_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s16 (int16x8_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s64 (int64x2_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u8 (uint8x16_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u16 (uint16x8_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u64 (uint64x2_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p8 (poly8x16_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p16 (poly16x8_t __a) +{ + return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s8 (int8x8_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s16 (int16x4_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s32 (int32x2_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_f32 (float32x2_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); +} + +__extension__ static __inline int64x1_t __attribute__ 
((__always_inline__)) +vreinterpret_s64_u8 (uint8x8_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u16 (uint16x4_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u32 (uint32x2_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u64 (uint64x1_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p8 (poly8x8_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p16 (poly16x4_t __a) +{ + return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s8 (int8x16_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s16 (int16x8_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s32 (int32x4_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_f32 (float32x4_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u8 (uint8x16_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u16 (uint16x8_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u32 (uint32x4_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u64 (uint64x2_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p8 (poly8x16_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p16 (poly16x8_t __a) +{ + return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s8 (int8x8_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s16 (int16x4_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ 
((__always_inline__)) +vreinterpret_u64_s32 (int32x2_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s64 (int64x1_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_f32 (float32x2_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u8 (uint8x8_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u16 (uint16x4_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u32 (uint32x2_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p8 (poly8x8_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p16 (poly16x4_t __a) +{ + return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s8 (int8x16_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s16 (int16x8_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s32 (int32x4_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s64 (int64x2_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_f32 (float32x4_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u8 (uint8x16_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u16 (uint16x8_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u32 (uint32x4_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p8 (poly8x16_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p16 (poly16x8_t __a) +{ + return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x8_t 
__attribute__ ((__always_inline__)) +vreinterpret_s8_s16 (int16x4_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s32 (int32x2_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s64 (int64x1_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_f32 (float32x2_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u8 (uint8x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u16 (uint16x4_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u32 (uint32x2_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u64 (uint64x1_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p8 (poly8x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p16 (poly16x4_t __a) +{ + return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s16 (int16x8_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s32 (int32x4_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s64 (int64x2_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_f32 (float32x4_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u8 (uint8x16_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u16 (uint16x8_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u32 (uint32x4_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u64 (uint64x2_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s8_p8 (poly8x16_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p16 (poly16x8_t __a) +{ + return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s8 (int8x8_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s32 (int32x2_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s64 (int64x1_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_f32 (float32x2_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u8 (uint8x8_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u16 (uint16x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u32 (uint32x2_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u64 (uint64x1_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p8 (poly8x8_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p16 (poly16x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s8 (int8x16_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s32 (int32x4_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s64 (int64x2_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_f32 (float32x4_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u8 (uint8x16_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u16 (uint16x8_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s16_u32 (uint32x4_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u64 (uint64x2_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p8 (poly8x16_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p16 (poly16x8_t __a) +{ + return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s8 (int8x8_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s16 (int16x4_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s64 (int64x1_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_f32 (float32x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u8 (uint8x8_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u16 (uint16x4_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u32 (uint32x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u64 (uint64x1_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p8 (poly8x8_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p16 (poly16x4_t __a) +{ + return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s8 (int8x16_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s16 (int16x8_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s64 (int64x2_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_f32 (float32x4_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u8 
(uint8x16_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u16 (uint16x8_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u32 (uint32x4_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u64 (uint64x2_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p8 (poly8x16_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p16 (poly16x8_t __a) +{ + return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s8 (int8x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s16 (int16x4_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s32 (int32x2_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s64 (int64x1_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_f32 (float32x2_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u16 (uint16x4_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u32 (uint32x2_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u64 (uint64x1_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p8 (poly8x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p16 (poly16x4_t __a) +{ + return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s8 (int8x16_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s16 (int16x8_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s32 (int32x4_t 
__a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s64 (int64x2_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_f32 (float32x4_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u16 (uint16x8_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u32 (uint32x4_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) + __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u64 (uint64x2_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) + __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p8 (poly8x16_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p16 (poly16x8_t __a) +{ + return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) + __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s8 (int8x8_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s16 (int16x4_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s32 (int32x2_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s64 (int64x1_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_f32 (float32x2_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u8 (uint8x8_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u32 (uint32x2_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u64 (uint64x1_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p8 (poly8x8_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p16 (poly16x4_t __a) +{ + return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) 
+vreinterpretq_u16_s8 (int8x16_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s16 (int16x8_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s32 (int32x4_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s64 (int64x2_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_f32 (float32x4_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u8 (uint8x16_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u32 (uint32x4_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u64 (uint64x2_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p8 (poly8x16_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p16 (poly16x8_t __a) +{ + return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s8 (int8x8_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s16 (int16x4_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s32 (int32x2_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s64 (int64x1_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_f32 (float32x2_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u8 (uint8x8_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u16 (uint16x4_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u64 (uint64x1_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 
+vreinterpret_u32_p8 (poly8x8_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p16 (poly16x4_t __a) +{ + return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s8 (int8x16_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s16 (int16x8_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s32 (int32x4_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s64 (int64x2_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_f32 (float32x4_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u8 (uint8x16_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u16 (uint16x8_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u64 (uint64x2_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p8 (poly8x16_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) + __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p16 (poly16x8_t __a) +{ + return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); +} + +#define __GET_LOW(__TYPE) \ + uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ + uint64_t lo = vgetq_lane_u64 (tmp, 0); \ + return vreinterpret_##__TYPE##_u64 (lo); + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_low_f32 (float32x4_t __a) +{ + __GET_LOW (f32); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_low_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_low_p8 (poly8x16_t __a) +{ + __GET_LOW (p8); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_low_p16 (poly16x8_t __a) +{ + __GET_LOW (p16); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_low_s8 (int8x16_t __a) +{ + __GET_LOW (s8); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_low_s16 (int16x8_t __a) +{ + __GET_LOW (s16); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_low_s32 (int32x4_t __a) +{ + __GET_LOW (s32); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_low_s64 (int64x2_t 
__a) +{ + return vgetq_lane_s64 (__a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_low_u8 (uint8x16_t __a) +{ + __GET_LOW (u8); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_low_u16 (uint16x8_t __a) +{ + __GET_LOW (u16); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_low_u32 (uint32x4_t __a) +{ + __GET_LOW (u32); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_low_u64 (uint64x2_t __a) +{ + return vgetq_lane_u64 (__a, 0); +} + +#undef __GET_LOW + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcombine_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcombine_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcombine_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcombine_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x2_t) __builtin_aarch64_combinedi (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcombine_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcombine_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcombine_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcombine_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcombine_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a, + (int64x1_t) __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcombine_f64 (float64x1_t __a, float64x1_t __b) +{ + return (float64x2_t) __builtin_aarch64_combinedf (__a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcombine_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vcombine_p16 (poly16x4_t __a, poly16x4_t __b) +{ + return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +/* Start of temporary inline asm implementations. 
*/ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +{ + int8x8_t result; + __asm__ ("saba %0.8b,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c) +{ + int16x4_t result; + __asm__ ("saba %0.4h,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +{ + int32x2_t result; + __asm__ ("saba %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint8x8_t result; + __asm__ ("uaba %0.8b,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint16x4_t result; + __asm__ ("uaba %0.4h,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint32x2_t result; + __asm__ ("uaba %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +{ + int16x8_t result; + __asm__ ("sabal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +{ + int32x4_t result; + __asm__ ("sabal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +{ + int64x2_t result; + __asm__ ("sabal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +{ + uint16x8_t result; + __asm__ ("uabal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +{ + uint32x4_t result; + __asm__ ("uabal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +{ + uint64x2_t result; + __asm__ ("uabal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +{ + int16x8_t result; + __asm__ ("sabal 
%0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +{ + int32x4_t result; + __asm__ ("sabal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +{ + int64x2_t result; + __asm__ ("sabal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint16x8_t result; + __asm__ ("uabal %0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint32x4_t result; + __asm__ ("uabal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint64x2_t result; + __asm__ ("uabal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +{ + int8x16_t result; + __asm__ ("saba %0.16b,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +{ + int16x8_t result; + __asm__ ("saba %0.8h,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +{ + int32x4_t result; + __asm__ ("saba %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ + uint8x16_t result; + __asm__ ("uaba %0.16b,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint16x8_t result; + __asm__ ("uaba %0.8h,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint32x4_t result; + __asm__ ("uaba %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabd_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fabd %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabd_s8 (int8x8_t a, 
int8x8_t b) +{ + int8x8_t result; + __asm__ ("sabd %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabd_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("sabd %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabd_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("sabd %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vabd_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uabd %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vabd_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uabd %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vabd_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uabd %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vabdd_f64 (float64_t a, float64_t b) +{ + float64_t result; + __asm__ ("fabd %d0, %d1, %d2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdl_high_s8 (int8x16_t a, int8x16_t b) +{ + int16x8_t result; + __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdl_high_s16 (int16x8_t a, int16x8_t b) +{ + int32x4_t result; + __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabdl_high_s32 (int32x4_t a, int32x4_t b) +{ + int64x2_t result; + __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdl_high_u8 (uint8x16_t a, uint8x16_t b) +{ + uint16x8_t result; + __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdl_high_u16 (uint16x8_t a, uint16x8_t b) +{ + uint32x4_t result; + __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabdl_high_u32 (uint32x4_t a, uint32x4_t b) +{ + uint64x2_t result; + __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdl_s8 (int8x8_t a, int8x8_t b) +{ + int16x8_t result; + __asm__ ("sabdl %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + 
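(Illustrative usage sketch only; this is not part of the header added by the patch. It shows how the widening absolute-difference and horizontal-add intrinsics defined in this hunk -- vabdl_u8, vaddq_u16, vaddlvq_u16, together with the standard vld1_u8 load -- might compose into a 16-byte sum-of-absolute-differences kernel. The helper name sad16 and the suggested compile invocation are assumptions, not taken from the patch.)

/* Minimal sketch, assuming an AArch64 toolchain, e.g.
   aarch64-linux-gnu-gcc -O2 -c sad16.c (assumed invocation).  */
#include <arm_neon.h>
#include <stdint.h>

static uint32_t
sad16 (const uint8_t *p, const uint8_t *q)
{
  /* Widening absolute differences of the low and high 8-byte halves.  */
  uint16x8_t lo = vabdl_u8 (vld1_u8 (p), vld1_u8 (q));
  uint16x8_t hi = vabdl_u8 (vld1_u8 (p + 8), vld1_u8 (q + 8));
  /* Add the two halves, then horizontally accumulate the eight 16-bit
     differences into a single 32-bit sum.  */
  return vaddlvq_u16 (vaddq_u16 (lo, hi));
}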
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdl_s16 (int16x4_t a, int16x4_t b) +{ + int32x4_t result; + __asm__ ("sabdl %0.4s, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabdl_s32 (int32x2_t a, int32x2_t b) +{ + int64x2_t result; + __asm__ ("sabdl %0.2d, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdl_u8 (uint8x8_t a, uint8x8_t b) +{ + uint16x8_t result; + __asm__ ("uabdl %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdl_u16 (uint16x4_t a, uint16x4_t b) +{ + uint32x4_t result; + __asm__ ("uabdl %0.4s, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabdl_u32 (uint32x2_t a, uint32x2_t b) +{ + uint64x2_t result; + __asm__ ("uabdl %0.2d, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabdq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fabd %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vabdq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fabd %0.2d, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabdq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("sabd %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("sabd %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("sabd %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabdq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uabd %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uabd %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uabd %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vabds_f32 (float32_t a, float32_t b) +{ + float32_t result; + __asm__ ("fabd 
%s0, %s1, %s2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddlv_s8 (int8x8_t a) +{ + int16_t result; + __asm__ ("saddlv %h0,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddlv_s16 (int16x4_t a) +{ + int32_t result; + __asm__ ("saddlv %s0,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddlv_u8 (uint8x8_t a) +{ + uint16_t result; + __asm__ ("uaddlv %h0,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddlv_u16 (uint16x4_t a) +{ + uint32_t result; + __asm__ ("uaddlv %s0,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddlvq_s8 (int8x16_t a) +{ + int16_t result; + __asm__ ("saddlv %h0,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddlvq_s16 (int16x8_t a) +{ + int32_t result; + __asm__ ("saddlv %s0,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vaddlvq_s32 (int32x4_t a) +{ + int64_t result; + __asm__ ("saddlv %d0,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddlvq_u8 (uint8x16_t a) +{ + uint16_t result; + __asm__ ("uaddlv %h0,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddlvq_u16 (uint16x8_t a) +{ + uint32_t result; + __asm__ ("uaddlv %s0,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vaddlvq_u32 (uint32x4_t a) +{ + uint64_t result; + __asm__ ("uaddlv %d0,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcls_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("cls %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcls_s16 (int16x4_t a) +{ + int16x4_t result; + __asm__ ("cls %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcls_s32 (int32x2_t a) +{ + int32x2_t result; + __asm__ ("cls %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclsq_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("cls %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclsq_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("cls %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t 
__attribute__ ((__always_inline__)) +vclsq_s32 (int32x4_t a) +{ + int32x4_t result; + __asm__ ("cls %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcnt_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ ("cnt %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcnt_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("cnt %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcnt_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("cnt %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcntq_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("cnt %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcntq_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("cnt %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcntq_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("cnt %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +#define vcopyq_lane_f32(a, b, c, d) \ + __extension__ \ + ({ \ + float32x4_t c_ = (c); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ins %0.s[%2], %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_f64(a, b, c, d) \ + __extension__ \ + ({ \ + float64x2_t c_ = (c); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ins %0.d[%2], %3.d[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_p8(a, b, c, d) \ + __extension__ \ + ({ \ + poly8x16_t c_ = (c); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ins %0.b[%2], %3.b[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_p16(a, b, c, d) \ + __extension__ \ + ({ \ + poly16x8_t c_ = (c); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ins %0.h[%2], %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_s8(a, b, c, d) \ + __extension__ \ + ({ \ + int8x16_t c_ = (c); \ + int8x16_t a_ = (a); \ + int8x16_t result; \ + __asm__ ("ins %0.b[%2], %3.b[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("ins %0.h[%2], %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("ins %0.s[%2], %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_s64(a, b, c, d) \ + __extension__ \ + 
({ \ + int64x2_t c_ = (c); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("ins %0.d[%2], %3.d[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_u8(a, b, c, d) \ + __extension__ \ + ({ \ + uint8x16_t c_ = (c); \ + uint8x16_t a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ins %0.b[%2], %3.b[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ins %0.h[%2], %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ins %0.s[%2], %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcopyq_lane_u64(a, b, c, d) \ + __extension__ \ + ({ \ + uint64x2_t c_ = (c); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ins %0.d[%2], %3.d[%4]" \ + : "=w"(result) \ + : "0"(a_), "i"(b), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +/* vcvt_f16_f32 not supported */ + +/* vcvt_f32_f16 not supported */ + +/* vcvt_high_f16_f32 not supported */ + +/* vcvt_high_f32_f16 not supported */ + +static float32x2_t vdup_n_f32 (float32_t); + +#define vcvt_n_f32_s32(a, b) \ + __extension__ \ + ({ \ + int32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("scvtf %0.2s, %1.2s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvt_n_f32_u32(a, b) \ + __extension__ \ + ({ \ + uint32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("ucvtf %0.2s, %1.2s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvt_n_s32_f32(a, b) \ + __extension__ \ + ({ \ + float32x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvt_n_u32_f32(a, b) \ + __extension__ \ + ({ \ + float32x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtd_n_f64_s64(a, b) \ + __extension__ \ + ({ \ + int64_t a_ = (a); \ + float64_t result; \ + __asm__ ("scvtf %d0,%d1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtd_n_f64_u64(a, b) \ + __extension__ \ + ({ \ + uint64_t a_ = (a); \ + float64_t result; \ + __asm__ ("ucvtf %d0,%d1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtd_n_s64_f64(a, b) \ + __extension__ \ + ({ \ + float64_t a_ = (a); \ + int64_t result; \ + __asm__ ("fcvtzs %d0,%d1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtd_n_u64_f64(a, b) \ + __extension__ \ + ({ \ + float64_t a_ = (a); \ + uint64_t result; \ + __asm__ ("fcvtzu %d0,%d1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_f32_s32(a, b) \ + __extension__ \ + ({ \ + int32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("scvtf %0.4s, %1.4s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + 
result; \ + }) + +#define vcvtq_n_f32_u32(a, b) \ + __extension__ \ + ({ \ + uint32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ucvtf %0.4s, %1.4s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_f64_s64(a, b) \ + __extension__ \ + ({ \ + int64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("scvtf %0.2d, %1.2d, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_f64_u64(a, b) \ + __extension__ \ + ({ \ + uint64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ucvtf %0.2d, %1.2d, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_s32_f32(a, b) \ + __extension__ \ + ({ \ + float32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_s64_f64(a, b) \ + __extension__ \ + ({ \ + float64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_u32_f32(a, b) \ + __extension__ \ + ({ \ + float32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvtq_n_u64_f64(a, b) \ + __extension__ \ + ({ \ + float64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvts_n_f32_s32(a, b) \ + __extension__ \ + ({ \ + int32_t a_ = (a); \ + float32_t result; \ + __asm__ ("scvtf %s0,%s1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvts_n_f32_u32(a, b) \ + __extension__ \ + ({ \ + uint32_t a_ = (a); \ + float32_t result; \ + __asm__ ("ucvtf %s0,%s1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvts_n_s32_f32(a, b) \ + __extension__ \ + ({ \ + float32_t a_ = (a); \ + int32_t result; \ + __asm__ ("fcvtzs %s0,%s1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vcvts_n_u32_f32(a, b) \ + __extension__ \ + ({ \ + float32_t a_ = (a); \ + uint32_t result; \ + __asm__ ("fcvtzu %s0,%s1,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvtx_f32_f64 (float64x2_t a) +{ + float32x2_t result; + __asm__ ("fcvtxn %0.2s,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b) +{ + float32x4_t result; + __asm__ ("fcvtxn2 %0.4s,%1.2d" + : "=w"(result) + : "w" (b), "0"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvtxd_f32_f64 (float64_t a) +{ + float32_t result; + __asm__ ("fcvtxn %s0,%d1" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +#define vext_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define 
vext_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + float64x1_t a_ = (a); \ + float64x1_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8x8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16x4_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x1_t b_ = (b); \ + int64x1_t a_ = (a); \ + int64x1_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vext_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + uint64x1_t a_ = (a); \ + uint64x1_t result; \ + __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ 
+ poly8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + int8x16_t a_ = (a); \ + int8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + uint8x16_t a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vextq_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + float32x2_t result; + __asm__ ("fmla %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + float32x4_t result; + __asm__ ("fmla %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + float64x2_t result; + __asm__ ("fmla %0.2d,%2.2d,%3.2d" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_n_f32 
(float32x2_t a, float32x2_t b, float32_t c) +{ + float32x2_t result; + __asm__ ("fmla %0.2s, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +{ + float32x4_t result; + __asm__ ("fmla %0.4s, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +{ + float64x2_t result; + __asm__ ("fmla %0.2d, %2.2d, %3.d[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + float32x2_t result; + __asm__ ("fmls %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + float32x4_t result; + __asm__ ("fmls %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + float64x2_t result; + __asm__ ("fmls %0.2d,%2.2d,%3.2d" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t a) +{ + float32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_high_f64 (float64x2_t a) +{ + float64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t a) +{ + poly8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t a) +{ + poly16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t a) +{ + int8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t a) +{ + int16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t a) +{ + int32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t a) +{ + int64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + 
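
A minimal usage sketch (editorial, not part of the patch itself), showing how two of the inline-asm intrinsics defined above compose: vabdl_u8 widens the per-byte absolute differences to 16-bit lanes and vaddlvq_u16 reduces the eight halfword lanes to a single 32-bit sum. The helper name sad8 is hypothetical and only illustrates the intended calling convention of these wrappers:

#include <arm_neon.h>
#include <stdint.h>

/* Sum of absolute differences over one 8-byte vector pair,
   built only from intrinsics defined earlier in this header.  */
static uint32_t
sad8 (uint8x8_t a, uint8x8_t b)
{
  uint16x8_t d = vabdl_u8 (a, b);   /* |a[i] - b[i]|, widened to u16 lanes.  */
  return vaddlvq_u16 (d);           /* add the eight u16 lanes into one u32.  */
}
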
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 (uint8x16_t a) +{ + uint8x8_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t a) +{ + uint16x4_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t a) +{ + uint32x2_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t a) +{ + uint64x1_t result; + __asm__ ("ins %0.d[0], %1.d[1]" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhsub_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("shsub %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhsub_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("shsub %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhsub_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("shsub %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhsub_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uhsub %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhsub_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uhsub %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhsub_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uhsub %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhsubq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("shsub %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhsubq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("shsub %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhsubq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("shsub %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhsubq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uhsub %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return 
result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhsubq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uhsub %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhsubq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uhsub %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_dup_f32 (const float32_t * a) +{ + float32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vld1_dup_f64 (const float64_t * a) +{ + float64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_dup_p8 (const poly8_t * a) +{ + poly8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_dup_p16 (const poly16_t * a) +{ + poly16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_dup_s8 (const int8_t * a) +{ + int8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_dup_s16 (const int16_t * a) +{ + int16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_dup_s32 (const int32_t * a) +{ + int32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_dup_s64 (const int64_t * a) +{ + int64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_dup_u8 (const uint8_t * a) +{ + uint8x8_t result; + __asm__ ("ld1r {%0.8b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_dup_u16 (const uint16_t * a) +{ + uint16x4_t result; + __asm__ ("ld1r {%0.4h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_dup_u32 (const uint32_t * a) +{ + uint32x2_t result; + __asm__ ("ld1r {%0.2s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_dup_u64 (const uint64_t * a) +{ + uint64x1_t result; + __asm__ ("ld1r {%0.1d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +#define vld1_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + const float32_t 
* a_ = (a); \ + float32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + const float64_t * a_ = (a); \ + float64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + const poly8_t * a_ = (a); \ + poly8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + const poly16_t * a_ = (a); \ + poly16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x8_t b_ = (b); \ + const int8_t * a_ = (a); \ + int8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + const int16_t * a_ = (a); \ + int16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + const int32_t * a_ = (a); \ + int32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x1_t b_ = (b); \ + const int64_t * a_ = (a); \ + int64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x8_t b_ = (b); \ + const uint8_t * a_ = (a); \ + uint8x8_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + const uint16_t * a_ = (a); \ + uint16x4_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + const uint32_t * a_ = (a); \ + uint32x2_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + const uint64_t * a_ = (a); \ + uint64x1_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i" (c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_f32 (const float32_t * a) +{ + float32x4_t result; + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_f64 (const float64_t * a) +{ + float64x2_t result; + __asm__ ("ld1r {%0.2d}, %1" + : 
"=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_p8 (const poly8_t * a) +{ + poly8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_p16 (const poly16_t * a) +{ + poly16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_s8 (const int8_t * a) +{ + int8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_s16 (const int16_t * a) +{ + int16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_s32 (const int32_t * a) +{ + int32x4_t result; + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_s64 (const int64_t * a) +{ + int64x2_t result; + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_u8 (const uint8_t * a) +{ + uint8x16_t result; + __asm__ ("ld1r {%0.16b}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_u16 (const uint16_t * a) +{ + uint16x8_t result; + __asm__ ("ld1r {%0.8h}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_u32 (const uint32_t * a) +{ + uint32x4_t result; + __asm__ ("ld1r {%0.4s}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_u64 (const uint64_t * a) +{ + uint64x2_t result; + __asm__ ("ld1r {%0.2d}, %1" + : "=w"(result) + : "Utv"(*a) + : /* No clobbers */); + return result; +} + +#define vld1q_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + const float32_t * a_ = (a); \ + float32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + const float64_t * a_ = (a); \ + float64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + const poly8_t * a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + const poly16_t * a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : 
/* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + const int8_t * a_ = (a); \ + int8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + const int16_t * a_ = (a); \ + int16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + const int32_t * a_ = (a); \ + int32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + const int64_t * a_ = (a); \ + int64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + const uint8_t * a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ld1 {%0.b}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + const uint16_t * a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ld1 {%0.h}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + const uint32_t * a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ld1 {%0.s}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +#define vld1q_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + const uint64_t * a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ld1 {%0.d}[%1], %2" \ + : "=w"(result) \ + : "i"(c), "Utv"(*a_), "0"(b_) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) +{ + float32x2_t result; + float32x2_t t1; + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) +{ + int16x4_t result; + __asm__ ("mla %0.4h,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) +{ + int32x2_t result; + __asm__ ("mla %0.2s,%2.2s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) +{ + uint16x4_t result; + __asm__ ("mla %0.4h,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) +{ + uint32x2_t result; + __asm__ ("mla 
%0.2s,%2.2s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +{ + int8x8_t result; + __asm__ ("mla %0.8b, %2.8b, %3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) +{ + int16x4_t result; + __asm__ ("mla %0.4h, %2.4h, %3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +{ + int32x2_t result; + __asm__ ("mla %0.2s, %2.2s, %3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint8x8_t result; + __asm__ ("mla %0.8b, %2.8b, %3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint16x4_t result; + __asm__ ("mla %0.4h, %2.4h, %3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint32x2_t result; + __asm__ ("mla %0.2s, %2.2s, %3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vmlal_high_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ + : 
"=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_high_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +{ + int16x8_t result; + __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +{ + int32x4_t result; + __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +{ + int64x2_t result; + __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +{ + uint16x8_t result; + __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +{ + uint32x4_t result; + __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +{ + uint64x2_t result; + __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), 
"w"(c) + : /* No clobbers */); + return result; +} + +#define vmlal_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x4_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x2_t c_ = (c); \ + int32x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x4_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x2_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlal_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" + : 
"=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +{ + int16x8_t result; + __asm__ ("smlal %0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) +{ + int32x4_t result; + __asm__ ("smlal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +{ + int64x2_t result; + __asm__ ("smlal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint16x8_t result; + __asm__ ("umlal %0.8h,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint32x4_t result; + __asm__ ("umlal %0.4s,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint64x2_t result; + __asm__ ("umlal %0.2d,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +{ + float32x4_t result; + float32x4_t t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +{ + float64x2_t result; + float64x2_t t1; + __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) +{ + int16x8_t result; + __asm__ ("mla %0.8h,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +{ + int32x4_t result; + __asm__ ("mla %0.4s,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +{ + uint16x8_t result; + __asm__ ("mla %0.8h,%2.8h,%3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +{ + uint32x4_t result; + __asm__ ("mla %0.4s,%2.4s,%3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + 
return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +{ + int8x16_t result; + __asm__ ("mla %0.16b, %2.16b, %3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +{ + int16x8_t result; + __asm__ ("mla %0.8h, %2.8h, %3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +{ + int32x4_t result; + __asm__ ("mla %0.4s, %2.4s, %3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ + uint8x16_t result; + __asm__ ("mla %0.16b, %2.16b, %3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint16x8_t result; + __asm__ ("mla %0.8h, %2.8h, %3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint32x4_t result; + __asm__ ("mla %0.4s, %2.4s, %3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) +{ + float32x2_t result; + float32x2_t t1; + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) +{ + int16x4_t result; + __asm__ ("mls %0.4h, %2.4h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) +{ + int32x2_t result; + __asm__ ("mls %0.2s, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) +{ + uint16x4_t result; + __asm__ ("mls %0.4h, %2.4h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) +{ + uint32x2_t result; + __asm__ ("mls %0.2s, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) +{ + int8x8_t result; + __asm__ ("mls %0.8b,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_s16 (int16x4_t a, 
int16x4_t b, int16x4_t c) +{ + int16x4_t result; + __asm__ ("mls %0.4h,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) +{ + int32x2_t result; + __asm__ ("mls %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint8x8_t result; + __asm__ ("mls %0.8b,%2.8b,%3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint16x4_t result; + __asm__ ("mls %0.4h,%2.4h,%3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint32x2_t result; + __asm__ ("mls %0.2s,%2.2s,%3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vmlsl_high_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x8_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x4_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x8_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_high_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + 
uint32x4_t c_ = (c); \ + uint32x4_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) +{ + int16x8_t result; + __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) +{ + int32x4_t result; + __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) +{ + int64x2_t result; + __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) +{ + uint16x8_t result; + __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) +{ + uint32x4_t result; + __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) +{ + uint64x2_t result; + __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +#define vmlsl_lane_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x4_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_lane_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x2_t c_ = (c); \ + int32x2_t b_ = (b); \ + 
int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_lane_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x4_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_lane_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x2_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_laneq_s16(a, b, c, d) \ + __extension__ \ + ({ \ + int16x8_t c_ = (c); \ + int16x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_laneq_s32(a, b, c, d) \ + __extension__ \ + ({ \ + int32x4_t c_ = (c); \ + int32x2_t b_ = (b); \ + int64x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_laneq_u16(a, b, c, d) \ + __extension__ \ + ({ \ + uint16x8_t c_ = (c); \ + uint16x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmlsl_laneq_u32(a, b, c, d) \ + __extension__ \ + ({ \ + uint32x4_t c_ = (c); \ + uint32x2_t b_ = (b); \ + uint64x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) +{ + int32x4_t result; + __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) +{ + int64x2_t result; + __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) +{ + uint32x4_t result; + __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) +{ + uint64x2_t result; + __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) +{ + int16x8_t result; + __asm__ ("smlsl %0.8h, %2.8b, %3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_s16 
(int32x4_t a, int16x4_t b, int16x4_t c) +{ + int32x4_t result; + __asm__ ("smlsl %0.4s, %2.4h, %3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) +{ + int64x2_t result; + __asm__ ("smlsl %0.2d, %2.2s, %3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) +{ + uint16x8_t result; + __asm__ ("umlsl %0.8h, %2.8b, %3.8b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) +{ + uint32x4_t result; + __asm__ ("umlsl %0.4s, %2.4h, %3.4h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) +{ + uint64x2_t result; + __asm__ ("umlsl %0.2d, %2.2s, %3.2s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) +{ + float32x4_t result; + float32x4_t t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c) +{ + float64x2_t result; + float64x2_t t1; + __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d" + : "=w"(result), "=w"(t1) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) +{ + int16x8_t result; + __asm__ ("mls %0.8h, %2.8h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) +{ + int32x4_t result; + __asm__ ("mls %0.4s, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) +{ + uint16x8_t result; + __asm__ ("mls %0.8h, %2.8h, %3.h[0]" + : "=w"(result) + : "0"(a), "w"(b), "x"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) +{ + uint32x4_t result; + __asm__ ("mls %0.4s, %2.4s, %3.s[0]" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) +{ + int8x16_t result; + __asm__ ("mls %0.16b,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) +{ + 
int16x8_t result; + __asm__ ("mls %0.8h,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) +{ + int32x4_t result; + __asm__ ("mls %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) +{ + uint8x16_t result; + __asm__ ("mls %0.16b,%2.16b,%3.16b" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint16x8_t result; + __asm__ ("mls %0.8h,%2.8h,%3.8h" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint32x4_t result; + __asm__ ("mls %0.4s,%2.4s,%3.4s" + : "=w"(result) + : "0"(a), "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovl_high_s8 (int8x16_t a) +{ + int16x8_t result; + __asm__ ("sshll2 %0.8h,%1.16b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovl_high_s16 (int16x8_t a) +{ + int32x4_t result; + __asm__ ("sshll2 %0.4s,%1.8h,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovl_high_s32 (int32x4_t a) +{ + int64x2_t result; + __asm__ ("sshll2 %0.2d,%1.4s,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovl_high_u8 (uint8x16_t a) +{ + uint16x8_t result; + __asm__ ("ushll2 %0.8h,%1.16b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovl_high_u16 (uint16x8_t a) +{ + uint32x4_t result; + __asm__ ("ushll2 %0.4s,%1.8h,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovl_high_u32 (uint32x4_t a) +{ + uint64x2_t result; + __asm__ ("ushll2 %0.2d,%1.4s,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovl_s8 (int8x8_t a) +{ + int16x8_t result; + __asm__ ("sshll %0.8h,%1.8b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovl_s16 (int16x4_t a) +{ + int32x4_t result; + __asm__ ("sshll %0.4s,%1.4h,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovl_s32 (int32x2_t a) +{ + int64x2_t result; + __asm__ ("sshll %0.2d,%1.2s,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovl_u8 (uint8x8_t a) +{ + uint16x8_t result; + __asm__ ("ushll 
%0.8h,%1.8b,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovl_u16 (uint16x4_t a) +{ + uint32x4_t result; + __asm__ ("ushll %0.4s,%1.4h,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovl_u32 (uint32x2_t a) +{ + uint64x2_t result; + __asm__ ("ushll %0.2d,%1.2s,#0" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovn_high_s16 (int8x8_t a, int16x8_t b) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.16b,%1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovn_high_s32 (int16x4_t a, int32x4_t b) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.8h,%1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovn_high_s64 (int32x2_t a, int64x2_t b) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.4s,%1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovn_high_u16 (uint8x8_t a, uint16x8_t b) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.16b,%1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovn_high_u32 (uint16x4_t a, uint32x4_t b) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.8h,%1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovn_high_u64 (uint32x2_t a, uint64x2_t b) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.4s,%1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmovn_s16 (int16x8_t a) +{ + int8x8_t result; + __asm__ ("xtn %0.8b,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmovn_s32 (int32x4_t a) +{ + int16x4_t result; + __asm__ ("xtn %0.4h,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmovn_s64 (int64x2_t a) +{ + int32x2_t result; + __asm__ ("xtn %0.2s,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmovn_u16 (uint16x8_t a) +{ + uint8x8_t result; + __asm__ ("xtn %0.8b,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmovn_u32 (uint32x4_t a) +{ + uint16x4_t result; + __asm__ ("xtn %0.4h,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + 
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmovn_u64 (uint64x2_t a) +{ + uint32x2_t result; + __asm__ ("xtn %0.2s,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_n_f32 (float32x2_t a, float32_t b) +{ + float32x2_t result; + __asm__ ("fmul %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_n_s16 (int16x4_t a, int16_t b) +{ + int16x4_t result; + __asm__ ("mul %0.4h,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_n_s32 (int32x2_t a, int32_t b) +{ + int32x2_t result; + __asm__ ("mul %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_n_u16 (uint16x4_t a, uint16_t b) +{ + uint16x4_t result; + __asm__ ("mul %0.4h,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_n_u32 (uint32x2_t a, uint32_t b) +{ + uint32x2_t result; + __asm__ ("mul %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmuld_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t a_ = (a); \ + float64_t result; \ + __asm__ ("fmul %d0,%d1,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x8_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x4_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x8_t 
a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_high_laneq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x4_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_n_s16 (int16x8_t a, int16_t b) +{ + int32x4_t result; + __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_n_s32 (int32x4_t a, int32_t b) +{ + int64x2_t result; + __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_n_u16 (uint16x8_t a, uint16_t b) +{ + uint32x4_t result; + __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_n_u32 (uint32x4_t a, uint32_t b) +{ + uint64x2_t result; + __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_high_p8 (poly8x16_t a, poly8x16_t b) +{ + poly16x8_t result; + __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_high_s8 (int8x16_t a, int8x16_t b) +{ + int16x8_t result; + __asm__ ("smull2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_high_s16 (int16x8_t a, int16x8_t b) +{ + int32x4_t result; + __asm__ ("smull2 %0.4s,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_high_s32 (int32x4_t a, int32x4_t b) +{ + int64x2_t result; + __asm__ ("smull2 %0.2d,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_high_u8 (uint8x16_t a, uint8x16_t b) +{ + uint16x8_t result; + __asm__ ("umull2 %0.8h,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_high_u16 (uint16x8_t a, uint16x8_t b) +{ + uint32x4_t result; + __asm__ ("umull2 %0.4s,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_high_u32 (uint32x4_t a, uint32x4_t b) +{ + uint64x2_t result; + __asm__ ("umull2 %0.2d,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmull_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ 
+ : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16x4_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32x2_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ + : "=w"(result) \ + : "w"(a_), "x"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmull_laneq_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_n_s16 (int16x4_t a, int16_t b) +{ + int32x4_t result; + __asm__ ("smull %0.4s,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_n_s32 (int32x2_t a, int32_t b) +{ + int64x2_t result; + __asm__ ("smull %0.2d,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_n_u16 (uint16x4_t a, uint16_t b) +{ + uint32x4_t result; + __asm__ ("umull %0.4s,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_n_u32 (uint32x2_t a, uint32_t b) +{ + uint64x2_t result; + __asm__ ("umull %0.2d,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_p8 (poly8x8_t a, poly8x8_t b) +{ + poly16x8_t result; + __asm__ ("pmull %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_s8 (int8x8_t a, int8x8_t b) +{ + int16x8_t result; + __asm__ ("smull %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + 
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_s16 (int16x4_t a, int16x4_t b) +{ + int32x4_t result; + __asm__ ("smull %0.4s, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_s32 (int32x2_t a, int32x2_t b) +{ + int64x2_t result; + __asm__ ("smull %0.2d, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_u8 (uint8x8_t a, uint8x8_t b) +{ + uint16x8_t result; + __asm__ ("umull %0.8h, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_u16 (uint16x4_t a, uint16x4_t b) +{ + uint32x4_t result; + __asm__ ("umull %0.4s, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_u32 (uint32x2_t a, uint32x2_t b) +{ + uint64x2_t result; + __asm__ ("umull %0.2d, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_n_f32 (float32x4_t a, float32_t b) +{ + float32x4_t result; + __asm__ ("fmul %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_n_f64 (float64x2_t a, float64_t b) +{ + float64x2_t result; + __asm__ ("fmul %0.2d,%1.2d,%2.d[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_n_s16 (int16x8_t a, int16_t b) +{ + int16x8_t result; + __asm__ ("mul %0.8h,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_n_s32 (int32x4_t a, int32_t b) +{ + int32x4_t result; + __asm__ ("mul %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_n_u16 (uint16x8_t a, uint16_t b) +{ + uint16x8_t result; + __asm__ ("mul %0.8h,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_n_u32 (uint32x4_t a, uint32_t b) +{ + uint32x4_t result; + __asm__ ("mul %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmuls_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t a_ = (a); \ + float32_t result; \ + __asm__ ("fmul %s0,%s1,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmulx_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fmulx %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmulx_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x2_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("fmulx 
%0.2s,%1.2s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmulxd_f64 (float64_t a, float64_t b) +{ + float64_t result; + __asm__ ("fmulx %d0, %d1, %d2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulxq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fmulx %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulxq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fmulx %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vmulxq_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32x4_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vmulxq_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64x2_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \ + : "=w"(result) \ + : "w"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmulxs_f32 (float32_t a, float32_t b) +{ + float32_t result; + __asm__ ("fmulx %s0, %s1, %s2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmvn_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmvn_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmvn_s16 (int16x4_t a) +{ + int16x4_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmvn_s32 (int32x2_t a) +{ + int32x2_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmvn_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmvn_u16 (uint16x4_t a) +{ + uint16x4_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmvn_u32 (uint32x2_t a) +{ + uint32x2_t result; + __asm__ ("mvn %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmvnq_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static 
__inline int8x16_t __attribute__ ((__always_inline__)) +vmvnq_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmvnq_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmvnq_s32 (int32x4_t a) +{ + int32x4_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmvnq_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmvnq_u16 (uint16x8_t a) +{ + uint16x8_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmvnq_u32 (uint32x4_t a) +{ + uint32x4_t result; + __asm__ ("mvn %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadal_s8 (int16x4_t a, int8x8_t b) +{ + int16x4_t result; + __asm__ ("sadalp %0.4h,%2.8b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadal_s16 (int32x2_t a, int16x4_t b) +{ + int32x2_t result; + __asm__ ("sadalp %0.2s,%2.4h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpadal_s32 (int64x1_t a, int32x2_t b) +{ + int64x1_t result; + __asm__ ("sadalp %0.1d,%2.2s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadal_u8 (uint16x4_t a, uint8x8_t b) +{ + uint16x4_t result; + __asm__ ("uadalp %0.4h,%2.8b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadal_u16 (uint32x2_t a, uint16x4_t b) +{ + uint32x2_t result; + __asm__ ("uadalp %0.2s,%2.4h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpadal_u32 (uint64x1_t a, uint32x2_t b) +{ + uint64x1_t result; + __asm__ ("uadalp %0.1d,%2.2s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpadalq_s8 (int16x8_t a, int8x16_t b) +{ + int16x8_t result; + __asm__ ("sadalp %0.8h,%2.16b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpadalq_s16 (int32x4_t a, int16x8_t b) +{ + int32x4_t result; + __asm__ ("sadalp %0.4s,%2.8h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpadalq_s32 (int64x2_t a, int32x4_t b) +{ + int64x2_t result; + __asm__ 
("sadalp %0.2d,%2.4s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpadalq_u8 (uint16x8_t a, uint8x16_t b) +{ + uint16x8_t result; + __asm__ ("uadalp %0.8h,%2.16b" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpadalq_u16 (uint32x4_t a, uint16x8_t b) +{ + uint32x4_t result; + __asm__ ("uadalp %0.4s,%2.8h" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpadalq_u32 (uint64x2_t a, uint32x4_t b) +{ + uint64x2_t result; + __asm__ ("uadalp %0.2d,%2.4s" + : "=w"(result) + : "0"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpadd_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("faddp %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_addpv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_addpv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_addpv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpaddd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("faddp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpaddl_s8 (int8x8_t a) +{ + int16x4_t result; + __asm__ ("saddlp %0.4h,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpaddl_s16 (int16x4_t a) +{ + int32x2_t result; + __asm__ ("saddlp %0.2s,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddl_s32 (int32x2_t a) +{ + int64x1_t result; + __asm__ ("saddlp %0.1d,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpaddl_u8 (uint8x8_t a) +{ + uint16x4_t result; + __asm__ ("uaddlp %0.4h,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 
+vpaddl_u16 (uint16x4_t a) +{ + uint32x2_t result; + __asm__ ("uaddlp %0.2s,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpaddl_u32 (uint32x2_t a) +{ + uint64x1_t result; + __asm__ ("uaddlp %0.1d,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpaddlq_s8 (int8x16_t a) +{ + int16x8_t result; + __asm__ ("saddlp %0.8h,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpaddlq_s16 (int16x8_t a) +{ + int32x4_t result; + __asm__ ("saddlp %0.4s,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddlq_s32 (int32x4_t a) +{ + int64x2_t result; + __asm__ ("saddlp %0.2d,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpaddlq_u8 (uint8x16_t a) +{ + uint16x8_t result; + __asm__ ("uaddlp %0.8h,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpaddlq_u16 (uint16x8_t a) +{ + uint32x4_t result; + __asm__ ("uaddlp %0.4s,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddlq_u32 (uint32x4_t a) +{ + uint64x2_t result; + __asm__ ("uaddlp %0.2d,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpaddq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("faddp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpaddq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("faddp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpaddq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpaddq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpaddq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddq_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpaddq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" + : 
"=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpaddq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpaddq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddq_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpadds_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("faddp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmax_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fmaxp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmax_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("smaxp %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmax_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("smaxp %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmax_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("smaxp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmax_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("umaxp %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmax_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("umaxp %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmax_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("umaxp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmaxnm_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpmaxnmq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpmaxnmq_f64 
(float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpmaxnmqd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("fmaxnmp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmaxnms_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fmaxnmp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpmaxq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fmaxp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpmaxq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fmaxp %0.2d, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpmaxq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("smaxp %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpmaxq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("smaxp %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpmaxq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("smaxp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpmaxq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("umaxp %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpmaxq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("umaxp %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpmaxq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("umaxp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpmaxqd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("fmaxp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmaxs_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fmaxp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmin_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fminp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ 
((__always_inline__)) +vpmin_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("sminp %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmin_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("sminp %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmin_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("sminp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmin_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uminp %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmin_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uminp %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmin_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uminp %0.2s, %1.2s, %2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpminnm_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("fminnmp %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpminnmq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fminnmp %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpminnmq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fminnmp %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpminnmqd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("fminnmp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpminnms_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fminnmp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vpminq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("fminp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vpminq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("fminp %0.2d, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vpminq_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("sminp %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers 
*/); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpminq_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("sminp %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpminq_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("sminp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vpminq_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uminp %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpminq_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uminp %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpminq_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uminp %0.4s, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vpminqd_f64 (float64x2_t a) +{ + float64_t result; + __asm__ ("fminp %d0,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vpmins_f32 (float32x2_t a) +{ + float32_t result; + __asm__ ("fminp %s0,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_n_s16 (int16x4_t a, int16_t b) +{ + int16x4_t result; + __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_n_s32 (int32x2_t a, int32_t b) +{ + int32x2_t result; + __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s16 (int16x8_t a, int16_t b) +{ + int16x8_t result; + __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s32 (int32x4_t a, int32_t b) +{ + int32x4_t result; + __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqmovn_high_s16 (int8x8_t a, int16x8_t b) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.16b, %1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqmovn_high_s32 (int16x4_t a, int32x4_t b) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 
+vqmovn_high_s64 (int32x2_t a, int64x2_t b) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.16b, %1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqmovun_high_s16 (uint8x8_t a, int16x8_t b) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.16b, %1.8h" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqmovun_high_s32 (uint16x4_t a, int32x4_t b) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.8h, %1.4s" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqmovun_high_s64 (uint32x2_t a, int64x2_t b) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.4s, %1.2d" + : "+w"(result) + : "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s16 (int16x4_t a, int16_t b) +{ + int16x4_t result; + __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s32 (int32x2_t a, int32_t b) +{ + int32x2_t result; + __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s16 (int16x8_t a, int16_t b) +{ + int16x8_t result; + __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" + : "=w"(result) + : "w"(a), "x"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s32 (int32x4_t a, int32_t b) +{ + int32x4_t result; + __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vqrshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_s32(a, b, c) \ + 
__extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqrshrun_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + 
: /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vqshrun_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrbit_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rbit %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrbit_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("rbit %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrbitq_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("rbit %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrbitq_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("rbit %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrecpe_u32 (uint32x2_t a) +{ + uint32x2_t result; + __asm__ ("urecpe %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrecpeq_u32 (uint32x4_t a) +{ + uint32x4_t result; + __asm__ ("urecpe %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev16_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ 
("rev16 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev16_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rev16 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev16_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("rev16 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev16q_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev16q_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev16q_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("rev16 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev32_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev32_p16 (poly16x4_t a) +{ + poly16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev32_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev32_s16 (int16x4_t a) +{ + int16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev32_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("rev32 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev32_u16 (uint16x4_t a) +{ + uint16x4_t result; + __asm__ ("rev32 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev32q_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("rev32 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev32q_p16 (poly16x8_t a) +{ + poly16x8_t result; + __asm__ ("rev32 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev32q_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("rev32 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev32q_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("rev32 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers 
*/); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev32q_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("rev32 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev32q_u16 (uint16x8_t a) +{ + uint16x8_t result; + __asm__ ("rev32 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrev64_f32 (float32x2_t a) +{ + float32x2_t result; + __asm__ ("rev64 %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev64_p8 (poly8x8_t a) +{ + poly8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev64_p16 (poly16x4_t a) +{ + poly16x4_t result; + __asm__ ("rev64 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev64_s8 (int8x8_t a) +{ + int8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev64_s16 (int16x4_t a) +{ + int16x4_t result; + __asm__ ("rev64 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrev64_s32 (int32x2_t a) +{ + int32x2_t result; + __asm__ ("rev64 %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev64_u8 (uint8x8_t a) +{ + uint8x8_t result; + __asm__ ("rev64 %0.8b,%1.8b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev64_u16 (uint16x4_t a) +{ + uint16x4_t result; + __asm__ ("rev64 %0.4h,%1.4h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrev64_u32 (uint32x2_t a) +{ + uint32x2_t result; + __asm__ ("rev64 %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrev64q_f32 (float32x4_t a) +{ + float32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev64q_p8 (poly8x16_t a) +{ + poly8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev64q_p16 (poly16x8_t a) +{ + poly16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev64q_s8 (int8x16_t a) +{ + int8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static 
__inline int16x8_t __attribute__ ((__always_inline__)) +vrev64q_s16 (int16x8_t a) +{ + int16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrev64q_s32 (int32x4_t a) +{ + int32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev64q_u8 (uint8x16_t a) +{ + uint8x16_t result; + __asm__ ("rev64 %0.16b,%1.16b" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev64q_u16 (uint16x8_t a) +{ + uint16x8_t result; + __asm__ ("rev64 %0.8h,%1.8h" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrev64q_u32 (uint32x4_t a) +{ + uint32x4_t result; + __asm__ ("rev64 %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +#define vrshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_s16(a, b) \ + __extension__ \ + ({ \ + int16x8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("rshrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_s32(a, b) \ + __extension__ \ + ({ \ + int32x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("rshrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + 
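/* A minimal usage sketch for the rounding-narrow pattern defined above,
   assuming an AArch64 target with this header included; the helper name
   narrow_pair and the shift count 4 are illustrative only, not part of the
   checked-in header.  The *_high_n forms pre-seed their destination with
   the low half (via vcombine_*) and let the "2"-suffixed instruction
   (here RSHRN2) fill the upper 64 bits.  */
static inline int8x16_t
narrow_pair (int16x8_t lo, int16x8_t hi)
{
  /* RSHRN: round, shift each 16-bit lane right by 4, keep the low byte.  */
  int8x8_t low_half = vrshrn_n_s16 (lo, 4);
  /* RSHRN2: the same narrowing of HI, written into the upper half.  */
  return vrshrn_high_n_s16 (low_half, hi, 4);
}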
+#define vrshrn_n_s64(a, b) \ + __extension__ \ + ({ \ + int64x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("rshrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_u16(a, b) \ + __extension__ \ + ({ \ + uint16x8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("rshrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_u32(a, b) \ + __extension__ \ + ({ \ + uint32x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("rshrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vrshrn_n_u64(a, b) \ + __extension__ \ + ({ \ + uint64x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("rshrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrte_f32 (float32x2_t a) +{ + float32x2_t result; + __asm__ ("frsqrte %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrsqrte_f64 (float64x1_t a) +{ + float64x1_t result; + __asm__ ("frsqrte %d0,%d1" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsqrte_u32 (uint32x2_t a) +{ + uint32x2_t result; + __asm__ ("ursqrte %0.2s,%1.2s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrsqrted_f64 (float64_t a) +{ + float64_t result; + __asm__ ("frsqrte %d0,%d1" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_f32 (float32x4_t a) +{ + float32x4_t result; + __asm__ ("frsqrte %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsqrteq_f64 (float64x2_t a) +{ + float64x2_t result; + __asm__ ("frsqrte %0.2d,%1.2d" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_u32 (uint32x4_t a) +{ + uint32x4_t result; + __asm__ ("ursqrte %0.4s,%1.4s" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrsqrtes_f32 (float32_t a) +{ + float32_t result; + __asm__ ("frsqrte %s0,%s1" + : "=w"(result) + : "w"(a) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrts_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("frsqrts %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrsqrtsd_f64 (float64_t a, float64_t b) +{ + float64_t result; + __asm__ ("frsqrts %d0,%d1,%d2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrtsq_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("frsqrts %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No 
clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsqrtsq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrsqrtss_f32 (float32_t a, float32_t b) +{ + float32_t result; + __asm__ ("frsqrts %s0,%s1,%s2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrsrtsq_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsubhn_s16 (int16x8_t a, int16x8_t b) +{ + int8x8_t result; + __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsubhn_s32 (int32x4_t a, int32x4_t b) +{ + int16x4_t result; + __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsubhn_s64 (int64x2_t a, int64x2_t b) +{ + int32x2_t result; + __asm__ 
("rsubhn %0.2s, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrsubhn_u16 (uint16x8_t a, uint16x8_t b) +{ + uint8x8_t result; + __asm__ ("rsubhn %0.8b, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsubhn_u32 (uint32x4_t a, uint32x4_t b) +{ + uint16x4_t result; + __asm__ ("rsubhn %0.4h, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsubhn_u64 (uint64x2_t a, uint64x2_t b) +{ + uint32x2_t result; + __asm__ ("rsubhn %0.2s, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +#define vset_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + float32_t a_ = (a); \ + float32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + float64_t a_ = (a); \ + float64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x8_t b_ = (b); \ + int8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x1_t b_ = (b); \ + int64_t a_ = (a); \ + int64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x8_t b_ = (b); \ + uint8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ 
+ uint32_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vset_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + uint64_t a_ = (a); \ + uint64x1_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t a_ = (a); \ + float32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t a_ = (a); \ + float64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + int8_t a_ = (a); \ + int8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16_t a_ = (a); \ + int16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32_t a_ = (a); \ + int32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int64_t a_ = (a); \ + int64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + uint8_t a_ = (a); \ + uint8x16_t result; \ + __asm__ ("ins %0.b[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16_t a_ = (a); \ + uint16x8_t result; \ + __asm__ ("ins %0.h[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32_t a_ = (a); \ + uint32x4_t result; \ + __asm__ ("ins %0.s[%3], %w1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsetq_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint64_t a_ = (a); \ + uint64x2_t result; \ + __asm__ ("ins %0.d[%3], %x1" \ + : "=w"(result) \ + : "r"(a_), "0"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define 
vshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int8x8_t a_ = (a); \ + int8x16_t result = vcombine_s8 \ + (a_, vcreate_s8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int16x4_t a_ = (a); \ + int16x8_t result = vcombine_s16 \ + (a_, vcreate_s16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int32x2_t a_ = (a); \ + int32x4_t result = vcombine_s32 \ + (a_, vcreate_s32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint8x8_t a_ = (a); \ + uint8x16_t result = vcombine_u8 \ + (a_, vcreate_u8 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint16x4_t a_ = (a); \ + uint16x8_t result = vcombine_u16 \ + (a_, vcreate_u16 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_high_n_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint32x2_t a_ = (a); \ + uint32x4_t result = vcombine_u32 \ + (a_, vcreate_u32 \ + (__AARCH64_UINT64_C (0x0))); \ + __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ + : "+w"(result) \ + : "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_s16(a, b) \ + __extension__ \ + ({ \ + int16x8_t a_ = (a); \ + int8x8_t result; \ + __asm__ ("shrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_s32(a, b) \ + __extension__ \ + ({ \ + int32x4_t a_ = (a); \ + int16x4_t result; \ + __asm__ ("shrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_s64(a, b) \ + __extension__ \ + ({ \ + int64x2_t a_ = (a); \ + int32x2_t result; \ + __asm__ ("shrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_u16(a, b) \ + __extension__ \ + ({ \ + uint16x8_t a_ = (a); \ + uint8x8_t result; \ + __asm__ ("shrn %0.8b,%1.8h,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_u32(a, b) \ + __extension__ \ + ({ \ + uint32x4_t a_ = (a); \ + uint16x4_t result; \ + __asm__ ("shrn %0.4h,%1.4s,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vshrn_n_u64(a, b) \ + __extension__ \ + ({ \ + uint64x2_t a_ = (a); \ + uint32x2_t result; \ + __asm__ ("shrn %0.2s,%1.2d,%2" \ + : "=w"(result) \ + : "w"(a_), "i"(b) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsli_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8x8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("sli %0.8b,%2.8b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsli_n_p16(a, b, c) \ + 
__extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16x4_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("sli %0.4h,%2.4h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsliq_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("sli %0.16b,%2.16b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsliq_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("sli %0.8h,%2.8h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsri_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8x8_t a_ = (a); \ + poly8x8_t result; \ + __asm__ ("sri %0.8b,%2.8b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsri_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16x4_t a_ = (a); \ + poly16x4_t result; \ + __asm__ ("sri %0.4h,%2.4h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsriq_n_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8x16_t a_ = (a); \ + poly8x16_t result; \ + __asm__ ("sri %0.16b,%2.16b,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vsriq_n_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16x8_t a_ = (a); \ + poly16x8_t result; \ + __asm__ ("sri %0.8h,%2.8h,%3" \ + : "=w"(result) \ + : "0"(a_), "w"(b_), "i"(c) \ + : /* No clobbers */); \ + result; \ + }) + +#define vst1_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x2_t b_ = (b); \ + float32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x1_t b_ = (b); \ + float64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x8_t b_ = (b); \ + poly8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x4_t b_ = (b); \ + poly16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x8_t b_ = (b); \ + int8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x4_t b_ = (b); \ + int16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x2_t b_ = (b); \ + int32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x1_t b_ = (b); \ + int64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x8_t b_ = (b); \ + uint8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + 
: "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x4_t b_ = (b); \ + uint16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x2_t b_ = (b); \ + uint32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x1_t b_ = (b); \ + uint64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + + +#define vst1q_lane_f32(a, b, c) \ + __extension__ \ + ({ \ + float32x4_t b_ = (b); \ + float32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_f64(a, b, c) \ + __extension__ \ + ({ \ + float64x2_t b_ = (b); \ + float64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_p8(a, b, c) \ + __extension__ \ + ({ \ + poly8x16_t b_ = (b); \ + poly8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_p16(a, b, c) \ + __extension__ \ + ({ \ + poly16x8_t b_ = (b); \ + poly16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_s8(a, b, c) \ + __extension__ \ + ({ \ + int8x16_t b_ = (b); \ + int8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_s16(a, b, c) \ + __extension__ \ + ({ \ + int16x8_t b_ = (b); \ + int16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_s32(a, b, c) \ + __extension__ \ + ({ \ + int32x4_t b_ = (b); \ + int32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_s64(a, b, c) \ + __extension__ \ + ({ \ + int64x2_t b_ = (b); \ + int64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_u8(a, b, c) \ + __extension__ \ + ({ \ + uint8x16_t b_ = (b); \ + uint8_t * a_ = (a); \ + __asm__ ("st1 {%1.b}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_u16(a, b, c) \ + __extension__ \ + ({ \ + uint16x8_t b_ = (b); \ + uint16_t * a_ = (a); \ + __asm__ ("st1 {%1.h}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_u32(a, b, c) \ + __extension__ \ + ({ \ + uint32x4_t b_ = (b); \ + uint32_t * a_ = (a); \ + __asm__ ("st1 {%1.s}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +#define vst1q_lane_u64(a, b, c) \ + __extension__ \ + ({ \ + uint64x2_t b_ = (b); \ + uint64_t * a_ = (a); \ + __asm__ ("st1 {%1.d}[%2],[%0]" \ + : \ + : "r"(a_), "w"(b_), "i"(c) \ + : "memory"); \ + }) + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c) +{ + int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 
+vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c) +{ + int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c) +{ + int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c) +{ + uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.16b, %1.8h, %2.8h" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c) +{ + uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.8h, %1.4s, %2.4s" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c) +{ + uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("subhn2 %0.4s, %1.2d, %2.2d" + : "+w"(result) + : "w"(b), "w"(c) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsubhn_s16 (int16x8_t a, int16x8_t b) +{ + int8x8_t result; + __asm__ ("subhn %0.8b, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsubhn_s32 (int32x4_t a, int32x4_t b) +{ + int16x4_t result; + __asm__ ("subhn %0.4h, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsubhn_s64 (int64x2_t a, int64x2_t b) +{ + int32x2_t result; + __asm__ ("subhn %0.2s, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsubhn_u16 (uint16x8_t a, uint16x8_t b) +{ + uint8x8_t result; + __asm__ ("subhn %0.8b, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsubhn_u32 (uint32x4_t a, uint32x4_t b) +{ + uint16x4_t result; + __asm__ ("subhn %0.4h, %1.4s, %2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsubhn_u64 (uint64x2_t a, uint64x2_t b) +{ + uint32x2_t result; + __asm__ ("subhn %0.2s, %1.2d, %2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vtrn1_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("trn1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtrn1_p8 
(poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("trn1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vtrn1_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("trn1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtrn1_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("trn1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vtrn1_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("trn1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vtrn1_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("trn1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtrn1_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("trn1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtrn1_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("trn1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtrn1_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("trn1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vtrn1q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("trn1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vtrn1q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("trn1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vtrn1q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("trn1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vtrn1q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("trn1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vtrn1q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("trn1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vtrn1q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("trn1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) +vtrn1q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("trn1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vtrn1q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("trn1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtrn1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("trn1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtrn1q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("trn1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtrn1q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("trn1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtrn1q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("trn1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vtrn2_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("trn2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtrn2_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("trn2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vtrn2_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("trn2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtrn2_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("trn2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vtrn2_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("trn2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vtrn2_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("trn2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtrn2_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("trn2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtrn2_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("trn2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), 
"w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtrn2_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("trn2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vtrn2q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("trn2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vtrn2q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("trn2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vtrn2q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("trn2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vtrn2q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("trn2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vtrn2q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("trn2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vtrn2q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("trn2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vtrn2q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("trn2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vtrn2q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("trn2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtrn2q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("trn2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtrn2q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("trn2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtrn2q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("trn2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtrn2q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("trn2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_p8 (poly8x8_t a, poly8x8_t 
b) +{ + uint8x8_t result; + __asm__ ("cmtst %0.8b, %1.8b, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_p16 (poly16x4_t a, poly16x4_t b) +{ + uint16x4_t result; + __asm__ ("cmtst %0.4h, %1.4h, %2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_p8 (poly8x16_t a, poly8x16_t b) +{ + uint8x16_t result; + __asm__ ("cmtst %0.16b, %1.16b, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_p16 (poly16x8_t a, poly16x8_t b) +{ + uint16x8_t result; + __asm__ ("cmtst %0.8h, %1.8h, %2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp1_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp1_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp1_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp1_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp1_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp1_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp1_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp1_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp1_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp1q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline 
float64x2_t __attribute__ ((__always_inline__)) +vuzp1q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp1q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp1q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp1q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp1q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp1q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp1q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp1q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp1q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp1q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp2_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp2_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp2_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : 
"w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp2_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp2_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp2_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp2_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp2_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp2_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp2q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vuzp2q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp2q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp2q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp2q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp2q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp2q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp2q_s64 (int64x2_t a, int64x2_t b) +{ + 
int64x2_t result; + __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp2q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp2q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp2q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp2q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vzip1_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("zip1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vzip1_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("zip1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vzip1_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("zip1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vzip1_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("zip1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vzip1_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("zip1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vzip1_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("zip1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vzip1_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("zip1 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vzip1_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("zip1 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vzip1_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("zip1 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t 
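+/* For illustration (not part of the original header): vuzp1_s8 (a, b)
+   gathers the even-numbered lanes {a0,a2,a4,a6,b0,b2,b4,b6} and
+   vuzp2_s8 (a, b) the odd-numbered lanes, de-interleaving the pair;
+   the zip1/zip2 group that follows performs the opposite interleave
+   of the low and high halves respectively.  */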
__attribute__ ((__always_inline__)) +vzip1q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("zip1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vzip1q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("zip1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vzip1q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("zip1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vzip1q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("zip1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vzip1q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("zip1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vzip1q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("zip1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vzip1q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t result; + __asm__ ("zip1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vzip1q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("zip1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vzip1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("zip1 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vzip1q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("zip1 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vzip1q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("zip1 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vzip1q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("zip1 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vzip2_f32 (float32x2_t a, float32x2_t b) +{ + float32x2_t result; + __asm__ ("zip2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vzip2_p8 (poly8x8_t a, poly8x8_t b) +{ + poly8x8_t result; + __asm__ ("zip2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), 
"w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vzip2_p16 (poly16x4_t a, poly16x4_t b) +{ + poly16x4_t result; + __asm__ ("zip2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vzip2_s8 (int8x8_t a, int8x8_t b) +{ + int8x8_t result; + __asm__ ("zip2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vzip2_s16 (int16x4_t a, int16x4_t b) +{ + int16x4_t result; + __asm__ ("zip2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vzip2_s32 (int32x2_t a, int32x2_t b) +{ + int32x2_t result; + __asm__ ("zip2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vzip2_u8 (uint8x8_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("zip2 %0.8b,%1.8b,%2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vzip2_u16 (uint16x4_t a, uint16x4_t b) +{ + uint16x4_t result; + __asm__ ("zip2 %0.4h,%1.4h,%2.4h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vzip2_u32 (uint32x2_t a, uint32x2_t b) +{ + uint32x2_t result; + __asm__ ("zip2 %0.2s,%1.2s,%2.2s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vzip2q_f32 (float32x4_t a, float32x4_t b) +{ + float32x4_t result; + __asm__ ("zip2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vzip2q_f64 (float64x2_t a, float64x2_t b) +{ + float64x2_t result; + __asm__ ("zip2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vzip2q_p8 (poly8x16_t a, poly8x16_t b) +{ + poly8x16_t result; + __asm__ ("zip2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vzip2q_p16 (poly16x8_t a, poly16x8_t b) +{ + poly16x8_t result; + __asm__ ("zip2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vzip2q_s8 (int8x16_t a, int8x16_t b) +{ + int8x16_t result; + __asm__ ("zip2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vzip2q_s16 (int16x8_t a, int16x8_t b) +{ + int16x8_t result; + __asm__ ("zip2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vzip2q_s32 (int32x4_t a, int32x4_t b) +{ + int32x4_t 
result; + __asm__ ("zip2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vzip2q_s64 (int64x2_t a, int64x2_t b) +{ + int64x2_t result; + __asm__ ("zip2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vzip2q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("zip2 %0.16b,%1.16b,%2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vzip2q_u16 (uint16x8_t a, uint16x8_t b) +{ + uint16x8_t result; + __asm__ ("zip2 %0.8h,%1.8h,%2.8h" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vzip2q_u32 (uint32x4_t a, uint32x4_t b) +{ + uint32x4_t result; + __asm__ ("zip2 %0.4s,%1.4s,%2.4s" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vzip2q_u64 (uint64x2_t a, uint64x2_t b) +{ + uint64x2_t result; + __asm__ ("zip2 %0.2d,%1.2d,%2.2d" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +/* End of temporary inline asm implementations. */ + +/* Start of temporary inline asm for vldn, vstn and friends. */ + +/* Create struct element types for duplicating loads. + + Create 2 element structures of: + + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | Y | N | N | + +------+----+----+----+----+ + |uint | Y | Y | N | N | + +------+----+----+----+----+ + |float | - | - | N | N | + +------+----+----+----+----+ + |poly | Y | Y | - | - | + +------+----+----+----+----+ + + Create 3 element structures of: + + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | Y | Y | Y | + +------+----+----+----+----+ + |uint | Y | Y | Y | Y | + +------+----+----+----+----+ + |float | - | - | Y | Y | + +------+----+----+----+----+ + |poly | Y | Y | - | - | + +------+----+----+----+----+ + + Create 4 element structures of: + + +------+----+----+----+----+ + | | 8 | 16 | 32 | 64 | + +------+----+----+----+----+ + |int | Y | N | N | Y | + +------+----+----+----+----+ + |uint | Y | N | N | Y | + +------+----+----+----+----+ + |float | - | - | N | Y | + +------+----+----+----+----+ + |poly | Y | N | - | - | + +------+----+----+----+----+ + + This is required for casting memory reference. */ +#define __STRUCTN(t, sz, nelem) \ + typedef struct t ## sz ## x ## nelem ## _t { \ + t ## sz ## _t val[nelem]; \ + } t ## sz ## x ## nelem ## _t; + +/* 2-element structs. */ +__STRUCTN (int, 8, 2) +__STRUCTN (int, 16, 2) +__STRUCTN (uint, 8, 2) +__STRUCTN (uint, 16, 2) +__STRUCTN (poly, 8, 2) +__STRUCTN (poly, 16, 2) +/* 3-element structs. */ +__STRUCTN (int, 8, 3) +__STRUCTN (int, 16, 3) +__STRUCTN (int, 32, 3) +__STRUCTN (int, 64, 3) +__STRUCTN (uint, 8, 3) +__STRUCTN (uint, 16, 3) +__STRUCTN (uint, 32, 3) +__STRUCTN (uint, 64, 3) +__STRUCTN (float, 32, 3) +__STRUCTN (float, 64, 3) +__STRUCTN (poly, 8, 3) +__STRUCTN (poly, 16, 3) +/* 4-element structs. 
*/ +__STRUCTN (int, 8, 4) +__STRUCTN (int, 64, 4) +__STRUCTN (uint, 8, 4) +__STRUCTN (uint, 64, 4) +__STRUCTN (poly, 8, 4) +__STRUCTN (float, 64, 4) +#undef __STRUCTN + +#define __LD2R_FUNC(rettype, structtype, ptrtype, \ + regsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ + { \ + rettype result; \ + __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ + "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(*(const structtype *)ptr) \ + : "memory", "v16", "v17"); \ + return result; \ + } + +__LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,) +__LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,) +__LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,) +__LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,) +__LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,) +__LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,) +__LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,) +__LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,) +__LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,) +__LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,) +__LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,) +__LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,) +__LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q) +__LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q) +__LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q) +__LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q) +__LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q) +__LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q) +__LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q) +__LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q) +__LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q) +__LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q) +__LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q) +__LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q) + +#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + rettype b, const int c) \ + { \ + rettype result; \ + __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ + "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \ + "st1 {v16." #regsuffix ", v17." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \ + : "memory", "v16", "v17"); \ + return result; \ + } + +__LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,) +__LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) +__LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) +__LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) +__LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) +__LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) +__LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) +__LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) +__LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) +__LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) +__LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) +__LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) +__LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) +__LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) +__LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) +__LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) +__LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) +__LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) +__LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) +__LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) +__LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) +__LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) +__LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) +__LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) + +#define __LD3R_FUNC(rettype, structtype, ptrtype, \ + regsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ + { \ + rettype result; \ + __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ + "st1 {v16." #regsuffix " - v18." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(*(const structtype *)ptr) \ + : "memory", "v16", "v17", "v18"); \ + return result; \ + } + +__LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,) +__LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,) +__LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,) +__LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,) +__LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,) +__LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,) +__LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,) +__LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,) +__LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,) +__LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,) +__LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,) +__LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,) +__LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q) +__LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q) +__LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q) +__LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q) +__LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q) +__LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q) +__LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q) +__LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q) +__LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q) +__LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q) +__LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q) +__LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q) + +#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + rettype b, const int c) \ + { \ + rettype result; \ + __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ + "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \ + "st1 {v16." #regsuffix " - v18." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \ + : "memory", "v16", "v17", "v18"); \ + return result; \ + } + +__LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,) +__LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) +__LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) +__LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) +__LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) +__LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) +__LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) +__LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) +__LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) +__LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) +__LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) +__LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) +__LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) +__LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) +__LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) +__LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) +__LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) +__LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) +__LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) +__LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) +__LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) +__LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) +__LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) +__LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) + +#define __LD4R_FUNC(rettype, structtype, ptrtype, \ + regsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \ + { \ + rettype result; \ + __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ + "st1 {v16." #regsuffix " - v19." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(*(const structtype *)ptr) \ + : "memory", "v16", "v17", "v18", "v19"); \ + return result; \ + } + +__LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,) +__LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,) +__LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,) +__LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,) +__LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,) +__LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,) +__LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,) +__LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,) +__LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,) +__LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,) +__LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,) +__LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,) +__LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q) +__LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q) +__LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q) +__LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q) +__LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q) +__LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q) +__LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q) +__LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q) +__LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q) +__LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q) +__LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q) +__LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q) + +#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \ + rettype b, const int c) \ + { \ + rettype result; \ + __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ + "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \ + "st1 {v16." #regsuffix " - v19." 
#regsuffix "}, %0\n\t" \ + : "=Q"(result) \ + : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \ + : "memory", "v16", "v17", "v18", "v19"); \ + return result; \ + } + +__LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,) +__LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) +__LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) +__LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) +__LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) +__LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) +__LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) +__LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) +__LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) +__LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) +__LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) +__LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) +__LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) +__LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) +__LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) +__LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) +__LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) +__LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) +__LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) +__LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) +__LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) +__LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) +__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) +__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) + +#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ + intype b, const int c) \ + { \ + __ST2_LANE_STRUCTURE_##intype *__p = \ + (__ST2_LANE_STRUCTURE_##intype *)ptr; \ + __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ + "st2 {v16." #lnsuffix ", v17." 
#lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*__p) \ + : "Q"(b), "i"(c) \ + : "v16", "v17"); \ + } + +__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) +__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) +__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) +__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) +__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) +__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) +__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) +__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) +__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) +__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) +__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) +__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) +__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) +__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) +__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) +__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) +__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) +__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) +__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) +__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) +__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) +__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) +__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) +__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) + +#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ + intype b, const int c) \ + { \ + __ST3_LANE_STRUCTURE_##intype *__p = \ + (__ST3_LANE_STRUCTURE_##intype *)ptr; \ + __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ + "st3 {v16." #lnsuffix " - v18." 
#lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*__p) \ + : "Q"(b), "i"(c) \ + : "v16", "v17", "v18"); \ + } + +__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) +__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) +__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) +__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) +__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) +__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) +__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) +__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) +__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) +__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) +__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) +__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) +__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) +__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) +__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) +__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) +__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) +__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) +__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) +__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) +__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) +__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) +__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) +__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) + +#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ + lnsuffix, funcsuffix, Q) \ + typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \ + __extension__ static __inline void \ + __attribute__ ((__always_inline__)) \ + vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ + intype b, const int c) \ + { \ + __ST4_LANE_STRUCTURE_##intype *__p = \ + (__ST4_LANE_STRUCTURE_##intype *)ptr; \ + __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ + "st4 {v16." #lnsuffix " - v19." 
#lnsuffix "}[%2], %0\n\t" \ + : "=Q"(*__p) \ + : "Q"(b), "i"(c) \ + : "v16", "v17", "v18", "v19"); \ + } + +__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) +__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) +__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) +__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) +__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) +__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) +__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) +__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) +__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) +__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) +__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) +__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) +__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) +__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) +__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) +__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) +__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) +__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) +__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) +__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) +__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) +__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) +__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) +__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vaddlv_s32 (int32x2_t a) +{ + int64_t result; + __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); + return result; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vaddlv_u32 (uint32x2_t a) +{ + uint64_t result; + __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); + return result; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddd_s64 (int64x2_t __a) +{ + return __builtin_aarch64_addpdi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); +} + +/* Table intrinsics. */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl1_p8 (poly8x16_t a, uint8x8_t b) +{ + poly8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl1_s8 (int8x16_t a, uint8x8_t b) +{ + int8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl1_u8 (uint8x16_t a, uint8x8_t b) +{ + uint8x8_t result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) +{ + poly8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_s8 (int8x16_t a, uint8x16_t b) +{ + int8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) +{ + uint8x16_t result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) +{ + uint8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b, 
v17.16b}, %1\n\t" + "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) +{ + uint8x8_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) +{ + int8x8_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) +{ + uint8x8_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) +{ + poly8x8_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) +{ + int8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ 
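+/* For illustration (not part of the original header): the vqtbl*
+   intrinsics map onto the A64 TBL instruction, so vqtbl1_s8 (tab, idx)
+   returns tab[idx[i]] for each byte of idx and 0 wherever idx[i] falls
+   outside the 16-byte table; the 2-, 3- and 4-register variants first
+   copy the table into v16-v19 so the asm can name a fixed register
+   list.  */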
((__always_inline__)) +vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) +{ + uint8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) +{ + poly8x16_t result; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"=w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" + : "+w"(result) + : "w"(tab), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b, 
v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t" + "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18"); + return result; +} + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) +{ + int8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + 
:"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) +{ + int8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) +{ + uint8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) +{ + poly8x16_t result = r; + __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t" + "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t" + :"+w"(result) + :"Q"(tab),"w"(idx) + :"memory", "v16", "v17", "v18", "v19"); + return result; +} + +/* V7 legacy table intrinsics. */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl1_s8 (int8x8_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" + : "=w"(result) + : "w"(temp), "w"(idx) + : /* No 
clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) +{ + int8x8_t result; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) +{ + uint8x8_t result; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) +{ + poly8x8_t result; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "=w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) +{ + int8x8_t result = r; + int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static 
__inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" + : "+w"(result) + : "w"(temp), "w"(idx) + : /* No clobbers */); + return result; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx) +{ + int8x8_t result = r; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx) +{ + uint8x8_t result = r; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx) +{ + poly8x8_t result = r; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); + temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); + __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t" + "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t" + : "+w"(result) + : "Q"(temp), "w"(idx) + : "v16", "v17", "memory"); + return result; +} + +/* End of temporary inline asm. */ + +/* Start of optimal implementations in approved order. 
*/ + +/* vabs */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabs_f32 (float32x2_t __a) +{ + return __builtin_aarch64_absv2sf (__a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vabs_f64 (float64x1_t __a) +{ + return __builtin_fabs (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return __builtin_aarch64_absv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) +{ + return __builtin_aarch64_absv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) +{ + return __builtin_aarch64_absv2si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vabs_s64 (int64x1_t __a) +{ + return __builtin_llabs (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabsq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_absv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vabsq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_absv2df (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_absv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_absv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_absv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabsq_s64 (int64x2_t __a) +{ + return __builtin_aarch64_absv2di (__a); +} + +/* vadd */ + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vaddd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a + __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vaddd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a + __b; +} + +/* vaddv */ + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vaddv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vaddv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vaddvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), + 0); +} + +__extension__ static __inline 
int16_t __attribute__ ((__always_inline__)) +vaddvq_s16 (int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vaddvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vaddvq_s64 (int64x2_t __a) +{ + return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vaddvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vaddvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vaddvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), + 0); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vaddvq_u64 (uint64x2_t __a) +{ + return vgetq_lane_u64 ((uint64x2_t) + __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), + 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vaddv_f32 (float32x2_t __a) +{ + float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a); + return vget_lane_f32 (__t, 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vaddvq_f32 (float32x4_t __a) +{ + float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a); + return vgetq_lane_f32 (__t, 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vaddvq_f64 (float64x2_t __a) +{ + float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a); + return vgetq_lane_f64 (__t, 0); +} + +/* vbsl */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) +{ + return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) +{ + return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) +{ + return __builtin_aarch64_simd_bsldi_suss (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbsl_u8 (uint8x8_t __a, uint8x8_t __b, 
uint8x8_t __c) +{ + return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) +{ + return __builtin_aarch64_simd_bsldi_uuuu (__a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c) +{ + return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) +{ + return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) +{ + return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO + +/* vaes */ + +static __inline uint8x16_t +vaeseq_u8 (uint8x16_t data, uint8x16_t key) +{ + return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); +} + +static __inline uint8x16_t +vaesdq_u8 (uint8x16_t data, uint8x16_t key) +{ + return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); +} + +static __inline uint8x16_t 
+vaesmcq_u8 (uint8x16_t data) +{ + return __builtin_aarch64_crypto_aesmcv16qi_uu (data); +} + +static __inline uint8x16_t +vaesimcq_u8 (uint8x16_t data) +{ + return __builtin_aarch64_crypto_aesimcv16qi_uu (data); +} + +#endif + +/* vcage */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcages_f32 (float32_t __a, float32_t __b) +{ + return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcage_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) >= vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcageq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) >= vabsq_f32 (__b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcaged_f64 (float64_t __a, float64_t __b) +{ + return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcageq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) >= vabsq_f64 (__b); +} + +/* vcagt */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcagts_f32 (float32_t __a, float32_t __b) +{ + return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcagt_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) > vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcagtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) > vabsq_f32 (__b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcagtd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcagtq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) > vabsq_f64 (__b); +} + +/* vcale */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcale_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) <= vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) <= vabsq_f32 (__b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcaleq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) <= vabsq_f64 (__b); +} + +/* vcalt */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcalt_f32 (float32x2_t __a, float32x2_t __b) +{ + return vabs_f32 (__a) < vabs_f32 (__b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return vabsq_f32 (__a) < vabsq_f32 (__b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcaltq_f64 (float64x2_t __a, float64x2_t __b) +{ + return vabsq_f64 (__a) < vabsq_f64 (__b); +} + +/* vceq - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a == __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + 
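The vceq* comparisons defined above return per-lane masks (all-ones for true, zero for false) rather than booleans, which pairs naturally with the vbsl* bit-selects defined a little earlier in this header. A minimal usage sketch follows; the helper name and scenario are illustrative only and are not part of the patch.

#include <arm_neon.h>

/* Illustrative helper (not part of the patch): wherever a lane of __v
   compares equal to __key, take that lane from __subst, otherwise keep __v.
   vceqq_f32 yields 0xffffffff/0 per lane; vbslq_f32 takes bits from its
   second operand where the mask is set and from the third elsewhere.  */
static inline float32x4_t
replace_equal_lanes (float32x4_t __v, float32x4_t __key, float32x4_t __subst)
{
  uint32x4_t __mask = vceqq_f32 (__v, __key);
  return vbslq_f32 (__mask, __subst, __v);
}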
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vceq - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vceqs_f32 (float32_t __a, float32_t __b) +{ + return __a == __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vceqd_f64 (float64_t __a, float64_t __b) +{ + return __a == __b ? -1ll : 0ll; +} + +/* vceqz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_f64 (float64x1_t __a) +{ + return __a == 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_s64 (int64x1_t __a) +{ + return __a == 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceqz_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceqz_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceqz_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqz_u64 (uint64x1_t __a) +{ + return __a == 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqzq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqzq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqzq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vceqzq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vceqz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vceqzs_f32 (float32_t __a) +{ + return __a == 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqzd_s64 (int64x1_t __a) +{ + return __a == 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceqzd_u64 (int64x1_t __a) +{ + return __a == 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vceqzd_f64 (float64_t __a) +{ + return __a == 0.0 ? -1ll : 0ll; +} + +/* vcge - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a >= __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcge_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + 
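Because vcge*/vcgt* likewise return all-ones/zero lane masks, counting how many lanes satisfy a predicate reduces to a shift plus one of the vaddv* reductions defined above. A sketch, assuming the standard vdupq_n_s32 and vshrq_n_u32 intrinsics that live elsewhere in this same header (outside the excerpt shown here); again the helper itself is illustrative, not part of the patch.

#include <arm_neon.h>
#include <stdint.h>

/* Illustrative helper (not part of the patch): count the lanes of __v that
   are >= __t.  Each lane of the vcgeq_s32 mask is 0 or 0xffffffff; shifting
   right by 31 leaves 0 or 1 per lane, and vaddvq_u32 sums the lanes.  */
static inline uint32_t
count_lanes_ge (int32x4_t __v, int32_t __t)
{
  uint32x4_t __mask = vcgeq_s32 (__v, vdupq_n_s32 (__t));
  return vaddvq_u32 (vshrq_n_u32 (__mask, 31));
}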
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcge - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcges_f32 (float32_t __a, float32_t __b) +{ + return __a >= __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcged_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcged_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcged_f64 (float64_t __a, float64_t __b) +{ + return __a >= __b ? -1ll : 0ll; +} + +/* vcgez - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_f64 (float64x1_t __a) +{ + return __a >= 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_s64 (int64x1_t __a) +{ + return __a >= 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgez_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgez_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgez_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgez_u64 (uint64x1_t __a) +{ + return __a >= 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgezq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgezq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgezq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgezq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcgez - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgezs_f32 (float32_t __a) +{ + return __a >= 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgezd_s64 (int64x1_t __a) +{ + return __a >= 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgezd_u64 (int64x1_t __a) +{ + return __a >= 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgezd_f64 (float64_t __a) +{ + return __a >= 0.0 ? -1ll : 0ll; +} + +/* vcgt - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgt_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a > __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgt_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgt_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + 
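The vqtbl*/vqtbx* intrinsics at the top of this block index into one to four 16-byte table registers; TBL returns zero for an out-of-range index while TBX leaves the destination lane untouched. A small usage sketch, assuming the single-table form vqtbl1q_u8 from the same family (defined in this header just before the temporary inline-asm block excerpted here); the helper is illustrative only.

#include <arm_neon.h>

/* Illustrative helper (not part of the patch): translate the low nibble of
   each of 16 bytes into its ASCII hex digit with a single TBL lookup.  The
   vandq_u8 mask keeps every index inside the 0..15 range of the one-register
   table, so the TBL "out of range gives 0" rule never triggers.  */
static inline uint8x16_t
nibbles_to_hex (uint8x16_t __nibbles)
{
  static const uint8_t __digits[16]
    = { '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
  uint8x16_t __table = vld1q_u8 (__digits);
  return vqtbl1q_u8 (__table, vandq_u8 (__nibbles, vdupq_n_u8 (0x0f)));
}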
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcgt - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgts_f32 (float32_t __a, float32_t __b) +{ + return __a > __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgtd_f64 (float64_t __a, float64_t __b) +{ + return __a > __b ? -1ll : 0ll; +} + +/* vcgtz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_f64 (float64x1_t __a) +{ + return __a > 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgtz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_s64 (int64x1_t __a) +{ + return __a > 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgtz_u8 (uint8x8_t __a) +{ + uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgtz_u16 (uint16x4_t __a) +{ + uint16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgtz_u32 (uint32x2_t __a) +{ + uint32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtz_u64 (uint64x1_t __a) +{ + return __a > 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtzq_u8 (uint8x16_t __a) +{ + uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtzq_u16 (uint16x8_t __a) +{ + uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtzq_u32 (uint32x4_t __a) +{ + uint32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcgtzq_u64 (uint64x2_t __a) +{ + uint64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +/* vcgtz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcgtzs_f32 (float32_t __a) +{ + return __a > 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtzd_s64 (int64x1_t __a) +{ + return __a > 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcgtzd_u64 (int64x1_t __a) +{ + return __a > 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcgtzd_f64 (float64_t __a) +{ + return __a > 0.0 ? -1ll : 0ll; +} + +/* vcle - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcle_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a <= __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcle_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b, + (int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b, + (int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcle_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcleq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcleq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b, + (int16x8_t) __a); +} + 
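The comparison intrinsics in this group all follow the same contract: each lane of the result is set to all ones where the predicate holds and to all zeros otherwise, and the vcle* forms are implemented by handing the swapped operands to the corresponding cmge builtin (the casts through the signed element types are only there until the builtins carry their proper unsigned signatures, as the TODO comments elsewhere in this header note). A minimal usage sketch under those assumptions follows; it is illustrative only, not part of the checked-in header, the helper name clamp_to_limit is hypothetical, and it relies solely on vcleq_u16 and vbslq_u16 as declared in arm_neon.h.

    #include <arm_neon.h>

    /* Per-lane clamp: where a[i] <= limit[i], keep a[i]; otherwise take limit[i].
       vcleq_u16 yields a 0xFFFF/0x0000 mask per lane; vbslq_u16 then selects
       bits from its second operand where the mask is set, else from the third.  */
    static inline uint16x8_t
    clamp_to_limit (uint16x8_t a, uint16x8_t limit)
    {
      uint16x8_t mask = vcleq_u16 (a, limit);
      return vbslq_u16 (mask, a, limit);
    }

The same mask-then-select pattern applies to the vcgt/vclt forms and the compare-with-zero variants defined in this file; only the intrinsic that produces the mask changes.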
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b, + (int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcleq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b, + (int64x2_t) __a); +} + +/* vcle - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcles_f32 (float32_t __a, float32_t __b) +{ + return __a <= __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcled_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcled_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcled_f64 (float64_t __a, float64_t __b) +{ + return __a <= __b ? -1ll : 0ll; +} + +/* vclez - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclez_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_f64 (float64x1_t __a) +{ + return __a <= 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclez_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclez_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclez_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclez_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_s64 (int64x1_t __a) +{ + return __a <= 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclez_u64 (uint64x1_t __a) +{ + return __a <= 0ll ? 
-1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclezq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vclezq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclezq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclezq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclezq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclezq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vclezq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b); +} + +/* vclez - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vclezs_f32 (float32_t __a) +{ + return __a <= 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclezd_s64 (int64x1_t __a) +{ + return __a <= 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclezd_u64 (int64x1_t __a) +{ + return __a <= 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vclezd_f64 (float64_t __a) +{ + return __a <= 0.0 ? -1ll : 0ll; +} + +/* vclt - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclt_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclt_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a < __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b, + (int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b, + (int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b, + (int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vclt_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltq_f64 (float64x2_t __a, float64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b, + (int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b, + (int16x8_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b, + (int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b, + (int64x2_t) __a); +} + +/* vclt - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vclts_f32 (float32_t __a, float32_t __b) +{ + return __a < __b ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a < __b ? 
-1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcltd_f64 (float64_t __a, float64_t __b) +{ + return __a < __b ? -1ll : 0ll; +} + +/* vcltz - vector. */ + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcltz_f32 (float32x2_t __a) +{ + float32x2_t __b = {0.0f, 0.0f}; + return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltz_f64 (float64x1_t __a) +{ + return __a < 0.0 ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcltz_p8 (poly8x8_t __a) +{ + poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcltz_s8 (int8x8_t __a) +{ + int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcltz_s16 (int16x4_t __a) +{ + int16x4_t __b = {0, 0, 0, 0}; + return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcltz_s32 (int32x2_t __a) +{ + int32x2_t __b = {0, 0}; + return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltz_s64 (int64x1_t __a) +{ + return __a < 0ll ? -1ll : 0ll; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltzq_f32 (float32x4_t __a) +{ + float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f}; + return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltzq_f64 (float64x2_t __a) +{ + float64x2_t __b = {0.0, 0.0}; + return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltzq_p8 (poly8x16_t __a) +{ + poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltzq_s8 (int8x16_t __a) +{ + int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltzq_s16 (int16x8_t __a) +{ + int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0}; + return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltzq_s32 (int32x4_t __a) +{ + int32x4_t __b = {0, 0, 0, 0}; + return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcltzq_s64 (int64x2_t __a) +{ + int64x2_t __b = {0, 0}; + return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b); +} + +/* vcltz - scalar. */ + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcltzs_f32 (float32_t __a) +{ + return __a < 0.0f ? -1 : 0; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltzd_s64 (int64x1_t __a) +{ + return __a < 0 ? 
-1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcltzd_u64 (int64x1_t __a) +{ + return __a < 0 ? -1ll : 0ll; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcltzd_f64 (float64_t __a) +{ + return __a < 0.0 ? -1ll : 0ll; +} + +/* vclz. */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vclz_s8 (int8x8_t __a) +{ + return __builtin_aarch64_clzv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vclz_s16 (int16x4_t __a) +{ + return __builtin_aarch64_clzv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vclz_s32 (int32x2_t __a) +{ + return __builtin_aarch64_clzv2si (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclz_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclz_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclz_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclzq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_clzv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclzq_s16 (int16x8_t __a) +{ + return __builtin_aarch64_clzv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclzq_s32 (int32x4_t __a) +{ + return __builtin_aarch64_clzv4si (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclzq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclzq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclzq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); +} + +/* vcvt (double -> float). */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_f64 (float64x2_t __a) +{ + return __builtin_aarch64_float_truncate_lo_v2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); +} + +/* vcvt (float -> double). 
*/ + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvt_f64_f32 (float32x2_t __a) +{ + + return __builtin_aarch64_float_extend_lo_v2df (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvt_high_f64_f32 (float32x4_t __a) +{ + return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); +} + +/* vcvt (int -> float) */ + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vcvtd_f64_s64 (int64_t __a) +{ + return (float64_t) __a; +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vcvtd_f64_u64 (uint64_t __a) +{ + return (float64_t) __a; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvts_f32_s32 (int32_t __a) +{ + return (float32_t) __a; +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vcvts_f32_u32 (uint32_t __a) +{ + return (float32_t) __a; +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_s32 (int32x2_t __a) +{ + return __builtin_aarch64_floatv2siv2sf (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_u32 (uint32x2_t __a) +{ + return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_s32 (int32x4_t __a) +{ + return __builtin_aarch64_floatv4siv4sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_u32 (uint32x4_t __a) +{ + return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvtq_f64_s64 (int64x2_t __a) +{ + return __builtin_aarch64_floatv2div2df (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vcvtq_f64_u64 (uint64x2_t __a) +{ + return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); +} + +/* vcvt (float -> int) */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtd_s64_f64 (float64_t __a) +{ + return (int64_t) __a; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtd_u64_f64 (float64_t __a) +{ + return (uint64_t) __a; +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvts_s32_f32 (float32_t __a) +{ + return (int32_t) __a; +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvts_u32_f32 (float32_t __a) +{ + return (uint32_t) __a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lbtruncv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lbtruncv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lbtruncv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a); +} + +/* vcvta */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtad_s64_f64 (float64_t __a) +{ + return __builtin_aarch64_lrounddfdi (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtad_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lroundudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtas_s32_f32 (float32_t __a) +{ + return __builtin_aarch64_lroundsfsi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtas_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lroundusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvta_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lroundv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvta_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtaq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lroundv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtaq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtaq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lroundv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtaq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a); +} + +/* vcvtm */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtmd_s64_f64 (float64_t __a) +{ + return __builtin_llfloor (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtmd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfloorudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtms_s32_f32 (float32_t __a) +{ + return __builtin_ifloorf (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtms_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfloorusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtm_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lfloorv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtm_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtmq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lfloorv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtmq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtmq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lfloorv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtmq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a); +} + +/* vcvtn */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtnd_s64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfrintndfdi (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtnd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lfrintnudfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtns_s32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfrintnsfsi (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtns_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lfrintnusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtn_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lfrintnv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtn_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtnq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lfrintnv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtnq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtnq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lfrintnv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtnq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a); +} + +/* vcvtp */ + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vcvtpd_s64_f64 (float64_t __a) +{ + return __builtin_llceil (__a); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vcvtpd_u64_f64 (float64_t __a) +{ + return __builtin_aarch64_lceiludfdi (__a); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vcvtps_s32_f32 (float32_t __a) +{ + return __builtin_iceilf (__a); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vcvtps_u32_f32 (float32_t __a) +{ + return __builtin_aarch64_lceilusfsi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvtp_s32_f32 (float32x2_t __a) +{ + return __builtin_aarch64_lceilv2sfv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvtp_u32_f32 (float32x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtpq_s32_f32 (float32x4_t __a) +{ + return __builtin_aarch64_lceilv4sfv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtpq_u32_f32 (float32x4_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. */ + return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcvtpq_s64_f64 (float64x2_t __a) +{ + return __builtin_aarch64_lceilv2dfv2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcvtpq_u64_f64 (float64x2_t __a) +{ + /* TODO: This cast should go away when builtins have + their correct types. 
*/ + return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a); +} + +/* vdup_n */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_n_f32 (float32_t __a) +{ + return (float32x2_t) {__a, __a}; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_n_f64 (float64_t __a) +{ + return __a; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_n_p8 (poly8_t __a) +{ + return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_n_p16 (poly16_t __a) +{ + return (poly16x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_n_s8 (int8_t __a) +{ + return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_n_s16 (int16_t __a) +{ + return (int16x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_n_s32 (int32_t __a) +{ + return (int32x2_t) {__a, __a}; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_n_s64 (int64_t __a) +{ + return __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_n_u8 (uint8_t __a) +{ + return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_n_u16 (uint16_t __a) +{ + return (uint16x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_n_u32 (uint32_t __a) +{ + return (uint32x2_t) {__a, __a}; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_n_u64 (uint64_t __a) +{ + return __a; +} + +/* vdupq_n */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_n_f32 (float32_t __a) +{ + return (float32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_n_f64 (float64_t __a) +{ + return (float64x2_t) {__a, __a}; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_n_p8 (uint32_t __a) +{ + return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_n_p16 (uint32_t __a) +{ + return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_n_s8 (int32_t __a) +{ + return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_n_s16 (int32_t __a) +{ + return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_n_s32 (int32_t __a) +{ + return (int32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_n_s64 (int64_t __a) +{ + return (int64x2_t) {__a, __a}; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_n_u8 (uint32_t __a) +{ + return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, + __a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static 
__inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_n_u16 (uint32_t __a) +{ + return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_n_u32 (uint32_t __a) +{ + return (uint32x4_t) {__a, __a, __a, __a}; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_n_u64 (uint64_t __a) +{ + return (uint64x2_t) {__a, __a}; +} + +/* vdup_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_f32 (__a, __b); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vdup_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_p16 (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_s8 (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_s16 (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_s32 (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_lane_s64 (int64x1_t __a, const int __b) +{ + return __aarch64_vdup_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vdup_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vdup_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vdup_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_lane_u64 (uint64x1_t __a, const int __b) +{ + return __aarch64_vdup_lane_u64 (__a, __b); +} + +/* vdup_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_f32 (__a, __b); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_f64 (__a, __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_p8 (__a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_p16 (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_s8 (int8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s8 (__a, __b); +} + +__extension__ static __inline 
int16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s16 (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s32 (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_s64 (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u8 (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u16 (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u32 (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vdup_laneq_u64 (__a, __b); +} + +/* vdupq_lane */ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_f32 (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_f64 (float64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_f64 (__a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_p8 (__a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_p16 (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s8 (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s16 (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s32 (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_s64 (int64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_s64 (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u8 (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u16 (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u32 (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_u64 (uint64x1_t __a, const int __b) +{ + return __aarch64_vdupq_lane_u64 (__a, __b); +} + +/* vdupq_laneq */ +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_f32 (float32x4_t __a, const int __b) +{ + return 
__aarch64_vdupq_laneq_f32 (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_f64 (__a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_p8 (__a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_p16 (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_s8 (int8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s8 (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s16 (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s32 (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_s64 (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u8 (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u16 (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u32 (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vdupq_laneq_u64 (__a, __b); +} + +/* vdupb_lane */ +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vdupb_lane_p8 (poly8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_p8 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vdupb_lane_s8 (int8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_s8 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vdupb_lane_u8 (uint8x8_t __a, const int __b) +{ + return __aarch64_vget_lane_u8 (__a, __b); +} + +/* vduph_lane */ +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vduph_lane_p16 (poly16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_p16 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vduph_lane_s16 (int16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_s16 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vduph_lane_u16 (uint16x4_t __a, const int __b) +{ + return __aarch64_vget_lane_u16 (__a, __b); +} + +/* vdups_lane */ +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vdups_lane_f32 (float32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_f32 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vdups_lane_s32 (int32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_s32 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ 
((__always_inline__)) +vdups_lane_u32 (uint32x2_t __a, const int __b) +{ + return __aarch64_vget_lane_u32 (__a, __b); +} + +/* vdupd_lane */ +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vdupd_lane_f64 (float64x1_t __a, const int __attribute__ ((unused)) __b) +{ + return __a; +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vdupd_lane_s64 (int64x1_t __a, const int __attribute__ ((unused)) __b) +{ + return __a; +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vdupd_lane_u64 (uint64x1_t __a, const int __attribute__ ((unused)) __b) +{ + return __a; +} + +/* vdupb_laneq */ +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vdupb_laneq_p8 (poly8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p8 (__a, __b); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b) +{ + return __aarch64_vgetq_lane_s8 (__a, __b); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vdupb_laneq_u8 (uint8x16_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u8 (__a, __b); +} + +/* vduph_laneq */ +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vduph_laneq_p16 (poly16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_p16 (__a, __b); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vduph_laneq_s16 (int16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s16 (__a, __b); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vduph_laneq_u16 (uint16x8_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u16 (__a, __b); +} + +/* vdups_laneq */ +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vdups_laneq_f32 (float32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f32 (__a, __b); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vdups_laneq_s32 (int32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s32 (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vdups_laneq_u32 (uint32x4_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u32 (__a, __b); +} + +/* vdupd_laneq */ +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vdupd_laneq_f64 (float64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_f64 (__a, __b); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vdupd_laneq_s64 (int64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_s64 (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vdupd_laneq_u64 (uint64x2_t __a, const int __b) +{ + return __aarch64_vgetq_lane_u64 (__a, __b); +} + +/* vfma_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (__b, + __aarch64_vdup_lane_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfma_lane_f64 (float64_t __a, float64_t __b, + float64_t __c, const int __lane) +{ + return __builtin_fma (__b, __c, __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfmad_lane_f64 (float64_t __a, float64_t __b, + float64_t __c, const int __lane) +{ + return 
__builtin_fma (__b, __c, __a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vfmas_lane_f32 (float32_t __a, float32_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a); +} + +/* vfma_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (__b, + __aarch64_vdup_laneq_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfma_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfmad_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vfmas_laneq_f32 (float32_t __a, float32_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a); +} + +/* vfmaq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (__b, + __aarch64_vdupq_lane_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b, + float64_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a); +} + +/* vfmaq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (__b, + __aarch64_vdupq_laneq_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (__b, + __aarch64_vdupq_laneq_f64 (__c, __lane), + __a); +} + +/* vfms_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf (-__b, + __aarch64_vdup_lane_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfms_lane_f64 (float64_t __a, float64_t __b, + float64_t __c, const int __lane) +{ + return __builtin_fma (-__b, __c, __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfmsd_lane_f64 (float64_t __a, float64_t __b, + float64_t __c, const int __lane) +{ + return __builtin_fma (-__b, __c, __a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vfmss_lane_f32 (float32_t __a, float32_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a); +} + +/* vfms_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2sf 
(-__b, + __aarch64_vdup_laneq_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfms_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vfmsd_laneq_f64 (float64_t __a, float64_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vfmss_laneq_f32 (float32_t __a, float32_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a); +} + +/* vfmsq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (-__b, + __aarch64_vdupq_lane_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b, + float64_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a); +} + +/* vfmsq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return __builtin_aarch64_fmav4sf (-__b, + __aarch64_vdupq_laneq_f32 (__c, __lane), + __a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b, + float64x2_t __c, const int __lane) +{ + return __builtin_aarch64_fmav2df (-__b, + __aarch64_vdupq_laneq_f64 (__c, __lane), + __a); +} + +/* vld1 */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_f32 (const float32_t *a) +{ + return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vld1_f64 (const float64_t *a) +{ + return *a; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_p8 (const poly8_t *a) +{ + return (poly8x8_t) + __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_p16 (const poly16_t *a) +{ + return (poly16x4_t) + __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_s8 (const int8_t *a) +{ + return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_s16 (const int16_t *a) +{ + return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_s32 (const int32_t *a) +{ + return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_s64 (const int64_t *a) +{ + return *a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_u8 (const uint8_t *a) +{ + return (uint8x8_t) + __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static 
__inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_u16 (const uint16_t *a) +{ + return (uint16x4_t) + __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_u32 (const uint32_t *a) +{ + return (uint32x2_t) + __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_u64 (const uint64_t *a) +{ + return *a; +} + +/* vld1q */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_f32 (const float32_t *a) +{ + return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vld1q_f64 (const float64_t *a) +{ + return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_p8 (const poly8_t *a) +{ + return (poly8x16_t) + __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_p16 (const poly16_t *a) +{ + return (poly16x8_t) + __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_s8 (const int8_t *a) +{ + return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_s16 (const int16_t *a) +{ + return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_s32 (const int32_t *a) +{ + return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_s64 (const int64_t *a) +{ + return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_u8 (const uint8_t *a) +{ + return (uint8x16_t) + __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_u16 (const uint16_t *a) +{ + return (uint16x8_t) + __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_u32 (const uint32_t *a) +{ + return (uint32x4_t) + __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_u64 (const uint64_t *a) +{ + return (uint64x2_t) + __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); +} + +/* vldn */ + +__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) +vld2_s64 (const int64_t * __a) +{ + int64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) +vld2_u64 (const uint64_t * __a) +{ + uint64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2di ((const 
__builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); + return ret; +} + +__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__)) +vld2_f64 (const float64_t * __a) +{ + float64x1x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0); + ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1); + return ret; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_s8 (const int8_t * __a) +{ + int8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_p8 (const poly8_t * __a) +{ + poly8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_s16 (const int16_t * __a) +{ + int16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_p16 (const poly16_t * __a) +{ + poly16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_s32 (const int32_t * __a) +{ + int32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_u8 (const uint8_t * __a) +{ + uint8x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); + return ret; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_u16 (const uint16_t * __a) +{ + uint16x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); + return ret; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_u32 (const uint32_t * __a) +{ + uint32x2x2_t ret; + 
__builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); + return ret; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_f32 (const float32_t * __a) +{ + float32x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); + return ret; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vld2q_s8 (const int8_t * __a) +{ + int8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vld2q_p8 (const poly8_t * __a) +{ + poly8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vld2q_s16 (const int16_t * __a) +{ + int16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vld2q_p16 (const poly16_t * __a) +{ + poly16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vld2q_s32 (const int32_t * __a) +{ + int32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__)) +vld2q_s64 (const int64_t * __a) +{ + int64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vld2q_u8 (const uint8_t * __a) +{ + uint8x16x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); + return ret; +} + +__extension__ static __inline 
uint16x8x2_t __attribute__ ((__always_inline__)) +vld2q_u16 (const uint16_t * __a) +{ + uint16x8x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); + return ret; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vld2q_u32 (const uint32_t * __a) +{ + uint32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); + return ret; +} + +__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__)) +vld2q_u64 (const uint64_t * __a) +{ + uint64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); + return ret; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vld2q_f32 (const float32_t * __a) +{ + float32x4x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); + return ret; +} + +__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__)) +vld2q_f64 (const float64_t * __a) +{ + float64x2x2_t ret; + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); + return ret; +} + +__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) +vld3_s64 (const int64_t * __a) +{ + int64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return ret; +} + +__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) +vld3_u64 (const uint64_t * __a) +{ + uint64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); + ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); + return ret; +} + +__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__)) +vld3_f64 (const float64_t * __a) +{ + float64x1x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0); + ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1); + ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2); + return ret; +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_s8 (const int8_t * __a) +{ + int8x8x3_t 
ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_p8 (const poly8_t * __a) +{ + poly8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_s16 (const int16_t * __a) +{ + int16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_p16 (const poly16_t * __a) +{ + poly16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_s32 (const int32_t * __a) +{ + int32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); + ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return ret; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_u8 (const uint8_t * __a) +{ + uint8x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); + ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); + return ret; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_u16 (const uint16_t * __a) +{ + uint16x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); + ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); + return ret; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_u32 (const uint32_t * __a) +{ + uint32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); + ret.val[1] = (uint32x2_t) 
__builtin_aarch64_get_dregciv2si (__o, 1); + ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); + return ret; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_f32 (const float32_t * __a) +{ + float32x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); + ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); + return ret; +} + +__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) +vld3q_s8 (const int8_t * __a) +{ + int8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) +vld3q_p8 (const poly8_t * __a) +{ + poly8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) +vld3q_s16 (const int16_t * __a) +{ + int16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) +vld3q_p16 (const poly16_t * __a) +{ + poly16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) +vld3q_s32 (const int32_t * __a) +{ + int32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return ret; +} + +__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__)) +vld3q_s64 (const int64_t * __a) +{ + int64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return ret; +} + +__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) 
+vld3q_u8 (const uint8_t * __a) +{ + uint8x16x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); + return ret; +} + +__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) +vld3q_u16 (const uint16_t * __a) +{ + uint16x8x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); + return ret; +} + +__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) +vld3q_u32 (const uint32_t * __a) +{ + uint32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); + return ret; +} + +__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__)) +vld3q_u64 (const uint64_t * __a) +{ + uint64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); + return ret; +} + +__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) +vld3q_f32 (const float32_t * __a) +{ + float32x4x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); + return ret; +} + +__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__)) +vld3q_f64 (const float64_t * __a) +{ + float64x2x3_t ret; + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); + return ret; +} + +__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) +vld4_s64 (const int64_t * __a) +{ + int64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return ret; +} + +__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) +vld4_u64 (const uint64_t * __a) +{ + uint64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4di ((const 
__builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); + ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); + ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); + ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); + return ret; +} + +__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__)) +vld4_f64 (const float64_t * __a) +{ + float64x1x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0); + ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1); + ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2); + ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3); + return ret; +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_s8 (const int8_t * __a) +{ + int8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_p8 (const poly8_t * __a) +{ + poly8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_s16 (const int16_t * __a) +{ + int16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_p16 (const poly16_t * __a) +{ + poly16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_s32 (const int32_t * __a) +{ + int32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); + ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); + ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); + ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si 
(__o, 3); + return ret; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_u8 (const uint8_t * __a) +{ + uint8x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); + ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); + ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); + ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); + return ret; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_u16 (const uint16_t * __a) +{ + uint16x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); + ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); + ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); + ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); + return ret; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_u32 (const uint32_t * __a) +{ + uint32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); + ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); + ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); + ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); + return ret; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_f32 (const float32_t * __a) +{ + float32x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); + ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); + ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); + ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); + return ret; +} + +__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) +vld4q_s8 (const int8_t * __a) +{ + int8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) +vld4q_p8 (const poly8_t * __a) +{ + poly8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) +vld4q_s16 (const int16_t * __a) +{ + int16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const 
__builtin_aarch64_simd_hi *) __a); + ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) +vld4q_p16 (const poly16_t * __a) +{ + poly16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) +vld4q_s32 (const int32_t * __a) +{ + int32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__)) +vld4q_s64 (const int64_t * __a) +{ + int64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); + ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); + ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); + ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); + return ret; +} + +__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) +vld4q_u8 (const uint8_t * __a) +{ + uint8x16x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); + ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); + ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); + ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); + return ret; +} + +__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) +vld4q_u16 (const uint16_t * __a) +{ + uint16x8x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); + ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); + ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); + ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); + return ret; +} + +__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) +vld4q_u32 (const uint32_t * __a) +{ + uint32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); + ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); + ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); + ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); + ret.val[3] = 
(uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); + return ret; +} + +__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__)) +vld4q_u64 (const uint64_t * __a) +{ + uint64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); + ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); + ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); + ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); + return ret; +} + +__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) +vld4q_f32 (const float32_t * __a) +{ + float32x4x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); + ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); + ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); + ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); + ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); + return ret; +} + +__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__)) +vld4q_f64 (const float64_t * __a) +{ + float64x2x4_t ret; + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); + ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); + ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); + ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); + ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); + return ret; +} + +/* vmax */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmax_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_smax_nanv2sf (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmax_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_smaxv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmax_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_smaxv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmax_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_smaxv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmax_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmax_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmax_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_smax_nanv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmaxq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_smax_nanv2df (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmaxq_s8 
(int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_smaxv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmaxq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_smaxv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmaxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_smaxv4si (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +/* vmaxnm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmaxnm_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_smaxv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_smaxv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_smaxv2df (__a, __b); +} + +/* vmaxv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vmaxv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vmaxv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vmaxv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vmaxv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vmaxv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vmaxv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), + 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), + 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmaxvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vmaxvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 
(__builtin_aarch64_reduc_smax_v16qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vmaxvq_s16 (int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vmaxvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vmaxvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vmaxvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vmaxvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), + 0); +} + +/* vmaxnmv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxnmv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), + 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmaxnmvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmaxnmvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0); +} + +/* vmin */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmin_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_smin_nanv2sf (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmin_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_sminv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmin_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_sminv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmin_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_sminv2si (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmin_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmin_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmin_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_smin_nanv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vminq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_smin_nanv2df (__a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vminq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_sminv16qi (__a, __b); +} + +__extension__ static 
__inline int16x8_t __attribute__ ((__always_inline__)) +vminq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sminv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vminq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sminv4si (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vminq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vminq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vminq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +/* vminnm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vminnm_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_sminv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminnmq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_sminv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vminnmq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_sminv2df (__a, __b); +} + +/* vminv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vminv_s8 (int8x8_t __a) +{ + return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), + 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vminv_s16 (int16x4_t __a) +{ + return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vminv_s32 (int32x2_t __a) +{ + return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vminv_u8 (uint8x8_t __a) +{ + return vget_lane_u8 ((uint8x8_t) + __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vminv_u16 (uint16x4_t __a) +{ + return vget_lane_u16 ((uint16x4_t) + __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vminv_u32 (uint32x2_t __a) +{ + return vget_lane_u32 ((uint32x2_t) + __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), + 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), + 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vminvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), + 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vminvq_s8 (int8x16_t __a) +{ + return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vminvq_s16 
(int16x8_t __a) +{ + return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vminvq_s32 (int32x4_t __a) +{ + return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vminvq_u8 (uint8x16_t __a) +{ + return vgetq_lane_u8 ((uint8x16_t) + __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), + 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vminvq_u16 (uint16x8_t __a) +{ + return vgetq_lane_u16 ((uint16x8_t) + __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), + 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vminvq_u32 (uint32x4_t __a) +{ + return vgetq_lane_u32 ((uint32x4_t) + __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), + 0); +} + +/* vminnmv */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminnmv_f32 (float32x2_t __a) +{ + return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vminnmvq_f32 (float32x4_t __a) +{ + return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vminnmvq_f64 (float64x2_t __a) +{ + return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0); +} + +/* vmla */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + return a + b * c; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + return a + b * c; +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + return a + b * c; +} + +/* vmla_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_lane_s16 (int16x4_t __a, int16x4_t __b, + int16x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_lane_s32 (int32x2_t __a, int32x2_t __b, + int32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane))); +} + +/* vmla_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_laneq_s16 (int16x4_t __a, int16x4_t 
__b, + int16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_laneq_s32 (int32x2_t __a, int32x2_t __b, + int32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); +} + +/* vmlaq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, + int16x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, + int32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x2_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane))); +} + + /* vmlaq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b, + int16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b, + int32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x8_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x4_t __c, const int __lane) +{ + return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); +} + +/* vmls */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) +{ + return a - b * c; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) +{ + return a - b * c; +} + 
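/* [Editorial aside, not part of the GCC 4.9 patch above.]  A minimal usage
   sketch of the multiply-accumulate/subtract intrinsics defined in this
   header (vmlaq_f32, vmlsq_f32, vdupq_n_f32); the function axpy_axmy and its
   parameter names are hypothetical, chosen for illustration only.  */

#include <arm_neon.h>

/* Computes acc + x * s - y * t on four float lanes at once.  */
static inline float32x4_t
axpy_axmy (float32x4_t acc, float32x4_t x, float32x4_t y,
           float32_t s, float32_t t)
{
  /* acc + x * s : vmlaq_f32 (a, b, c) expands to a + b * c above.  */
  float32x4_t r = vmlaq_f32 (acc, x, vdupq_n_f32 (s));
  /* r - y * t : vmlsq_f32 (a, b, c) expands to a - b * c above.  */
  return vmlsq_f32 (r, y, vdupq_n_f32 (t));
}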
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) +{ + return a - b * c; +} + +/* vmls_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_lane_f32 (float32x2_t __a, float32x2_t __b, + float32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_lane_s16 (int16x4_t __a, int16x4_t __b, + int16x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_lane_s32 (int32x2_t __a, int32x2_t __b, + int32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane))); +} + +/* vmls_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_laneq_f32 (float32x2_t __a, float32x2_t __b, + float32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_laneq_s16 (int16x4_t __a, int16x4_t __b, + int16x8_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_laneq_s32 (int32x2_t __a, int32x2_t __b, + int32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b, + uint16x8_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b, + uint32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); +} + +/* vmlsq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, + float32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, + int16x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, + int32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane))); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x4_t 
__attribute__ ((__always_inline__)) +vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x2_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane))); +} + + /* vmlsq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b, + float32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane))); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b, + int16x8_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane))); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b, + int32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane))); +} +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, + uint16x8_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane))); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, + uint32x4_t __c, const int __lane) +{ + return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane))); +} + +/* vmov_n_ */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmov_n_f32 (float32_t __a) +{ + return vdup_n_f32 (__a); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmov_n_f64 (float64_t __a) +{ + return __a; +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmov_n_p8 (poly8_t __a) +{ + return vdup_n_p8 (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vmov_n_p16 (poly16_t __a) +{ + return vdup_n_p16 (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmov_n_s8 (int8_t __a) +{ + return vdup_n_s8 (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmov_n_s16 (int16_t __a) +{ + return vdup_n_s16 (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmov_n_s32 (int32_t __a) +{ + return vdup_n_s32 (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vmov_n_s64 (int64_t __a) +{ + return __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmov_n_u8 (uint8_t __a) +{ + return vdup_n_u8 (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmov_n_u16 (uint16_t __a) +{ + return vdup_n_u16 (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmov_n_u32 (uint32_t __a) +{ + return vdup_n_u32 (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vmov_n_u64 (uint64_t __a) +{ + return __a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmovq_n_f32 (float32_t __a) +{ + return vdupq_n_f32 (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmovq_n_f64 (float64_t __a) +{ + return vdupq_n_f64 (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmovq_n_p8 (poly8_t __a) +{ + return vdupq_n_p8 (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmovq_n_p16 
(poly16_t __a) +{ + return vdupq_n_p16 (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovq_n_s8 (int8_t __a) +{ + return vdupq_n_s8 (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovq_n_s16 (int16_t __a) +{ + return vdupq_n_s16 (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovq_n_s32 (int32_t __a) +{ + return vdupq_n_s32 (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovq_n_s64 (int64_t __a) +{ + return vdupq_n_s64 (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovq_n_u8 (uint8_t __a) +{ + return vdupq_n_u8 (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovq_n_u16 (uint16_t __a) +{ + return vdupq_n_u16 (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovq_n_u32 (uint32_t __a) +{ + return vdupq_n_u32 (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovq_n_u64 (uint64_t __a) +{ + return vdupq_n_u64 (__a); +} + +/* vmul_lane */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_f32 (__b, __lane); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane) +{ + return __a * __b; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_s16 (__b, __lane); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_s32 (__b, __lane); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_u16 (__b, __lane); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_u32 (__b, __lane); +} + +/* vmul_laneq */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_f32 (__b, __lane); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_f64 (__b, __lane); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_s16 (__b, __lane); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_s32 (__b, __lane); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_u16 (__b, __lane); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_laneq_u32 
(uint32x2_t __a, uint32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_u32 (__b, __lane); +} + +/* vmulq_lane */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_f32 (__b, __lane); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane) +{ + return __a * __b; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_s16 (__b, __lane); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_s32 (__b, __lane); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_u16 (__b, __lane); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane) +{ + return __a * __aarch64_vget_lane_u32 (__b, __lane); +} + +/* vmulq_laneq */ + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_f32 (__b, __lane); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_f64 (__b, __lane); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_s16 (__b, __lane); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_s32 (__b, __lane); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_u16 (__b, __lane); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane) +{ + return __a * __aarch64_vgetq_lane_u32 (__b, __lane); +} + +/* vneg */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vneg_f32 (float32x2_t __a) +{ + return -__a; +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vneg_f64 (float64x1_t __a) +{ + return -__a; +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vneg_s8 (int8x8_t __a) +{ + return -__a; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vneg_s16 (int16x4_t __a) +{ + return -__a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vneg_s32 (int32x2_t __a) +{ + return -__a; +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vneg_s64 (int64x1_t __a) +{ + return -__a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vnegq_f32 (float32x4_t __a) +{ + return -__a; +} + +__extension__ static __inline 
float64x2_t __attribute__ ((__always_inline__)) +vnegq_f64 (float64x2_t __a) +{ + return -__a; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vnegq_s8 (int8x16_t __a) +{ + return -__a; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vnegq_s16 (int16x8_t __a) +{ + return -__a; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vnegq_s32 (int32x4_t __a) +{ + return -__a; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vnegq_s64 (int64x2_t __a) +{ + return -__a; +} + +/* vqabs */ + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqabsq_s64 (int64x2_t __a) +{ + return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqabsb_s8 (int8x1_t __a) +{ + return (int8x1_t) __builtin_aarch64_sqabsqi (__a); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqabsh_s16 (int16x1_t __a) +{ + return (int16x1_t) __builtin_aarch64_sqabshi (__a); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqabss_s32 (int32x1_t __a) +{ + return (int32x1_t) __builtin_aarch64_sqabssi (__a); +} + +/* vqadd */ + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqaddb_s8 (int8x1_t __a, int8x1_t __b) +{ + return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqaddh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqadds_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqaddd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqaddb_u8 (uint8x1_t __a, uint8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqaddh_u16 (uint16x1_t __a, uint16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqadds_u32 (uint32x1_t __a, uint32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqaddd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b); +} + +/* vqdmlal */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) 
+vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) +{ + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqdmlalv2si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) +{ + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +{ + return __builtin_aarch64_sqdmlalhi (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +{ + return __builtin_aarch64_sqdmlalsi 
(__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); +} + +/* vqdmlsl */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) +{ + return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) +{ + int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return __builtin_aarch64_sqdmlslv2si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) +{ + return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, + int const __d) +{ + return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) +{ + int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d); +} + +__extension__ static __inline int64x2_t 
__attribute__ ((__always_inline__)) +vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) +{ + return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c) +{ + return __builtin_aarch64_sqdmlslhi (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c) +{ + return __builtin_aarch64_sqdmlslsi (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); +} + +/* vqdmulh */ + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqdmulhh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmulhs_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); +} + +/* vqdmull */ + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_sqdmullv4hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sqdmull2v8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); +} + +__extension__ static __inline int32x4_t 
__attribute__ ((__always_inline__)) +vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) +{ + return __builtin_aarch64_sqdmull2_nv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) +{ + int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return __builtin_aarch64_sqdmull_nv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_sqdmullv2si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sqdmull2v4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) +{ + return __builtin_aarch64_sqdmull2_nv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) +{ + int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (__AARCH64_INT64_C (0))); + return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c) +{ + return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return __builtin_aarch64_sqdmull_nv2si (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmullh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmulls_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); +} + +/* vqmovn */ + +__extension__ 
static __inline int8x8_t __attribute__ ((__always_inline__)) +vqmovn_s16 (int16x8_t __a) +{ + return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqmovn_s32 (int32x4_t __a) +{ + return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqmovn_s64 (int64x2_t __a) +{ + return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovn_u16 (uint16x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovn_u32 (uint32x4_t __a) +{ + return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovn_u64 (uint64x2_t __a) +{ + return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqmovnh_s16 (int16x1_t __a) +{ + return (int8x1_t) __builtin_aarch64_sqmovnhi (__a); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqmovns_s32 (int32x1_t __a) +{ + return (int16x1_t) __builtin_aarch64_sqmovnsi (__a); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqmovnd_s64 (int64x1_t __a) +{ + return (int32x1_t) __builtin_aarch64_sqmovndi (__a); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqmovnh_u16 (uint16x1_t __a) +{ + return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqmovns_u32 (uint32x1_t __a) +{ + return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqmovnd_u64 (uint64x1_t __a) +{ + return (uint32x1_t) __builtin_aarch64_uqmovndi (__a); +} + +/* vqmovun */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovun_s16 (int16x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovun_s32 (int32x4_t __a) +{ + return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovun_s64 (int64x2_t __a) +{ + return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqmovunh_s16 (int16x1_t __a) +{ + return (int8x1_t) __builtin_aarch64_sqmovunhi (__a); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqmovuns_s32 (int32x1_t __a) +{ + return (int16x1_t) __builtin_aarch64_sqmovunsi (__a); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqmovund_s64 (int64x1_t __a) +{ + return (int32x1_t) __builtin_aarch64_sqmovundi (__a); +} + +/* vqneg */ + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqnegq_s64 (int64x2_t __a) +{ + return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqnegb_s8 (int8x1_t __a) +{ + return (int8x1_t) __builtin_aarch64_sqnegqi (__a); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqnegh_s16 
(int16x1_t __a) +{ + return (int16x1_t) __builtin_aarch64_sqneghi (__a); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqnegs_s32 (int32x1_t __a) +{ + return (int32x1_t) __builtin_aarch64_sqnegsi (__a); +} + +/* vqrdmulh */ + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c) +{ + return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); +} + +/* vqrshl */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_sqrshlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_sqrshlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_sqrshlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return __builtin_aarch64_sqrshldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqrshlq_s8 (int8x16_t 
__a, int8x16_t __b) +{ + return __builtin_aarch64_sqrshlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sqrshlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sqrshlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __builtin_aarch64_sqrshlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqrshlb_s8 (int8x1_t __a, int8x1_t __b) +{ + return __builtin_aarch64_sqrshlqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrshlh_s16 (int16x1_t __a, int16x1_t __b) +{ + return __builtin_aarch64_sqrshlhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrshls_s32 (int32x1_t __a, int32x1_t __b) +{ + return __builtin_aarch64_sqrshlsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqrshld_s64 (int64x1_t __a, int64x1_t __b) +{ + return __builtin_aarch64_sqrshldi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqrshls_u32 (uint32x1_t __a, uint32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqrshld_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b); +} + +/* vqrshrn */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b); +} + +__extension__ static __inline uint8x8_t 
__attribute__ ((__always_inline__)) +vqrshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqrshrnh_n_s16 (int16x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrshrns_n_s32 (int32x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrshrnd_n_s64 (int64x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqrshrnh_n_u16 (uint16x1_t __a, const int __b) +{ + return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqrshrns_n_u32 (uint32x1_t __a, const int __b) +{ + return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqrshrnd_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); +} + +/* vqrshrun */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqrshrunh_n_s16 (int16x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqrshruns_n_s32 (int32x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqrshrund_n_s64 (int64x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); +} + +/* vqshl */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_sqshlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return __builtin_aarch64_sqshlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return __builtin_aarch64_sqshlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t 
__attribute__ ((__always_inline__)) +vqshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return __builtin_aarch64_sqshldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_sqshlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return __builtin_aarch64_sqshlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return __builtin_aarch64_sqshlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return __builtin_aarch64_sqshlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshlb_s8 (int8x1_t __a, int8x1_t __b) +{ + return __builtin_aarch64_sqshlqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqshlh_s16 (int16x1_t __a, int16x1_t __b) +{ + return __builtin_aarch64_sqshlhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqshls_s32 (int32x1_t __a, int32x1_t __b) +{ + return __builtin_aarch64_sqshlsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshld_s64 (int64x1_t __a, int64x1_t __b) +{ + return __builtin_aarch64_sqshldi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqshlb_u8 (uint8x1_t __a, uint8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqshlh_u16 (uint16x1_t __a, uint16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b); +} + +__extension__ 
static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqshls_u32 (uint32x1_t __a, uint32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshld_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshl_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); +} + 
+__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshlb_n_s8 (int8x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqshlh_n_s16 (int16x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqshls_n_s32 (int32x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshld_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqshlb_n_u8 (uint8x1_t __a, const int __b) +{ + return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqshlh_n_u16 (uint16x1_t __a, const int __b) +{ + return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqshls_n_u32 (uint32x1_t __a, const int __b) +{ + return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshld_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b); +} + +/* vqshlu */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshlu_n_s8 (int8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshlu_n_s16 (int16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshlu_n_s32 (int32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshlu_n_s64 (int64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshluq_n_s8 (int8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshluq_n_s16 (int16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshluq_n_s32 (int32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshluq_n_s64 (int64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshlub_n_s8 (int8x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqshluh_n_s16 (int16x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ 
((__always_inline__)) +vqshlus_n_s32 (int32x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshlud_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); +} + +/* vqshrn */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshrnh_n_s16 (int16x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqshrns_n_s32 (int32x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqshrnd_n_s64 (int64x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqshrnh_n_u16 (uint16x1_t __a, const int __b) +{ + return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqshrns_n_u32 (uint32x1_t __a, const int __b) +{ + return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqshrnd_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b); +} + +/* vqshrun */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqshrunh_n_s16 (int16x1_t __a, const int __b) +{ + return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b); +} + +__extension__ static __inline 
int16x1_t __attribute__ ((__always_inline__)) +vqshruns_n_s32 (int32x1_t __a, const int __b) +{ + return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqshrund_n_s64 (int64x1_t __a, const int __b) +{ + return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b); +} + +/* vqsub */ + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vqsubb_s8 (int8x1_t __a, int8x1_t __b) +{ + return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vqsubh_s16 (int16x1_t __a, int16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vqsubs_s32 (int32x1_t __a, int32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqsubd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vqsubb_u8 (uint8x1_t __a, uint8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vqsubh_u16 (uint16x1_t __a, uint16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vqsubs_u32 (uint32x1_t __a, uint32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqsubd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); +} + +/* vrecpe */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpes_f32 (float32_t __a) +{ + return __builtin_aarch64_frecpesf (__a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecped_f64 (float64_t __a) +{ + return __builtin_aarch64_frecpedf (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecpe_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frecpev2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpeq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frecpev4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrecpeq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frecpev2df (__a); +} + +/* vrecps */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpss_f32 (float32_t __a, float32_t __b) +{ + return __builtin_aarch64_frecpssf (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecpsd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_aarch64_frecpsdf (__a, __b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecps_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_frecpsv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_frecpsv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrecpsq_f64 (float64x2_t __a, float64x2_t 
__b) +{ + return __builtin_aarch64_frecpsv2df (__a, __b); +} + +/* vrecpx */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpxs_f32 (float32_t __a) +{ + return __builtin_aarch64_frecpxsf (__a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecpxd_f64 (float64_t __a) +{ + return __builtin_aarch64_frecpxdf (__a); +} + +/* vrnd */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnd_f32 (float32x2_t __a) +{ + return __builtin_aarch64_btruncv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_btruncv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_btruncv2df (__a); +} + +/* vrnda */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnda_f32 (float32x2_t __a) +{ + return __builtin_aarch64_roundv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndaq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_roundv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndaq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_roundv2df (__a); +} + +/* vrndi */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndi_f32 (float32x2_t __a) +{ + return __builtin_aarch64_nearbyintv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndiq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_nearbyintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndiq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_nearbyintv2df (__a); +} + +/* vrndm */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndm_f32 (float32x2_t __a) +{ + return __builtin_aarch64_floorv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndmq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_floorv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndmq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_floorv2df (__a); +} + +/* vrndn */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndn_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frintnv2sf (__a); +} +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndnq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frintnv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndnq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frintnv2df (__a); +} + +/* vrndp */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndp_f32 (float32x2_t __a) +{ + return __builtin_aarch64_ceilv2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndpq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_ceilv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndpq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_ceilv2df (__a); +} + +/* vrndx */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndx_f32 (float32x2_t __a) +{ + return __builtin_aarch64_rintv2sf (__a); 
+} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndxq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_rintv4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrndxq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_rintv2df (__a); +} + +/* vrshl */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_srshldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshld_s64 
(int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_srshldi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshld_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b); +} + +/* vrshr */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshr_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ 
((__always_inline__)) +vrshrd_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshrd_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b); +} + +/* vrsra */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a, + (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a, + (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a, + (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a, + (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) +vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a, + (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a, + (int64x2_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO + +/* vsha1 */ + +static __inline uint32x4_t +vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); +} +static __inline uint32x4_t +vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); +} +static __inline uint32x4_t +vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); +} + +static __inline uint32_t +vsha1h_u32 (uint32_t hash_e) +{ + return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); +} + +static __inline uint32x4_t +vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) +{ + return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); +} + +static __inline uint32x4_t +vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) +{ + return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); +} + +static __inline uint32x4_t +vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); +} + +static __inline uint32x4_t +vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) +{ + return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); +} + +static __inline uint32x4_t +vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) +{ + return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); +} + +static __inline uint32x4_t +vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) +{ + return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); +} + +static __inline poly128_t +vmull_p64 (poly64_t a, poly64_t b) +{ + return + __builtin_aarch64_crypto_pmulldi_ppp (a, b); +} + +static __inline poly128_t +vmull_high_p64 (poly64x2_t a, poly64x2_t b) +{ + return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); +} + +#endif + +/* vshl */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_n_s64 (int64x1_t 
__a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_ashldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshld_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_ashldi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshld_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) 
__builtin_aarch64_sshldi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshld_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshld_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshll_high_n_s8 (int8x16_t __a, const int __b) +{ + return __builtin_aarch64_sshll2_nv16qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshll_high_n_s16 (int16x8_t __a, const int __b) +{ + return __builtin_aarch64_sshll2_nv8hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshll_high_n_s32 (int32x4_t __a, const int __b) +{ + return __builtin_aarch64_sshll2_nv4si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshll_high_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b); +} + +__extension__ 
static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshll_high_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshll_high_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshll_n_s8 (int8x8_t __a, const int __b) +{ + return __builtin_aarch64_sshll_nv8qi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshll_n_s16 (int16x4_t __a, const int __b) +{ + return __builtin_aarch64_sshll_nv4hi (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshll_n_s32 (int32x2_t __a, const int __b) +{ + return __builtin_aarch64_sshll_nv2si (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshll_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshll_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshll_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b); +} + +/* vshr */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshr_n_u64 (uint64x1_t __a, const int __b) +{ + return __builtin_aarch64_lshr_simddi_uus ( __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshrd_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t) __builtin_aarch64_ashr_simddi (__a, __b); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vshrd_n_u64 (uint64_t __a, const int __b) +{ + return __builtin_aarch64_lshr_simddi_uus (__a, __b); +} + +/* vsli */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a, + (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a, + (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a, + (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c); +} + +__extension__ static __inline 
int16x8_t __attribute__ ((__always_inline__)) +vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a, + (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a, + (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a, + (int64x2_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c); +} + +/* vsqadd */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsqadd_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsqadd_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsqadd_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsqadd_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline 
uint64x2_t __attribute__ ((__always_inline__)) +vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) +vsqaddb_u8 (uint8x1_t __a, int8x1_t __b) +{ + return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b); +} + +__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) +vsqaddh_u16 (uint16x1_t __a, int16x1_t __b) +{ + return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b); +} + +__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) +vsqadds_u32 (uint32x1_t __a, int32x1_t __b) +{ + return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsqaddd_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); +} + +/* vsqrt */ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vsqrt_f32 (float32x2_t a) +{ + return __builtin_aarch64_sqrtv2sf (a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsqrtq_f32 (float32x4_t a) +{ + return __builtin_aarch64_sqrtv4sf (a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vsqrtq_f64 (float64x2_t a) +{ + return __builtin_aarch64_sqrtv2df (a); +} + +/* vsra */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a, + (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a, + (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a, + (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a, + (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a, + (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a, + (int64x2_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c); +} + +/* vsri */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a, + (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a, + (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a, + (int32x2_t) __b, __c); +} + +__extension__ static __inline 
uint64x1_t __attribute__ ((__always_inline__)) +vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a, + (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a, + (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a, + (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a, + (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a, + (int64x2_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c); +} + +/* vst1 */ + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f32 (float32_t *a, float32x2_t b) +{ + __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f64 (float64_t *a, float64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p8 (poly8_t *a, poly8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, + (int8x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p16 (poly16_t *a, poly16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, + (int16x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s8 (int8_t *a, int8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s16 (int16_t *a, int16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b); +} + +__extension__ static __inline 
void __attribute__ ((__always_inline__)) +vst1_s32 (int32_t *a, int32x2_t b) +{ + __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s64 (int64_t *a, int64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u8 (uint8_t *a, uint8x8_t b) +{ + __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, + (int8x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u16 (uint16_t *a, uint16x4_t b) +{ + __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, + (int16x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u32 (uint32_t *a, uint32x2_t b) +{ + __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, + (int32x2_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u64 (uint64_t *a, uint64x1_t b) +{ + *a = b; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f32 (float32_t *a, float32x4_t b) +{ + __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f64 (float64_t *a, float64x2_t b) +{ + __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); +} + +/* vst1q */ + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p8 (poly8_t *a, poly8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, + (int8x16_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p16 (poly16_t *a, poly16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, + (int16x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s8 (int8_t *a, int8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s16 (int16_t *a, int16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s32 (int32_t *a, int32x4_t b) +{ + __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s64 (int64_t *a, int64x2_t b) +{ + __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u8 (uint8_t *a, uint8x16_t b) +{ + __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, + (int8x16_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u16 (uint16_t *a, uint16x8_t b) +{ + __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, + (int16x8_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u32 (uint32_t *a, uint32x4_t b) +{ + __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, + (int32x4_t) b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u64 (uint64_t *a, uint64x2_t b) +{ + __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, + (int64x2_t) b); +} + +/* vstn */ + +__extension__ static __inline void +vst2_s64 (int64_t * __a, int64x1x2_t val) +{ + __builtin_aarch64_simd_oi __o; + int64x2x2_t temp; + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = 
vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); + __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst2_u64 (uint64_t * __a, uint64x1x2_t val) +{ + __builtin_aarch64_simd_oi __o; + uint64x2x2_t temp; + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); + __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst2_f64 (float64_t * __a, float64x1x2_t val) +{ + __builtin_aarch64_simd_oi __o; + float64x2x2_t temp; + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); + __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); +} + +__extension__ static __inline void +vst2_s8 (int8_t * __a, int8x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + int8x16x2_t temp; + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p8 (poly8_t * __a, poly8x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + poly8x16x2_t temp; + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s16 (int16_t * __a, int16x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + int16x8x2_t temp; + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p16 (poly16_t * __a, poly16x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + poly16x8x2_t temp; + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + 
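The vst2_* wrappers above widen each 64-bit input vector to a Q register (padding the upper half with zeroes via vcombine/vcreate), pack the pair into a __builtin_aarch64_simd_oi tuple, and hand it to the corresponding st2 builtin, which writes the elements to memory in interleaved order. A minimal usage sketch follows, assuming an AArch64 target with this header available; the function name, parameters, and buffer are illustrative and not part of arm_neon.h:

#include <arm_neon.h>

/* Store two int16x4_t planes interleaved with vst2_s16:
   out = { even[0], odd[0], even[1], odd[1], ... }.  */
void
store_interleaved_s16 (int16_t out[8], int16x4_t even, int16x4_t odd)
{
  int16x4x2_t pair;
  pair.val[0] = even;
  pair.val[1] = odd;
  vst2_s16 (out, pair);
}

The same pattern applies to the other vst2/vst3 variants in this section; only the element type of the x2/x3 structure and the destination pointer change.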
+__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s32 (int32_t * __a, int32x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + int32x4x2_t temp; + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); + __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u8 (uint8_t * __a, uint8x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + uint8x16x2_t temp; + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); + __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u16 (uint16_t * __a, uint16x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + uint16x8x2_t temp; + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); + __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u32 (uint32_t * __a, uint32x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + uint32x4x2_t temp; + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); + __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_f32 (float32_t * __a, float32x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + float32x4x2_t temp; + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); + __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s8 (int8_t * __a, int8x16x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p8 (poly8_t * __a, poly8x16x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) 
__a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s16 (int16_t * __a, int16x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p16 (poly16_t * __a, poly16x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s32 (int32_t * __a, int32x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); + __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s64 (int64_t * __a, int64x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); + __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u8 (uint8_t * __a, uint8x16x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); + __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u16 (uint16_t * __a, uint16x8x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); + __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u32 (uint32_t * __a, uint32x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); + __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u64 (uint64_t * __a, uint64x2x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); + __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f32 (float32_t * __a, float32x4x2_t val) +{ + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); + __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f64 (float64_t * __a, float64x2x2_t val) +{ + 
__builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); + __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); +} + +__extension__ static __inline void +vst3_s64 (int64_t * __a, int64x1x3_t val) +{ + __builtin_aarch64_simd_ci __o; + int64x2x3_t temp; + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); + __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst3_u64 (uint64_t * __a, uint64x1x3_t val) +{ + __builtin_aarch64_simd_ci __o; + uint64x2x3_t temp; + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); + __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst3_f64 (float64_t * __a, float64x1x3_t val) +{ + __builtin_aarch64_simd_ci __o; + float64x2x3_t temp; + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); + __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); +} + +__extension__ static __inline void +vst3_s8 (int8_t * __a, int8x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + int8x16x3_t temp; + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p8 (poly8_t * __a, poly8x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + poly8x16x3_t temp; + temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s16 (int16_t * __a, int16x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + int16x8x3_t temp; + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p16 (poly16_t * __a, poly16x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + poly16x8x3_t temp; + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s32 (int32_t * __a, int32x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + int32x4x3_t temp; + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u8 (uint8_t * __a, uint8x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + uint8x16x3_t temp; + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u16 (uint16_t * __a, uint16x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + uint16x8x3_t temp; + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, 
(int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u32 (uint32_t * __a, uint32x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + uint32x4x3_t temp; + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_f32 (float32_t * __a, float32x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + float32x4x3_t temp; + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); + __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s8 (int8_t * __a, int8x16x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p8 (poly8_t * __a, poly8x16x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s16 (int16_t * __a, int16x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p16 (poly16_t * __a, poly16x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s32 (int32_t * __a, 
int32x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s64 (int64_t * __a, int64x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u8 (uint8_t * __a, uint8x16x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u16 (uint16_t * __a, uint16x8x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u32 (uint32_t * __a, uint32x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u64 (uint64_t * __a, uint64x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f32 (float32_t * __a, float32x4x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); + __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f64 (float64_t * __a, float64x2x3_t val) +{ + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); + __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); +} + 
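As an aside for the reader (this sketch is not part of the patch): the vst2/vst3/vst4 families defined above are normally paired with the matching vldN loads — the caller de-interleaves with vldN, works on the .val[] registers, and re-interleaves with vstN, which the compiler maps to a single ST2/ST3/ST4 structure store. The function name and buffer layout below are illustrative assumptions.

#include <arm_neon.h>

/* Halve the red channel of 16 interleaved RGB pixels (48 bytes).  */
void
halve_red_rgb24 (uint8_t *rgb)
{
  uint8x16x3_t pix = vld3q_u8 (rgb);        /* LD3: de-interleave into R, G, B planes.  */
  pix.val[0] = vshrq_n_u8 (pix.val[0], 1);  /* Halve the red plane.  */
  vst3q_u8 (rgb, pix);                      /* ST3: re-interleave and store, using vst3q_u8 as defined above.  */
}
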
+__extension__ static __inline void +vst4_s64 (int64_t * __a, int64x1x4_t val) +{ + __builtin_aarch64_simd_xi __o; + int64x2x4_t temp; + temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst4_u64 (uint64_t * __a, uint64x1x4_t val) +{ + __builtin_aarch64_simd_xi __o; + uint64x2x4_t temp; + temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void +vst4_f64 (float64_t * __a, float64x1x4_t val) +{ + __builtin_aarch64_simd_xi __o; + float64x2x4_t temp; + temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3); + __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); +} + +__extension__ static __inline void +vst4_s8 (int8_t * __a, int8x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + int8x16x4_t temp; + temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p8 (poly8_t * __a, poly8x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + poly8x16x4_t temp; + temp.val[0] = vcombine_p8 
(val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s16 (int16_t * __a, int16x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + int16x8x4_t temp; + temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p16 (poly16_t * __a, poly16x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + poly16x8x4_t temp; + temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s32 (int32_t * __a, int32x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + int32x4x4_t temp; + temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); + temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u8 (uint8_t * __a, uint8x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + uint8x16x4_t temp; + temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[1] = 
vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u16 (uint16_t * __a, uint16x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + uint16x8x4_t temp; + temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u32 (uint32_t * __a, uint32x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + uint32x4x4_t temp; + temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_f32 (float32_t * __a, float32x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + float32x4x4_t temp; + temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); + temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3); + __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s8 (int8_t * __a, int8x16x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); + __o 
= __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p8 (poly8_t * __a, poly8x16x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s16 (int16_t * __a, int16x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p16 (poly16_t * __a, poly16x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s32 (int32_t * __a, int32x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s64 (int64_t * __a, int64x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u8 (uint8_t * __a, uint8x16x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u16 (uint16_t * __a, uint16x8x4_t val) +{ + __builtin_aarch64_simd_xi __o; + 
__o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u32 (uint32_t * __a, uint32x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u64 (uint64_t * __a, uint64x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f32 (float32_t * __a, float32x4x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3); + __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f64 (float64_t * __a, float64x2x4_t val) +{ + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3); + __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); +} + +/* vsub */ + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsubd_s64 (int64x1_t __a, int64x1_t __b) +{ + return __a - __b; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsubd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return __a - __b; +} + +/* vtbx1 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (8)); + int8x8_t __tbl = vtbl1_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t 
__idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); + poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); +} + +/* vtbx3 */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), + vmov_n_u8 (24)); + int8x8_t __tbl = vtbl3_s8 (__tab, __idx); + + return vbsl_s8 (__mask, __tbl, __r); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); + + return vbsl_u8 (__mask, __tbl, __r); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) +{ + uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); + poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); + + return vbsl_p8 (__mask, __tbl, __r); +} + +/* vtrn */ + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vtrn_f32 (float32x2_t a, float32x2_t b) +{ + return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vtrn_p8 (poly8x8_t a, poly8x8_t b) +{ + return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vtrn_p16 (poly16x4_t a, poly16x4_t b) +{ + return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vtrn_s8 (int8x8_t a, int8x8_t b) +{ + return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vtrn_s16 (int16x4_t a, int16x4_t b) +{ + return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vtrn_s32 (int32x2_t a, int32x2_t b) +{ + return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vtrn_u8 (uint8x8_t a, uint8x8_t b) +{ + return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vtrn_u16 (uint16x4_t a, uint16x4_t b) +{ + return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vtrn_u32 (uint32x2_t a, uint32x2_t b) +{ + return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_f32 (float32x4_t a, float32x4_t b) +{ + return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_p8 (poly8x16_t a, poly8x16_t b) +{ + return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_p16 (poly16x8_t a, poly16x8_t b) +{ + return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_s8 (int8x16_t a, int8x16_t b) +{ + return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; +} + +__extension__ static __inline int16x8x2_t __attribute__ 
((__always_inline__)) +vtrnq_s16 (int16x8_t a, int16x8_t b) +{ + return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_s32 (int32x4_t a, int32x4_t b) +{ + return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_u8 (uint8x16_t a, uint8x16_t b) +{ + return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_u16 (uint16x8_t a, uint16x8_t b) +{ + return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_u32 (uint32x4_t a, uint32x4_t b) +{ + return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; +} + +/* vtst */ + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_s64 (int64x1_t __a, int64x1_t __b) +{ + return (__a & __b) ? -1ll : 0ll; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a, + (int8x8_t) __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a, + (int16x4_t) __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a, + (int32x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (__a & __b) ? 
-1ll : 0ll; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtstq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a, + (int8x16_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a, + (int16x8_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a, + (int32x4_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtstq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a, + (int64x2_t) __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtstd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (__a & __b) ? -1ll : 0ll; +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtstd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (__a & __b) ? 
-1ll : 0ll; +} + +/* vuqadd */ + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuqadd_s8 (int8x8_t __a, uint8x8_t __b) +{ + return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuqadd_s16 (int16x4_t __a, uint16x4_t __b) +{ + return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuqadd_s32 (int32x2_t __a, uint32x2_t __b) +{ + return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vuqadd_s64 (int64x1_t __a, uint64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) +{ + return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) +{ + return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) +{ + return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) +{ + return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); +} + +__extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) +{ + return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); +} + +__extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) +vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) +{ + return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); +} + +__extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) +vuqadds_s32 (int32x1_t __a, uint32x1_t __b) +{ + return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) +{ + return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); +} + +#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ + __extension__ static __inline rettype \ + __attribute__ ((__always_inline__)) \ + v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ + { \ + return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ + v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ + } + +#define __INTERLEAVE_LIST(op) \ + __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ + __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ + __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ + __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ + __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ + __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \ + __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ + __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ + __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ + __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ + __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \ + __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ + __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ + 
__DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ + __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ + __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ + __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ + __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) + +/* vuzp */ + +__INTERLEAVE_LIST (uzp) + +/* vzip */ + +__INTERLEAVE_LIST (zip) + +#undef __INTERLEAVE_LIST +#undef __DEFINTERLEAVE + +/* End of optimal implementations in approved order. */ + +#undef __aarch64_vget_lane_any +#undef __aarch64_vget_lane_f32 +#undef __aarch64_vget_lane_f64 +#undef __aarch64_vget_lane_p8 +#undef __aarch64_vget_lane_p16 +#undef __aarch64_vget_lane_s8 +#undef __aarch64_vget_lane_s16 +#undef __aarch64_vget_lane_s32 +#undef __aarch64_vget_lane_s64 +#undef __aarch64_vget_lane_u8 +#undef __aarch64_vget_lane_u16 +#undef __aarch64_vget_lane_u32 +#undef __aarch64_vget_lane_u64 + +#undef __aarch64_vgetq_lane_f32 +#undef __aarch64_vgetq_lane_f64 +#undef __aarch64_vgetq_lane_p8 +#undef __aarch64_vgetq_lane_p16 +#undef __aarch64_vgetq_lane_s8 +#undef __aarch64_vgetq_lane_s16 +#undef __aarch64_vgetq_lane_s32 +#undef __aarch64_vgetq_lane_s64 +#undef __aarch64_vgetq_lane_u8 +#undef __aarch64_vgetq_lane_u16 +#undef __aarch64_vgetq_lane_u32 +#undef __aarch64_vgetq_lane_u64 + +#undef __aarch64_vdup_lane_any +#undef __aarch64_vdup_lane_f32 +#undef __aarch64_vdup_lane_f64 +#undef __aarch64_vdup_lane_p8 +#undef __aarch64_vdup_lane_p16 +#undef __aarch64_vdup_lane_s8 +#undef __aarch64_vdup_lane_s16 +#undef __aarch64_vdup_lane_s32 +#undef __aarch64_vdup_lane_s64 +#undef __aarch64_vdup_lane_u8 +#undef __aarch64_vdup_lane_u16 +#undef __aarch64_vdup_lane_u32 +#undef __aarch64_vdup_lane_u64 +#undef __aarch64_vdup_laneq_f32 +#undef __aarch64_vdup_laneq_f64 +#undef __aarch64_vdup_laneq_p8 +#undef __aarch64_vdup_laneq_p16 +#undef __aarch64_vdup_laneq_s8 +#undef __aarch64_vdup_laneq_s16 +#undef __aarch64_vdup_laneq_s32 +#undef __aarch64_vdup_laneq_s64 +#undef __aarch64_vdup_laneq_u8 +#undef __aarch64_vdup_laneq_u16 +#undef __aarch64_vdup_laneq_u32 +#undef __aarch64_vdup_laneq_u64 +#undef __aarch64_vdupq_lane_f32 +#undef __aarch64_vdupq_lane_f64 +#undef __aarch64_vdupq_lane_p8 +#undef __aarch64_vdupq_lane_p16 +#undef __aarch64_vdupq_lane_s8 +#undef __aarch64_vdupq_lane_s16 +#undef __aarch64_vdupq_lane_s32 +#undef __aarch64_vdupq_lane_s64 +#undef __aarch64_vdupq_lane_u8 +#undef __aarch64_vdupq_lane_u16 +#undef __aarch64_vdupq_lane_u32 +#undef __aarch64_vdupq_lane_u64 +#undef __aarch64_vdupq_laneq_f32 +#undef __aarch64_vdupq_laneq_f64 +#undef __aarch64_vdupq_laneq_p8 +#undef __aarch64_vdupq_laneq_p16 +#undef __aarch64_vdupq_laneq_s8 +#undef __aarch64_vdupq_laneq_s16 +#undef __aarch64_vdupq_laneq_s32 +#undef __aarch64_vdupq_laneq_s64 +#undef __aarch64_vdupq_laneq_u8 +#undef __aarch64_vdupq_laneq_u16 +#undef __aarch64_vdupq_laneq_u32 +#undef __aarch64_vdupq_laneq_u64 + +#endif diff --git a/gcc-4.9/gcc/config/aarch64/atomics.md b/gcc-4.9/gcc/config/aarch64/atomics.md new file mode 100644 index 000000000..bffa465de --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/atomics.md @@ -0,0 +1,382 @@ +;; Machine description for AArch64 processor synchronization primitives. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspecv"
+ [
+    UNSPECV_LX			; Represent a load-exclusive.
+    UNSPECV_SX			; Represent a store-exclusive.
+    UNSPECV_LDA			; Represent an atomic load or load-acquire.
+    UNSPECV_STL			; Represent an atomic store or store-release.
+    UNSPECV_ATOMIC_CMPSW	; Represent an atomic compare swap.
+    UNSPECV_ATOMIC_EXCHG	; Represent an atomic exchange.
+    UNSPECV_ATOMIC_OP		; Represent an atomic operation.
+])
+
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:SI 0 "register_operand" "")			;; bool out
+   (match_operand:ALLI 1 "register_operand" "")			;; val out
+   (match_operand:ALLI 2 "aarch64_sync_memory_operand" "")	;; memory
+   (match_operand:ALLI 3 "general_operand" "")			;; expected
+   (match_operand:ALLI 4 "register_operand" "")			;; desired
+   (match_operand:SI 5 "const_int_operand")			;; is_weak
+   (match_operand:SI 6 "const_int_operand")			;; mod_s
+   (match_operand:SI 7 "const_int_operand")]			;; mod_f
+  ""
+  {
+    aarch64_expand_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
+    (zero_extend:SI
+      (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:SHORT
+      [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected
+       (match_operand:SHORT 3 "register_operand" "r")		;; desired
+       (match_operand:SI 4 "const_int_operand")		;; is_weak
+       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))
+   (clobber (match_scratch:SI 7 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:GPI 0 "register_operand" "=&r")		;; val out
+    (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))	;; memory
+   (set (match_dup 1)
+    (unspec_volatile:GPI
+      [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect
+       (match_operand:GPI 3 "register_operand" "r")		;; desired
+       (match_operand:SI 4 "const_int_operand")		;; is_weak
+       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))
+   (clobber (match_scratch:SI 7 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_compare_and_swap (operands);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_exchange<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")		;; output
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))		;; memory
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 2 "register_operand" "r")	;; input
+       (match_operand:SI 3 "const_int_operand" "")]	;; model
+      UNSPECV_ATOMIC_EXCHG))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (SET, operands[0], NULL, operands[1],
+			     operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_<atomic_optab><mode>"
+ [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
+   (unspec_volatile:ALLI
+    [(atomic_op:ALLI (match_dup 0)
+      (match_operand:ALLI 1 "<atomic_op_operand>" "rn"))
+     (match_operand:SI 2 "const_int_operand")]		;; model
+    UNSPECV_ATOMIC_OP))
+  (clobber (reg:CC CC_REGNUM))
+  (clobber (match_scratch:ALLI 3 "=&r"))
+  (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
+			     operands[1], operands[2], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_nand<mode>"
+  [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
+    (unspec_volatile:ALLI
+      [(not:ALLI
+	 (and:ALLI (match_dup 0)
+	   (match_operand:ALLI 1 "aarch64_logical_operand" "rn")))
+       (match_operand:SI 2 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 3 "=&r"))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (NOT, NULL, operands[3], operands[0],
+			     operands[1], operands[2], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_fetch_<atomic_optab><mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(atomic_op:ALLI (match_dup 1)
+	(match_operand:ALLI 2 "<atomic_op_operand>" "rn"))
+       (match_operand:SI 3 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 4 "=&r"))
+   (clobber (match_scratch:SI 5 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, operands[0], operands[4], operands[1],
+			     operands[2], operands[3], operands[5]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_fetch_nand<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(not:ALLI
+	 (and:ALLI (match_dup 1)
+	   (match_operand:ALLI 2 "aarch64_logical_operand" "rn")))
+       (match_operand:SI 3 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:ALLI 4 "=&r"))
+   (clobber (match_scratch:SI 5 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (NOT, operands[0], operands[4], operands[1],
+			     operands[2], operands[3], operands[5]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_<atomic_optab>_fetch<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (atomic_op:ALLI
+      (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+      (match_operand:ALLI 2 "<atomic_op_operand>" "rn")))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1) (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (<CODE>, NULL, operands[0], operands[1],
+			     operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "atomic_nand_fetch<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=&r")
+    (not:ALLI
+      (and:ALLI
+	(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
+	(match_operand:ALLI 2 "aarch64_logical_operand" "rn"))))
+   (set (match_dup 1)
+    (unspec_volatile:ALLI
+      [(match_dup 1) (match_dup 2)
+       (match_operand:SI 3 "const_int_operand")]	;; model
+      UNSPECV_ATOMIC_OP))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (match_scratch:SI 4 "=&r"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_atomic_op (NOT, NULL, operands[0], operands[1],
+			     operands[2], operands[3], operands[4]);
+    DONE;
+  }
+)
+
+(define_insn "atomic_load<mode>"
+  [(set (match_operand:ALLI 0 "register_operand" "=r")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 1 "aarch64_sync_memory_operand" "Q")
+       (match_operand:SI 2 "const_int_operand")]	;; model
+      UNSPECV_LDA))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldr<atomic_sfx>\t%<w>0, %1";
+    else
+      return "ldar<atomic_sfx>\t%<w>0, %1";
+  }
+)
+
+(define_insn "atomic_store<mode>"
+  [(set (match_operand:ALLI 0 "memory_operand" "=Q")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 1 "general_operand" "rZ")
+       (match_operand:SI 2 "const_int_operand")]	;; model
+      UNSPECV_STL))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_ACQUIRE)
+      return "str<atomic_sfx>\t%<w>1, %0";
+    else
+      return "stlr<atomic_sfx>\t%<w>1, %0";
+  }
+)
+
+(define_insn "aarch64_load_exclusive<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+    (zero_extend:SI
+      (unspec_volatile:SHORT
+	[(match_operand:SHORT 1 "aarch64_sync_memory_operand" "Q")
+	 (match_operand:SI 2 "const_int_operand")]
+	UNSPECV_LX)))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldxr<atomic_sfx>\t%w0, %1";
+    else
+      return "ldaxr<atomic_sfx>\t%w0, %1";
+  }
+)
+
+(define_insn "aarch64_load_exclusive<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+    (unspec_volatile:GPI
+      [(match_operand:GPI 1 "aarch64_sync_memory_operand" "Q")
+       (match_operand:SI 2 "const_int_operand")]
+      UNSPECV_LX))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_RELEASE)
+      return "ldxr\t%<w>0, %1";
+    else
+      return "ldaxr\t%<w>0, %1";
+  }
+)
+
+(define_insn "aarch64_store_exclusive<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+    (unspec_volatile:SI [(const_int 0)] UNSPECV_SX))
+   (set (match_operand:ALLI 1 "aarch64_sync_memory_operand" "=Q")
+    (unspec_volatile:ALLI
+      [(match_operand:ALLI 2 "register_operand" "r")
+       (match_operand:SI 3 "const_int_operand")]
+      UNSPECV_SX))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[3]);
+    if (model == MEMMODEL_RELAXED
+	|| model == MEMMODEL_CONSUME
+	|| model == MEMMODEL_ACQUIRE)
+      return "stxr<atomic_sfx>\t%w0, %<w>2, %1";
+    else
+      return "stlxr<atomic_sfx>\t%w0, %<w>2, %1";
+  }
+)
+
+(define_expand "mem_thread_fence"
+  [(match_operand:SI 0 "const_int_operand" "")]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[0]);
+    if (model != MEMMODEL_RELAXED && model != MEMMODEL_CONSUME)
+      emit_insn (gen_dmb (operands[0]));
+    DONE;
+  }
+)
+
+(define_expand "dmb"
+  [(set (match_dup 1)
+    (unspec:BLK [(match_dup 1) (match_operand:SI 0 "const_int_operand")]
+     UNSPEC_MB))]
+   ""
+   {
+    operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+    MEM_VOLATILE_P (operands[1]) = 1;
+   }
+)
+
+(define_insn "*dmb"
+  [(set (match_operand:BLK 0 "" "")
+    (unspec:BLK [(match_dup 0) (match_operand:SI 1 "const_int_operand")]
+     UNSPEC_MB))]
+  ""
+  {
+    enum memmodel model = (enum memmodel) INTVAL (operands[1]);
+    if (model == MEMMODEL_ACQUIRE)
+      return "dmb\\tishld";
+    else
+      return "dmb\\tish";
+  }
+)
diff --git
a/gcc-4.9/gcc/config/aarch64/biarchilp32.h b/gcc-4.9/gcc/config/aarch64/biarchilp32.h new file mode 100644 index 000000000..579673ced --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/biarchilp32.h @@ -0,0 +1,29 @@ +/* Make configure files to produce biarch compiler defaulting to ilp32 ABI. + This file must be included very first, while the OS specific file later + to overwrite otherwise wrong defaults. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define AARCH64_ABI_DEFAULT AARCH64_ABI_ILP32 +#define TARGET_DATA_MODEL 2 diff --git a/gcc-4.9/gcc/config/aarch64/biarchlp64.h b/gcc-4.9/gcc/config/aarch64/biarchlp64.h new file mode 100644 index 000000000..03dd35508 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/biarchlp64.h @@ -0,0 +1,29 @@ +/* Make configure files to produce biarch compiler defaulting to ilp64 ABI. + This file must be included very first, while the OS specific file later + to overwrite otherwise wrong defaults. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define AARCH64_ABI_DEFAULT AARCH64_ABI_LP64 +#define TARGET_DATA_MODEL 1 diff --git a/gcc-4.9/gcc/config/aarch64/constraints.md b/gcc-4.9/gcc/config/aarch64/constraints.md new file mode 100644 index 000000000..12ab570c0 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/constraints.md @@ -0,0 +1,188 @@ +;; Machine description for AArch64 architecture. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + +(define_register_constraint "w" "FP_REGS" + "Floating point and SIMD vector registers.") + +(define_register_constraint "x" "FP_LO_REGS" + "Floating point and SIMD vector registers V0 - V15.") + +(define_constraint "I" + "A constant that can be used with an ADD operation." + (and (match_code "const_int") + (match_test "aarch64_uimm12_shift (ival)"))) + +(define_constraint "J" + "A constant that can be used with a SUB operation (once negated)." + (and (match_code "const_int") + (match_test "aarch64_uimm12_shift (-ival)"))) + +;; We can't use the mode of a CONST_INT to determine the context in +;; which it is being used, so we must have a separate constraint for +;; each context. + +(define_constraint "K" + "A constant that can be used with a 32-bit logical operation." + (and (match_code "const_int") + (match_test "aarch64_bitmask_imm (ival, SImode)"))) + +(define_constraint "L" + "A constant that can be used with a 64-bit logical operation." + (and (match_code "const_int") + (match_test "aarch64_bitmask_imm (ival, DImode)"))) + +(define_constraint "M" + "A constant that can be used with a 32-bit MOV immediate operation." + (and (match_code "const_int") + (match_test "aarch64_move_imm (ival, SImode)"))) + +(define_constraint "N" + "A constant that can be used with a 64-bit MOV immediate operation." + (and (match_code "const_int") + (match_test "aarch64_move_imm (ival, DImode)"))) + +(define_constraint "S" + "A constraint that matches an absolute symbolic address." + (and (match_code "const,symbol_ref,label_ref") + (match_test "aarch64_symbolic_address_p (op)"))) + +(define_constraint "Y" + "Floating point constant zero." + (and (match_code "const_double") + (match_test "aarch64_float_const_zero_rtx_p (op)"))) + +(define_constraint "Z" + "Integer constant zero." + (match_test "op == const0_rtx")) + +(define_constraint "Ush" + "A constraint that matches an absolute symbolic address high part." + (and (match_code "high") + (match_test "aarch64_valid_symref (XEXP (op, 0), GET_MODE (XEXP (op, 0)))"))) + +(define_constraint "Uss" + "@internal + A constraint that matches an immediate shift constant in SImode." + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) ival < 32"))) + +(define_constraint "Usd" + "@internal + A constraint that matches an immediate shift constant in DImode." + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) ival < 64"))) + +(define_constraint "UsM" + "@internal + A constraint that matches the immediate constant -1." + (match_test "op == constm1_rtx")) + +(define_constraint "Ui1" + "@internal + A constraint that matches the immediate constant +1." + (match_test "op == const1_rtx")) + +(define_constraint "Ui3" + "@internal + A constraint that matches the integers 0...4." + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) ival <= 4"))) + +(define_constraint "Up3" + "@internal + A constraint that matches the integers 2^(0...4)." 
+ (and (match_code "const_int") + (match_test "(unsigned) exact_log2 (ival) <= 4"))) + +(define_memory_constraint "Q" + "A memory address which uses a single base register with no offset." + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0))"))) + +(define_memory_constraint "Ump" + "@internal + A memory address suitable for a load/store pair operation." + (and (match_code "mem") + (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + PARALLEL, 1)"))) + +(define_memory_constraint "Utv" + "@internal + An address valid for loading/storing opaque structure + types wider than TImode." + (and (match_code "mem") + (match_test "aarch64_simd_mem_operand_p (op)"))) + +(define_constraint "Ufc" + "A floating point constant which can be used with an\ + FMOV immediate operation." + (and (match_code "const_double") + (match_test "aarch64_float_const_representable_p (op)"))) + +(define_constraint "Dn" + "@internal + A constraint that matches vector of immediates." + (and (match_code "const_vector") + (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op), + false, NULL)"))) + +(define_constraint "Dh" + "@internal + A constraint that matches an immediate operand valid for\ + AdvSIMD scalar move in HImode." + (and (match_code "const_int") + (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, + HImode)"))) + +(define_constraint "Dq" + "@internal + A constraint that matches an immediate operand valid for\ + AdvSIMD scalar move in QImode." + (and (match_code "const_int") + (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, + QImode)"))) + +(define_constraint "Dl" + "@internal + A constraint that matches vector of immediates for left shifts." + (and (match_code "const_vector") + (match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op), + true)"))) + +(define_constraint "Dr" + "@internal + A constraint that matches vector of immediates for right shifts." + (and (match_code "const_vector") + (match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op), + false)"))) +(define_constraint "Dz" + "@internal + A constraint that matches vector of immediate zero." + (and (match_code "const_vector") + (match_test "aarch64_simd_imm_zero_p (op, GET_MODE (op))"))) + +(define_constraint "Dd" + "@internal + A constraint that matches an immediate operand valid for AdvSIMD scalar." + (and (match_code "const_int") + (match_test "aarch64_simd_imm_scalar_p (op, GET_MODE (op))"))) diff --git a/gcc-4.9/gcc/config/aarch64/gentune.sh b/gcc-4.9/gcc/config/aarch64/gentune.sh new file mode 100644 index 000000000..c0f2e794f --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/gentune.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
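As an aside on the atomics.md patterns earlier in this patch (not part of the checked-in sources): the atomic_load, atomic_store and exclusive load/store insns select between the plain and the acquire/release forms (LDR vs. LDAR, STR vs. STLR, LDXR/STXR vs. LDAXR/STLXR) purely from the memory-model operand, and mem_thread_fence emits a DMB only for models stronger than relaxed/consume. A minimal C sketch of what that means for GCC's __atomic builtins follows; the function names are illustrative only, and the instruction choices in the comments are assumptions read off those patterns, not verified compiler output.

long
load_relaxed (long *p)
{
  return __atomic_load_n (p, __ATOMIC_RELAXED);   /* expected: plain ldr */
}

long
load_acquire (long *p)
{
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);   /* expected: ldar */
}

void
store_release (long *p, long v)
{
  __atomic_store_n (p, v, __ATOMIC_RELEASE);      /* expected: stlr */
}

/* Read-modify-write operations have no single-instruction form in these
   patterns; aarch64_split_atomic_op expands them after reload into an
   exclusive-load / exclusive-store retry loop (ldxr or ldaxr, then stxr
   or stlxr, depending on the requested model).  */
long
fetch_add (long *p, long v)
{
  return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
}

Note also that the load patterns group MEMMODEL_RELEASE with relaxed and the store patterns group MEMMODEL_ACQUIRE with relaxed, since those orderings place no constraint on that direction of access.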
+ +# Generate aarch64-tune.md, a file containing the tune attribute from the list of +# CPUs in aarch64-cores.def + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically by gentune.sh from aarch64-cores.def" + +allcores=`awk -F'[(, ]+' '/^AARCH64_CORE/ { cores = cores$3"," } END { print cores } ' $1` + +echo "(define_attr \"tune\"" +echo " \"$allcores\"" | sed -e 's/,"$/"/' +echo " (const (symbol_ref \"((enum attr_tune) aarch64_tune)\")))" diff --git a/gcc-4.9/gcc/config/aarch64/iterators.md b/gcc-4.9/gcc/config/aarch64/iterators.md new file mode 100644 index 000000000..f1339b8cc --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/iterators.md @@ -0,0 +1,997 @@ +;; Machine description for AArch64 architecture. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; ------------------------------------------------------------------- +;; Mode Iterators +;; ------------------------------------------------------------------- + + +;; Iterator for General Purpose Integer registers (32- and 64-bit modes) +(define_mode_iterator GPI [SI DI]) + +;; Iterator for QI and HI modes +(define_mode_iterator SHORT [QI HI]) + +;; Iterator for all integer modes (up to 64-bit) +(define_mode_iterator ALLI [QI HI SI DI]) + +;; Iterator scalar modes (up to 64-bit) +(define_mode_iterator SDQ_I [QI HI SI DI]) + +;; Iterator for all integer modes that can be extended (up to 64-bit) +(define_mode_iterator ALLX [QI HI SI]) + +;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes) +(define_mode_iterator GPF [SF DF]) + +;; Integer vector modes. +(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) + +;; Integer vector modes. +(define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) + +;; vector and scalar, 64 & 128-bit container, all integer modes +(define_mode_iterator VSDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI QI HI SI DI]) + +;; vector and scalar, 64 & 128-bit container: all vector integer modes; +;; 64-bit scalar integer mode +(define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI]) + +;; Double vector modes. +(define_mode_iterator VD [V8QI V4HI V2SI V2SF]) + +;; vector, 64-bit container, all integer modes +(define_mode_iterator VD_BHSI [V8QI V4HI V2SI]) + +;; 128 and 64-bit container; 8, 16, 32-bit vector integer modes +(define_mode_iterator VDQ_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Quad vector modes. +(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V4SF V2DF]) + +;; All vector modes, except double. +(define_mode_iterator VQ_S [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Vector and scalar, 64 & 128-bit container: all vector integer mode; +;; 8, 16, 32-bit scalar integer modes +(define_mode_iterator VSDQ_I_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI QI HI SI]) + +;; Vector modes for moves. 
+(define_mode_iterator VDQM [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; This mode iterator allows :P to be used for patterns that operate on +;; addresses in different modes. In LP64, only DI will match, while in +;; ILP32, either can match. +(define_mode_iterator P [(SI "ptr_mode == SImode || Pmode == SImode") + (DI "ptr_mode == DImode || Pmode == DImode")]) + +;; This mode iterator allows :PTR to be used for patterns that operate on +;; pointer-sized quantities. Exactly one of the two alternatives will match. +(define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")]) + +;; Vector Float modes. +(define_mode_iterator VDQF [V2SF V4SF V2DF]) + +;; Vector single Float modes. +(define_mode_iterator VDQSF [V2SF V4SF]) + +;; Modes suitable to use as the return type of a vcond expression. +(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI]) + +;; All Float modes. +(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF]) + +;; Vector Float modes with 2 elements. +(define_mode_iterator V2F [V2SF V2DF]) + +;; All modes. +(define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF]) + +;; All vector modes and DI. +(define_mode_iterator VALLDI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF DI]) + +;; All vector modes and DI and DF. +(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI + V2DI V2SF V4SF V2DF DI DF]) + +;; Vector modes for Integer reduction across lanes. +(define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI V2DI]) + +;; Vector modes(except V2DI) for Integer reduction across lanes. +(define_mode_iterator VDQV_S [V8QI V16QI V4HI V8HI V4SI]) + +;; All double integer narrow-able modes. +(define_mode_iterator VDN [V4HI V2SI DI]) + +;; All quad integer narrow-able modes. +(define_mode_iterator VQN [V8HI V4SI V2DI]) + +;; All double integer widen-able modes. +(define_mode_iterator VDW [V8QI V4HI V2SI]) + +;; Vector and scalar 128-bit container: narrowable 16, 32, 64-bit integer modes +(define_mode_iterator VSQN_HSDI [V8HI V4SI V2DI HI SI DI]) + +;; All quad integer widen-able modes. +(define_mode_iterator VQW [V16QI V8HI V4SI]) + +;; Double vector modes for combines. +(define_mode_iterator VDC [V8QI V4HI V2SI V2SF DI DF]) + +;; Double vector modes for combines. +(define_mode_iterator VDIC [V8QI V4HI V2SI]) + +;; Double vector modes. +(define_mode_iterator VD_RE [V8QI V4HI V2SI DI DF V2SF]) + +;; Vector modes except double int. +(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) + +;; Vector modes for Q and H types. +(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI]) + +;; Vector modes for H and S types. +(define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) + +;; Vector modes for Q, H and S types. +(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Vector and scalar integer modes for H and S +(define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI]) + +;; Vector and scalar 64-bit container: 16, 32-bit integer modes +(define_mode_iterator VSD_HSI [V4HI V2SI HI SI]) + +;; Vector 64-bit container: 16, 32-bit integer modes +(define_mode_iterator VD_HSI [V4HI V2SI]) + +;; Scalar 64-bit container: 16, 32-bit integer modes +(define_mode_iterator SD_HSI [HI SI]) + +;; Vector 64-bit container: 16, 32-bit integer modes +(define_mode_iterator VQ_HSI [V8HI V4SI]) + +;; All byte modes. +(define_mode_iterator VB [V8QI V16QI]) + +(define_mode_iterator TX [TI TF]) + +;; Opaque structure modes. 
+(define_mode_iterator VSTRUCT [OI CI XI]) + +;; Double scalar modes +(define_mode_iterator DX [DI DF]) + +;; Modes available for mul lane operations. +(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) + +;; Modes available for mul lane operations changing lane count. +(define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF]) + +;; ------------------------------------------------------------------ +;; Unspec enumerations for Advance SIMD. These could well go into +;; aarch64.md but for their use in int_iterators here. +;; ------------------------------------------------------------------ + +(define_c_enum "unspec" + [ + UNSPEC_ASHIFT_SIGNED ; Used in aarch-simd.md. + UNSPEC_ASHIFT_UNSIGNED ; Used in aarch64-simd.md. + UNSPEC_FMAX ; Used in aarch64-simd.md. + UNSPEC_FMAXNMV ; Used in aarch64-simd.md. + UNSPEC_FMAXV ; Used in aarch64-simd.md. + UNSPEC_FMIN ; Used in aarch64-simd.md. + UNSPEC_FMINNMV ; Used in aarch64-simd.md. + UNSPEC_FMINV ; Used in aarch64-simd.md. + UNSPEC_FADDV ; Used in aarch64-simd.md. + UNSPEC_SADDV ; Used in aarch64-simd.md. + UNSPEC_UADDV ; Used in aarch64-simd.md. + UNSPEC_SMAXV ; Used in aarch64-simd.md. + UNSPEC_SMINV ; Used in aarch64-simd.md. + UNSPEC_UMAXV ; Used in aarch64-simd.md. + UNSPEC_UMINV ; Used in aarch64-simd.md. + UNSPEC_SHADD ; Used in aarch64-simd.md. + UNSPEC_UHADD ; Used in aarch64-simd.md. + UNSPEC_SRHADD ; Used in aarch64-simd.md. + UNSPEC_URHADD ; Used in aarch64-simd.md. + UNSPEC_SHSUB ; Used in aarch64-simd.md. + UNSPEC_UHSUB ; Used in aarch64-simd.md. + UNSPEC_SRHSUB ; Used in aarch64-simd.md. + UNSPEC_URHSUB ; Used in aarch64-simd.md. + UNSPEC_ADDHN ; Used in aarch64-simd.md. + UNSPEC_RADDHN ; Used in aarch64-simd.md. + UNSPEC_SUBHN ; Used in aarch64-simd.md. + UNSPEC_RSUBHN ; Used in aarch64-simd.md. + UNSPEC_ADDHN2 ; Used in aarch64-simd.md. + UNSPEC_RADDHN2 ; Used in aarch64-simd.md. + UNSPEC_SUBHN2 ; Used in aarch64-simd.md. + UNSPEC_RSUBHN2 ; Used in aarch64-simd.md. + UNSPEC_SQDMULH ; Used in aarch64-simd.md. + UNSPEC_SQRDMULH ; Used in aarch64-simd.md. + UNSPEC_PMUL ; Used in aarch64-simd.md. + UNSPEC_USQADD ; Used in aarch64-simd.md. + UNSPEC_SUQADD ; Used in aarch64-simd.md. + UNSPEC_SQXTUN ; Used in aarch64-simd.md. + UNSPEC_SQXTN ; Used in aarch64-simd.md. + UNSPEC_UQXTN ; Used in aarch64-simd.md. + UNSPEC_SSRA ; Used in aarch64-simd.md. + UNSPEC_USRA ; Used in aarch64-simd.md. + UNSPEC_SRSRA ; Used in aarch64-simd.md. + UNSPEC_URSRA ; Used in aarch64-simd.md. + UNSPEC_SRSHR ; Used in aarch64-simd.md. + UNSPEC_URSHR ; Used in aarch64-simd.md. + UNSPEC_SQSHLU ; Used in aarch64-simd.md. + UNSPEC_SQSHL ; Used in aarch64-simd.md. + UNSPEC_UQSHL ; Used in aarch64-simd.md. + UNSPEC_SQSHRUN ; Used in aarch64-simd.md. + UNSPEC_SQRSHRUN ; Used in aarch64-simd.md. + UNSPEC_SQSHRN ; Used in aarch64-simd.md. + UNSPEC_UQSHRN ; Used in aarch64-simd.md. + UNSPEC_SQRSHRN ; Used in aarch64-simd.md. + UNSPEC_UQRSHRN ; Used in aarch64-simd.md. + UNSPEC_SSHL ; Used in aarch64-simd.md. + UNSPEC_USHL ; Used in aarch64-simd.md. + UNSPEC_SRSHL ; Used in aarch64-simd.md. + UNSPEC_URSHL ; Used in aarch64-simd.md. + UNSPEC_SQRSHL ; Used in aarch64-simd.md. + UNSPEC_UQRSHL ; Used in aarch64-simd.md. + UNSPEC_SSLI ; Used in aarch64-simd.md. + UNSPEC_USLI ; Used in aarch64-simd.md. + UNSPEC_SSRI ; Used in aarch64-simd.md. + UNSPEC_USRI ; Used in aarch64-simd.md. + UNSPEC_SSHLL ; Used in aarch64-simd.md. + UNSPEC_USHLL ; Used in aarch64-simd.md. + UNSPEC_ADDP ; Used in aarch64-simd.md. 
+ UNSPEC_TBL ; Used in vector permute patterns. + UNSPEC_CONCAT ; Used in vector permute patterns. + UNSPEC_ZIP1 ; Used in vector permute patterns. + UNSPEC_ZIP2 ; Used in vector permute patterns. + UNSPEC_UZP1 ; Used in vector permute patterns. + UNSPEC_UZP2 ; Used in vector permute patterns. + UNSPEC_TRN1 ; Used in vector permute patterns. + UNSPEC_TRN2 ; Used in vector permute patterns. + UNSPEC_AESE ; Used in aarch64-simd.md. + UNSPEC_AESD ; Used in aarch64-simd.md. + UNSPEC_AESMC ; Used in aarch64-simd.md. + UNSPEC_AESIMC ; Used in aarch64-simd.md. + UNSPEC_SHA1C ; Used in aarch64-simd.md. + UNSPEC_SHA1M ; Used in aarch64-simd.md. + UNSPEC_SHA1P ; Used in aarch64-simd.md. + UNSPEC_SHA1H ; Used in aarch64-simd.md. + UNSPEC_SHA1SU0 ; Used in aarch64-simd.md. + UNSPEC_SHA1SU1 ; Used in aarch64-simd.md. + UNSPEC_SHA256H ; Used in aarch64-simd.md. + UNSPEC_SHA256H2 ; Used in aarch64-simd.md. + UNSPEC_SHA256SU0 ; Used in aarch64-simd.md. + UNSPEC_SHA256SU1 ; Used in aarch64-simd.md. + UNSPEC_PMULL ; Used in aarch64-simd.md. + UNSPEC_PMULL2 ; Used in aarch64-simd.md. +]) + +;; ------------------------------------------------------------------- +;; Mode attributes +;; ------------------------------------------------------------------- + +;; In GPI templates, a string like "%0" will expand to "%w0" in the +;; 32-bit version and "%x0" in the 64-bit version. +(define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) + +;; For constraints used in scalar immediate vector moves +(define_mode_attr hq [(HI "h") (QI "q")]) + +;; For scalar usage of vector/FP registers +(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d") + (SF "s") (DF "d") + (V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2DI "") (V2SF "") + (V4SF "") (V2DF "")]) + +;; For scalar usage of vector/FP registers, narrowing +(define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s") + (V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2DI "") (V2SF "") + (V4SF "") (V2DF "")]) + +;; For scalar usage of vector/FP registers, widening +(define_mode_attr vw2 [(DI "") (QI "h") (HI "s") (SI "d") + (V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2DI "") (V2SF "") + (V4SF "") (V2DF "")]) + +;; Register Type Name and Vector Arrangement Specifier for when +;; we are doing scalar for DI and SIMD for SI (ignoring all but +;; lane 0). +(define_mode_attr rtn [(DI "d") (SI "")]) +(define_mode_attr vas [(DI "") (SI ".2s")]) + +;; Map a floating point mode to the appropriate register name prefix +(define_mode_attr s [(SF "s") (DF "d")]) + +;; Give the length suffix letter for a sign- or zero-extension. +(define_mode_attr size [(QI "b") (HI "h") (SI "w")]) + +;; Give the number of bits in the mode +(define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")]) + +;; Give the ordinal of the MSB in the mode +(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")]) + +;; Attribute to describe constants acceptable in logical operations +(define_mode_attr lconst [(SI "K") (DI "L")]) + +;; Map a mode to a specific constraint character. 
+(define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")]) + +(define_mode_attr Vtype [(V8QI "8b") (V16QI "16b") + (V4HI "4h") (V8HI "8h") + (V2SI "2s") (V4SI "4s") + (DI "1d") (DF "1d") + (V2DI "2d") (V2SF "2s") + (V4SF "4s") (V2DF "2d")]) + +(define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") + (V4HI ".4h") (V8HI ".8h") + (V2SI ".2s") (V4SI ".4s") + (V2DI ".2d") (V2SF ".2s") + (V4SF ".4s") (V2DF ".2d") + (DI "") (SI "") + (HI "") (QI "") + (TI "") (SF "") + (DF "")]) + +;; Register suffix narrowed modes for VQN. +(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h") + (V2DI ".2s") + (DI "") (SI "") + (HI "")]) + +;; Mode-to-individual element type mapping. +(define_mode_attr Vetype [(V8QI "b") (V16QI "b") + (V4HI "h") (V8HI "h") + (V2SI "s") (V4SI "s") + (V2DI "d") (V2SF "s") + (V4SF "s") (V2DF "d") + (SF "s") (DF "d") + (QI "b") (HI "h") + (SI "s") (DI "d")]) + +;; Mode-to-bitwise operation type mapping. +(define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b") + (V4HI "8b") (V8HI "16b") + (V2SI "8b") (V4SI "16b") + (V2DI "16b") (V2SF "8b") + (V4SF "16b") (V2DF "16b") + (DI "8b") (DF "8b")]) + +;; Define element mode for each vector mode. +(define_mode_attr VEL [(V8QI "QI") (V16QI "QI") + (V4HI "HI") (V8HI "HI") + (V2SI "SI") (V4SI "SI") + (DI "DI") (V2DI "DI") + (V2SF "SF") (V4SF "SF") + (V2DF "DF") (DF "DF") + (SI "SI") (HI "HI") + (QI "QI")]) + +;; Define container mode for lane selection. +(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI") + (V2SI "V2SI") (V4SI "V2SI") + (DI "DI") (V2DI "DI") + (V2SF "V2SF") (V4SF "V2SF") + (V2DF "DF")]) + +;; Define container mode for lane selection. +(define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI") + (V4HI "V8HI") (V8HI "V8HI") + (V2SI "V4SI") (V4SI "V4SI") + (DI "V2DI") (V2DI "V2DI") + (V2SF "V2SF") (V4SF "V4SF") + (V2DF "V2DF") (SI "V4SI") + (HI "V8HI") (QI "V16QI")]) + +;; Define container mode for lane selection. +(define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI") + (V4HI "V8HI") (V8HI "V8HI") + (V2SI "V4SI") (V4SI "V4SI") + (DI "V2DI") (V2DI "V2DI") + (V2SF "V4SF") (V4SF "V4SF") + (V2DF "V2DF") (SI "V4SI") + (HI "V8HI") (QI "V16QI")]) + +;; Half modes of all vector modes. +(define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI") + (V4HI "V2HI") (V8HI "V4HI") + (V2SI "SI") (V4SI "V2SI") + (V2DI "DI") (V2SF "SF") + (V4SF "V2SF") (V2DF "DF")]) + +;; Double modes of vector modes. +(define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI") + (V2SI "V4SI") (V2SF "V4SF") + (SI "V2SI") (DI "V2DI") + (DF "V2DF")]) + +;; Double modes of vector modes (lower case). +(define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") + (V2SI "v4si") (V2SF "v4sf") + (SI "v2si") (DI "v2di") + (DF "v2df")]) + +;; Narrowed modes for VDN. +(define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI") + (DI "V2SI")]) + +;; Narrowed double-modes for VQN (Used for XTN). +(define_mode_attr VNARROWQ [(V8HI "V8QI") (V4SI "V4HI") + (V2DI "V2SI") + (DI "SI") (SI "HI") + (HI "QI")]) + +;; Narrowed quad-modes for VQN (Used for XTN2). +(define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI") + (V2DI "V4SI")]) + +;; Register suffix narrowed modes for VQN. +(define_mode_attr Vntype [(V8HI "8b") (V4SI "4h") + (V2DI "2s")]) + +;; Register suffix narrowed modes for VQN. +(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h") + (V2DI "4s")]) + +;; Widened modes of vector modes. +(define_mode_attr VWIDE [(V8QI "V8HI") (V4HI "V4SI") + (V2SI "V2DI") (V16QI "V8HI") + (V8HI "V4SI") (V4SI "V2DI") + (HI "SI") (SI "DI")] + +) + +;; Widened mode register suffixes for VDW/VQW. 
+(define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s") + (V2SI "2d") (V16QI "8h") + (V8HI "4s") (V4SI "2d")]) + +;; Widened mode register suffixes for VDW/VQW. +(define_mode_attr Vmwtype [(V8QI ".8h") (V4HI ".4s") + (V2SI ".2d") (V16QI ".8h") + (V8HI ".4s") (V4SI ".2d") + (SI "") (HI "")]) + +;; Lower part register suffixes for VQW. +(define_mode_attr Vhalftype [(V16QI "8b") (V8HI "4h") + (V4SI "2s")]) + +;; Define corresponding core/FP element mode for each vector mode. +(define_mode_attr vw [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V2SF "s") (V4SF "s") + (V2DF "d")]) + +;; Corresponding core element mode for each vector mode. This is a +;; variation on mapping FP modes to GP regs. +(define_mode_attr vwcore [(V8QI "w") (V16QI "w") + (V4HI "w") (V8HI "w") + (V2SI "w") (V4SI "w") + (DI "x") (V2DI "x") + (V2SF "w") (V4SF "w") + (V2DF "x")]) + +;; Double vector types for ALLX. +(define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")]) + +;; Mode of result of comparison operations. +(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") + (V4HI "V4HI") (V8HI "V8HI") + (V2SI "V2SI") (V4SI "V4SI") + (DI "DI") (V2DI "V2DI") + (V2SF "V2SI") (V4SF "V4SI") + (V2DF "V2DI") (DF "DI") + (SF "SI")]) + +;; Lower case mode of results of comparison operations. +(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") + (V4HI "v4hi") (V8HI "v8hi") + (V2SI "v2si") (V4SI "v4si") + (DI "di") (V2DI "v2di") + (V2SF "v2si") (V4SF "v4si") + (V2DF "v2di") (DF "di") + (SF "si")]) + +;; Vm for lane instructions is restricted to FP_LO_REGS. +(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x") + (V2SI "w") (V4SI "w") (SI "w")]) + +(define_mode_attr Vendreg [(OI "T") (CI "U") (XI "V")]) + +(define_mode_attr nregs [(OI "2") (CI "3") (XI "4")]) + +(define_mode_attr VRL2 [(V8QI "V32QI") (V4HI "V16HI") + (V2SI "V8SI") (V2SF "V8SF") + (DI "V4DI") (DF "V4DF") + (V16QI "V32QI") (V8HI "V16HI") + (V4SI "V8SI") (V4SF "V8SF") + (V2DI "V4DI") (V2DF "V4DF")]) + +(define_mode_attr VRL3 [(V8QI "V48QI") (V4HI "V24HI") + (V2SI "V12SI") (V2SF "V12SF") + (DI "V6DI") (DF "V6DF") + (V16QI "V48QI") (V8HI "V24HI") + (V4SI "V12SI") (V4SF "V12SF") + (V2DI "V6DI") (V2DF "V6DF")]) + +(define_mode_attr VRL4 [(V8QI "V64QI") (V4HI "V32HI") + (V2SI "V16SI") (V2SF "V16SF") + (DI "V8DI") (DF "V8DF") + (V16QI "V64QI") (V8HI "V32HI") + (V4SI "V16SI") (V4SF "V16SF") + (V2DI "V8DI") (V2DF "V8DF")]) + +(define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) + +;; Mode for atomic operation suffixes +(define_mode_attr atomic_sfx + [(QI "b") (HI "h") (SI "") (DI "")]) + +(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")]) +(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")]) + +(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") + (V4HI "V8HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V2SI") + (DI "V2DI") (V2DI "DI") + (V2SF "V4SF") (V4SF "V2SF") + (DF "V2DF") (V2DF "DF")]) + +(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64") + (V4HI "to_128") (V8HI "to_64") + (V2SI "to_128") (V4SI "to_64") + (DI "to_128") (V2DI "to_64") + (V2SF "to_128") (V4SF "to_64") + (DF "to_128") (V2DF "to_64")]) + +;; For certain vector-by-element multiplication instructions we must +;; constrain the HI cases to use only V0-V15. This is covered by +;; the 'x' constraint. All other modes may use the 'w' constraint. 
+(define_mode_attr h_con [(V2SI "w") (V4SI "w") + (V4HI "x") (V8HI "x") + (V2SF "w") (V4SF "w") + (V2DF "w") (DF "w")]) + +;; Defined to 'f' for types whose element type is a float type. +(define_mode_attr f [(V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (DI "") (V2DI "") + (V2SF "f") (V4SF "f") + (V2DF "f") (DF "f")]) + +;; Defined to '_fp' for types whose element type is a float type. +(define_mode_attr fp [(V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (DI "") (V2DI "") + (V2SF "_fp") (V4SF "_fp") + (V2DF "_fp") (DF "_fp") + (SF "_fp")]) + +;; Defined to '_q' for 128-bit types. +(define_mode_attr q [(V8QI "") (V16QI "_q") + (V4HI "") (V8HI "_q") + (V2SI "") (V4SI "_q") + (DI "") (V2DI "_q") + (V2SF "") (V4SF "_q") + (V2DF "_q") + (QI "") (HI "") (SI "") (DI "") (SF "") (DF "")]) + +(define_mode_attr vp [(V8QI "v") (V16QI "v") + (V4HI "v") (V8HI "v") + (V2SI "p") (V4SI "v") + (V2DI "p") (V2DF "p") + (V2SF "p") (V4SF "v")]) + +;; ------------------------------------------------------------------- +;; Code Iterators +;; ------------------------------------------------------------------- + +;; This code iterator allows the various shifts supported on the core +(define_code_iterator SHIFT [ashift ashiftrt lshiftrt rotatert]) + +;; This code iterator allows the shifts supported in arithmetic instructions +(define_code_iterator ASHIFT [ashift ashiftrt lshiftrt]) + +;; Code iterator for logical operations +(define_code_iterator LOGICAL [and ior xor]) + +;; Code iterator for sign/zero extension +(define_code_iterator ANY_EXTEND [sign_extend zero_extend]) + +;; All division operations (signed/unsigned) +(define_code_iterator ANY_DIV [div udiv]) + +;; Code iterator for sign/zero extraction +(define_code_iterator ANY_EXTRACT [sign_extract zero_extract]) + +;; Code iterator for equality comparisons +(define_code_iterator EQL [eq ne]) + +;; Code iterator for less-than and greater/equal-to +(define_code_iterator LTGE [lt ge]) + +;; Iterator for __sync_ operations that where the operation can be +;; represented directly RTL. This is all of the sync operations bar +;; nand. +(define_code_iterator atomic_op [plus minus ior xor and]) + +;; Iterator for integer conversions +(define_code_iterator FIXUORS [fix unsigned_fix]) + +;; Iterator for float conversions +(define_code_iterator FLOATUORS [float unsigned_float]) + +;; Code iterator for variants of vector max and min. +(define_code_iterator MAXMIN [smax smin umax umin]) + +(define_code_iterator FMAXMIN [smax smin]) + +;; Code iterator for variants of vector max and min. +(define_code_iterator ADDSUB [plus minus]) + +;; Code iterator for variants of vector saturating binary ops. +(define_code_iterator BINQOPS [ss_plus us_plus ss_minus us_minus]) + +;; Code iterator for variants of vector saturating unary ops. +(define_code_iterator UNQOPS [ss_neg ss_abs]) + +;; Code iterator for signed variants of vector saturating binary ops. +(define_code_iterator SBINQOPS [ss_plus ss_minus]) + +;; Comparison operators for CM. +(define_code_iterator COMPARISONS [lt le eq ge gt]) + +;; Unsigned comparison operators. +(define_code_iterator UCOMPARISONS [ltu leu geu gtu]) + +;; Unsigned comparison operators. 
+(define_code_iterator FAC_COMPARISONS [lt le ge gt]) + +;; ------------------------------------------------------------------- +;; Code Attributes +;; ------------------------------------------------------------------- +;; Map rtl objects to optab names +(define_code_attr optab [(ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") + (rotatert "rotr") + (sign_extend "extend") + (zero_extend "zero_extend") + (sign_extract "extv") + (zero_extract "extzv") + (fix "fix") + (unsigned_fix "fixuns") + (float "float") + (unsigned_float "floatuns") + (and "and") + (ior "ior") + (xor "xor") + (not "one_cmpl") + (neg "neg") + (plus "add") + (minus "sub") + (ss_plus "qadd") + (us_plus "qadd") + (ss_minus "qsub") + (us_minus "qsub") + (ss_neg "qneg") + (ss_abs "qabs") + (eq "eq") + (ne "ne") + (lt "lt") + (ge "ge") + (le "le") + (gt "gt") + (ltu "ltu") + (leu "leu") + (geu "geu") + (gtu "gtu")]) + +;; For comparison operators we use the FCM* and CM* instructions. +;; As there are no CMLE or CMLT instructions which act on 3 vector +;; operands, we must use CMGE or CMGT and swap the order of the +;; source operands. + +(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt") + (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")]) +(define_code_attr cmp_1 [(lt "2") (le "2") (eq "1") (ge "1") (gt "1") + (ltu "2") (leu "2") (geu "1") (gtu "1")]) +(define_code_attr cmp_2 [(lt "1") (le "1") (eq "2") (ge "2") (gt "2") + (ltu "1") (leu "1") (geu "2") (gtu "2")]) + +(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT") + (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")]) + +(define_code_attr fix_trunc_optab [(fix "fix_trunc") + (unsigned_fix "fixuns_trunc")]) + +;; Optab prefix for sign/zero-extending operations +(define_code_attr su_optab [(sign_extend "") (zero_extend "u") + (div "") (udiv "u") + (fix "") (unsigned_fix "u") + (float "s") (unsigned_float "u") + (ss_plus "s") (us_plus "u") + (ss_minus "s") (us_minus "u")]) + +;; Similar for the instruction mnemonics +(define_code_attr shift [(ashift "lsl") (ashiftrt "asr") + (lshiftrt "lsr") (rotatert "ror")]) + +;; Map shift operators onto underlying bit-field instructions +(define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx") + (lshiftrt "ubfx") (rotatert "extr")]) + +;; Logical operator instruction mnemonics +(define_code_attr logical [(and "and") (ior "orr") (xor "eor")]) + +;; Similar, but when not(op) +(define_code_attr nlogical [(and "bic") (ior "orn") (xor "eon")]) + +;; Sign- or zero-extending load +(define_code_attr ldrxt [(sign_extend "ldrs") (zero_extend "ldr")]) + +;; Sign- or zero-extending data-op +(define_code_attr su [(sign_extend "s") (zero_extend "u") + (sign_extract "s") (zero_extract "u") + (fix "s") (unsigned_fix "u") + (div "s") (udiv "u") + (smax "s") (umax "u") + (smin "s") (umin "u")]) + +;; Emit cbz/cbnz depending on comparison type. +(define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")]) + +;; Emit tbz/tbnz depending on comparison type. +(define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")]) + +;; Max/min attributes. +(define_code_attr maxmin [(smax "max") + (smin "min") + (umax "max") + (umin "min")]) + +;; MLA/MLS attributes. 
+(define_code_attr as [(ss_plus "a") (ss_minus "s")]) + +;; Atomic operations +(define_code_attr atomic_optab + [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")]) + +(define_code_attr atomic_op_operand + [(ior "aarch64_logical_operand") + (xor "aarch64_logical_operand") + (and "aarch64_logical_operand") + (plus "aarch64_plus_operand") + (minus "aarch64_plus_operand")]) + +;; ------------------------------------------------------------------- +;; Int Iterators. +;; ------------------------------------------------------------------- +(define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV + UNSPEC_SMAXV UNSPEC_SMINV]) + +(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV + UNSPEC_FMAXNMV UNSPEC_FMINNMV]) + +(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV]) + +(define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD + UNSPEC_SRHADD UNSPEC_URHADD + UNSPEC_SHSUB UNSPEC_UHSUB + UNSPEC_SRHSUB UNSPEC_URHSUB]) + + +(define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN + UNSPEC_SUBHN UNSPEC_RSUBHN]) + +(define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2 + UNSPEC_SUBHN2 UNSPEC_RSUBHN2]) + +(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN]) + +(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH]) + +(define_int_iterator USSUQADD [UNSPEC_SUQADD UNSPEC_USQADD]) + +(define_int_iterator SUQMOVN [UNSPEC_SQXTN UNSPEC_UQXTN]) + +(define_int_iterator VSHL [UNSPEC_SSHL UNSPEC_USHL + UNSPEC_SRSHL UNSPEC_URSHL]) + +(define_int_iterator VSHLL [UNSPEC_SSHLL UNSPEC_USHLL]) + +(define_int_iterator VQSHL [UNSPEC_SQSHL UNSPEC_UQSHL + UNSPEC_SQRSHL UNSPEC_UQRSHL]) + +(define_int_iterator VSRA [UNSPEC_SSRA UNSPEC_USRA + UNSPEC_SRSRA UNSPEC_URSRA]) + +(define_int_iterator VSLRI [UNSPEC_SSLI UNSPEC_USLI + UNSPEC_SSRI UNSPEC_USRI]) + + +(define_int_iterator VRSHR_N [UNSPEC_SRSHR UNSPEC_URSHR]) + +(define_int_iterator VQSHL_N [UNSPEC_SQSHLU UNSPEC_SQSHL UNSPEC_UQSHL]) + +(define_int_iterator VQSHRN_N [UNSPEC_SQSHRUN UNSPEC_SQRSHRUN + UNSPEC_SQSHRN UNSPEC_UQSHRN + UNSPEC_SQRSHRN UNSPEC_UQRSHRN]) + +(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 + UNSPEC_TRN1 UNSPEC_TRN2 + UNSPEC_UZP1 UNSPEC_UZP2]) + +(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM + UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX + UNSPEC_FRINTA]) + +(define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM + UNSPEC_FRINTA UNSPEC_FRINTN]) + +(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) + +(define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD]) +(define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC]) + +(define_int_iterator CRYPTO_SHA1 [UNSPEC_SHA1C UNSPEC_SHA1M UNSPEC_SHA1P]) + +(define_int_iterator CRYPTO_SHA256 [UNSPEC_SHA256H UNSPEC_SHA256H2]) + +;; ------------------------------------------------------------------- +;; Int Iterators Attributes. 
+;; ------------------------------------------------------------------- +(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") + (UNSPEC_SMAXV "smax") + (UNSPEC_SMINV "smin") + (UNSPEC_FMAX "smax_nan") + (UNSPEC_FMAXNMV "smax") + (UNSPEC_FMAXV "smax_nan") + (UNSPEC_FMIN "smin_nan") + (UNSPEC_FMINNMV "smin") + (UNSPEC_FMINV "smin_nan")]) + +(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") + (UNSPEC_SMAXV "smax") + (UNSPEC_SMINV "smin") + (UNSPEC_FMAX "fmax") + (UNSPEC_FMAXNMV "fmaxnm") + (UNSPEC_FMAXV "fmax") + (UNSPEC_FMIN "fmin") + (UNSPEC_FMINNMV "fminnm") + (UNSPEC_FMINV "fmin")]) + +(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") + (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") + (UNSPEC_SHSUB "s") (UNSPEC_UHSUB "u") + (UNSPEC_SRHSUB "sr") (UNSPEC_URHSUB "ur") + (UNSPEC_ADDHN "") (UNSPEC_RADDHN "r") + (UNSPEC_SUBHN "") (UNSPEC_RSUBHN "r") + (UNSPEC_ADDHN2 "") (UNSPEC_RADDHN2 "r") + (UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r") + (UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u") + (UNSPEC_USQADD "us") (UNSPEC_SUQADD "su") + (UNSPEC_SADDV "s") (UNSPEC_UADDV "u") + (UNSPEC_SSLI "s") (UNSPEC_USLI "u") + (UNSPEC_SSRI "s") (UNSPEC_USRI "u") + (UNSPEC_USRA "u") (UNSPEC_SSRA "s") + (UNSPEC_URSRA "ur") (UNSPEC_SRSRA "sr") + (UNSPEC_URSHR "ur") (UNSPEC_SRSHR "sr") + (UNSPEC_SQSHLU "s") (UNSPEC_SQSHL "s") + (UNSPEC_UQSHL "u") + (UNSPEC_SQSHRUN "s") (UNSPEC_SQRSHRUN "s") + (UNSPEC_SQSHRN "s") (UNSPEC_UQSHRN "u") + (UNSPEC_SQRSHRN "s") (UNSPEC_UQRSHRN "u") + (UNSPEC_USHL "u") (UNSPEC_SSHL "s") + (UNSPEC_USHLL "u") (UNSPEC_SSHLL "s") + (UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr") + (UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s") +]) + +(define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r") + (UNSPEC_SQSHRUN "") (UNSPEC_SQRSHRUN "r") + (UNSPEC_SQSHRN "") (UNSPEC_UQSHRN "") + (UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r") + (UNSPEC_SQSHL "") (UNSPEC_UQSHL "") + (UNSPEC_SQRSHL "r")(UNSPEC_UQRSHL "r") +]) + +(define_int_attr lr [(UNSPEC_SSLI "l") (UNSPEC_USLI "l") + (UNSPEC_SSRI "r") (UNSPEC_USRI "r")]) + +(define_int_attr u [(UNSPEC_SQSHLU "u") (UNSPEC_SQSHL "") (UNSPEC_UQSHL "") + (UNSPEC_SQSHRUN "u") (UNSPEC_SQRSHRUN "u") + (UNSPEC_SQSHRN "") (UNSPEC_UQSHRN "") + (UNSPEC_SQRSHRN "") (UNSPEC_UQRSHRN "")]) + +(define_int_attr addsub [(UNSPEC_SHADD "add") + (UNSPEC_UHADD "add") + (UNSPEC_SRHADD "add") + (UNSPEC_URHADD "add") + (UNSPEC_SHSUB "sub") + (UNSPEC_UHSUB "sub") + (UNSPEC_SRHSUB "sub") + (UNSPEC_URHSUB "sub") + (UNSPEC_ADDHN "add") + (UNSPEC_SUBHN "sub") + (UNSPEC_RADDHN "add") + (UNSPEC_RSUBHN "sub") + (UNSPEC_ADDHN2 "add") + (UNSPEC_SUBHN2 "sub") + (UNSPEC_RADDHN2 "add") + (UNSPEC_RSUBHN2 "sub")]) + +(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1") + (UNSPEC_SSRI "0") (UNSPEC_USRI "0")]) + +;; Standard pattern names for floating-point rounding instructions. +(define_int_attr frint_pattern [(UNSPEC_FRINTZ "btrunc") + (UNSPEC_FRINTP "ceil") + (UNSPEC_FRINTM "floor") + (UNSPEC_FRINTI "nearbyint") + (UNSPEC_FRINTX "rint") + (UNSPEC_FRINTA "round") + (UNSPEC_FRINTN "frintn")]) + +;; frint suffix for floating-point rounding instructions. 
+(define_int_attr frint_suffix [(UNSPEC_FRINTZ "z") (UNSPEC_FRINTP "p") + (UNSPEC_FRINTM "m") (UNSPEC_FRINTI "i") + (UNSPEC_FRINTX "x") (UNSPEC_FRINTA "a") + (UNSPEC_FRINTN "n")]) + +(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round") + (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor") + (UNSPEC_FRINTN "frintn")]) + +(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip") + (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") + (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) + +(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") + (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") + (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) + +(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) + +(define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")]) +(define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")]) + +(define_int_attr sha1_op [(UNSPEC_SHA1C "c") (UNSPEC_SHA1P "p") + (UNSPEC_SHA1M "m")]) + +(define_int_attr sha256_op [(UNSPEC_SHA256H "") (UNSPEC_SHA256H2 "2")]) diff --git a/gcc-4.9/gcc/config/aarch64/predicates.md b/gcc-4.9/gcc/config/aarch64/predicates.md new file mode 100644 index 000000000..c8e27d871 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/predicates.md @@ -0,0 +1,302 @@ +;; Machine description for AArch64 architecture. +;; Copyright (C) 2009-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
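As an aside on the rounding-instruction attributes in iterators.md above (not part of the checked-in sources): frint_pattern ties each UNSPEC_FRINT* to a standard optab name (btrunc, ceil, floor, round, rint, nearbyint, frintn) and frint_suffix supplies the letter appended to FRINT. A small C illustration follows; the function names are illustrative only, and the expected FRINT* selections are assumptions based on those two tables (actual expansion can additionally depend on floating-point option flags), not verified output.

double do_trunc  (double x) { return __builtin_trunc (x);     }  /* expected: frintz */
double do_ceil   (double x) { return __builtin_ceil (x);      }  /* expected: frintp */
double do_floor  (double x) { return __builtin_floor (x);     }  /* expected: frintm */
double do_round  (double x) { return __builtin_round (x);     }  /* expected: frinta */
double do_rint   (double x) { return __builtin_rint (x);      }  /* expected: frintx */
double do_nearby (double x) { return __builtin_nearbyint (x); }  /* expected: frinti */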
+ +(define_special_predicate "cc_register" + (and (match_code "reg") + (and (match_test "REGNO (op) == CC_REGNUM") + (ior (match_test "mode == GET_MODE (op)") + (match_test "mode == VOIDmode + && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) +) + +(define_predicate "aarch64_simd_register" + (and (match_code "reg") + (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") + (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS")))) + +(define_predicate "aarch64_reg_or_zero" + (and (match_code "reg,subreg,const_int") + (ior (match_operand 0 "register_operand") + (match_test "op == const0_rtx")))) + +(define_predicate "aarch64_reg_or_fp_zero" + (and (match_code "reg,subreg,const_double") + (ior (match_operand 0 "register_operand") + (match_test "aarch64_float_const_zero_rtx_p (op)")))) + +(define_predicate "aarch64_reg_zero_or_m1_or_1" + (and (match_code "reg,subreg,const_int") + (ior (match_operand 0 "register_operand") + (ior (match_test "op == const0_rtx") + (ior (match_test "op == constm1_rtx") + (match_test "op == const1_rtx")))))) + +(define_predicate "aarch64_fp_compare_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_double") + (match_test "aarch64_float_const_zero_rtx_p (op)")))) + +(define_predicate "aarch64_plus_immediate" + (and (match_code "const_int") + (ior (match_test "aarch64_uimm12_shift (INTVAL (op))") + (match_test "aarch64_uimm12_shift (-INTVAL (op))")))) + +(define_predicate "aarch64_plus_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_plus_immediate"))) + +(define_predicate "aarch64_pluslong_immediate" + (and (match_code "const_int") + (match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)"))) + +(define_predicate "aarch64_pluslong_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_pluslong_immediate"))) + +(define_predicate "aarch64_logical_immediate" + (and (match_code "const_int") + (match_test "aarch64_bitmask_imm (INTVAL (op), mode)"))) + +(define_predicate "aarch64_logical_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_logical_immediate"))) + +(define_predicate "aarch64_shift_imm_si" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 32"))) + +(define_predicate "aarch64_shift_imm_di" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 64"))) + +(define_predicate "aarch64_shift_imm64_di" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) <= 64"))) + +(define_predicate "aarch64_reg_or_shift_imm_si" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_shift_imm_si"))) + +(define_predicate "aarch64_reg_or_shift_imm_di" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_shift_imm_di"))) + +;; The imm3 field is a 3-bit field that only accepts immediates in the +;; range 0..4. 
+(define_predicate "aarch64_imm3" + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) <= 4"))) + +(define_predicate "aarch64_pwr_imm3" + (and (match_code "const_int") + (match_test "INTVAL (op) != 0 + && (unsigned) exact_log2 (INTVAL (op)) <= 4"))) + +(define_predicate "aarch64_pwr_2_si" + (and (match_code "const_int") + (match_test "INTVAL (op) != 0 + && (unsigned) exact_log2 (INTVAL (op)) < 32"))) + +(define_predicate "aarch64_pwr_2_di" + (and (match_code "const_int") + (match_test "INTVAL (op) != 0 + && (unsigned) exact_log2 (INTVAL (op)) < 64"))) + +(define_predicate "aarch64_mem_pair_operand" + (and (match_code "mem") + (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, + 0)"))) + +(define_predicate "aarch64_valid_symref" + (match_code "const, symbol_ref, label_ref") +{ + return (aarch64_classify_symbolic_expression (op, SYMBOL_CONTEXT_ADR) + != SYMBOL_FORCE_TO_MEM); +}) + +(define_predicate "aarch64_tls_ie_symref" + (match_code "const, symbol_ref, label_ref") +{ + switch (GET_CODE (op)) + { + case CONST: + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_CODE (XEXP (op, 0)) != SYMBOL_REF + || GET_CODE (XEXP (op, 1)) != CONST_INT) + return false; + op = XEXP (op, 0); + + case SYMBOL_REF: + return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_INITIAL_EXEC; + + default: + gcc_unreachable (); + } +}) + +(define_predicate "aarch64_tls_le_symref" + (match_code "const, symbol_ref, label_ref") +{ + switch (GET_CODE (op)) + { + case CONST: + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_CODE (XEXP (op, 0)) != SYMBOL_REF + || GET_CODE (XEXP (op, 1)) != CONST_INT) + return false; + op = XEXP (op, 0); + + case SYMBOL_REF: + return SYMBOL_REF_TLS_MODEL (op) == TLS_MODEL_LOCAL_EXEC; + + default: + gcc_unreachable (); + } +}) + +(define_predicate "aarch64_mov_operand" + (and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high") + (ior (match_operand 0 "register_operand") + (ior (match_operand 0 "memory_operand") + (match_test "aarch64_mov_operand_p (op, SYMBOL_CONTEXT_ADR, mode)"))))) + +(define_predicate "aarch64_movti_operand" + (and (match_code "reg,subreg,mem,const_int") + (ior (match_operand 0 "register_operand") + (ior (match_operand 0 "memory_operand") + (match_operand 0 "const_int_operand"))))) + +(define_predicate "aarch64_reg_or_imm" + (and (match_code "reg,subreg,const_int") + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand")))) + +;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ. +(define_special_predicate "aarch64_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,ordered,unlt,unle,unge,ungt")) + +;; True if the operand is memory reference suitable for a load/store exclusive. +(define_predicate "aarch64_sync_memory_operand" + (and (match_operand 0 "memory_operand") + (match_code "reg" "0"))) + +;; Predicates for parallel expanders based on mode. 
+(define_special_predicate "vect_par_cnst_hi_half" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int nunits = GET_MODE_NUNITS (mode); + int i; + + if (count < 1 + || count != nunits / 2) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != (nunits / 2) + i) + return false; + } + return true; +}) + +(define_special_predicate "vect_par_cnst_lo_half" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int nunits = GET_MODE_NUNITS (mode); + int i; + + if (count < 1 + || count != nunits / 2) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (GET_CODE (elt) != CONST_INT) + return false; + + val = INTVAL (elt); + if (val != i) + return false; + } + return true; +}) + + +(define_special_predicate "aarch64_simd_lshift_imm" + (match_code "const_vector") +{ + return aarch64_simd_shift_imm_p (op, mode, true); +}) + +(define_special_predicate "aarch64_simd_rshift_imm" + (match_code "const_vector") +{ + return aarch64_simd_shift_imm_p (op, mode, false); +}) + +(define_predicate "aarch64_simd_reg_or_zero" + (and (match_code "reg,subreg,const_int,const_vector") + (ior (match_operand 0 "register_operand") + (ior (match_test "op == const0_rtx") + (match_test "aarch64_simd_imm_zero_p (op, mode)"))))) + +(define_predicate "aarch64_simd_struct_operand" + (and (match_code "mem") + (match_test "TARGET_SIMD && aarch64_simd_mem_operand_p (op)"))) + +;; Like general_operand but allow only valid SIMD addressing modes. +(define_predicate "aarch64_simd_general_operand" + (and (match_operand 0 "general_operand") + (match_test "!MEM_P (op) + || GET_CODE (XEXP (op, 0)) == POST_INC + || GET_CODE (XEXP (op, 0)) == REG"))) + +;; Like nonimmediate_operand but allow only valid SIMD addressing modes. +(define_predicate "aarch64_simd_nonimmediate_operand" + (and (match_operand 0 "nonimmediate_operand") + (match_test "!MEM_P (op) + || GET_CODE (XEXP (op, 0)) == POST_INC + || GET_CODE (XEXP (op, 0)) == REG"))) + +(define_special_predicate "aarch64_simd_imm_zero" + (match_code "const_vector") +{ + return aarch64_simd_imm_zero_p (op, mode); +}) diff --git a/gcc-4.9/gcc/config/aarch64/t-aarch64 b/gcc-4.9/gcc/config/aarch64/t-aarch64 new file mode 100644 index 000000000..158fbb578 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/t-aarch64 @@ -0,0 +1,45 @@ +# Machine description for AArch64 architecture. +# Copyright (C) 2009-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +TM_H += $(srcdir)/config/aarch64/aarch64-cores.def +OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def + +$(srcdir)/config/aarch64/aarch64-tune.md: $(srcdir)/config/aarch64/gentune.sh \ + $(srcdir)/config/aarch64/aarch64-cores.def + $(SHELL) $(srcdir)/config/aarch64/gentune.sh \ + $(srcdir)/config/aarch64/aarch64-cores.def > \ + $(srcdir)/config/aarch64/aarch64-tune.md + +aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) expr.h $(TM_P_H) $(RECOG_H) langhooks.h \ + $(DIAGNOSTIC_CORE_H) $(OPTABS_H) \ + $(srcdir)/config/aarch64/aarch64-simd-builtins.def + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-builtins.c + +aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + +comma=, +MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) +MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) diff --git a/gcc-4.9/gcc/config/aarch64/t-aarch64-linux b/gcc-4.9/gcc/config/aarch64/t-aarch64-linux new file mode 100644 index 000000000..147452b04 --- /dev/null +++ b/gcc-4.9/gcc/config/aarch64/t-aarch64-linux @@ -0,0 +1,31 @@ +# Machine description for AArch64 architecture. +# Copyright (C) 2009-2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +LIB1ASMSRC = aarch64/lib1funcs.asm +LIB1ASMFUNCS = _aarch64_sync_cache_range + +AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be) +MULTILIB_OSDIRNAMES = .=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) +MULTIARCH_DIRNAME = $(call if_multiarch,aarch64$(AARCH_BE)-linux-gnu) + +# Disable the multilib for linux-gnu targets for the time being; focus +# on the baremetal targets. +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = diff --git a/gcc-4.9/gcc/config/alpha/alpha-modes.def b/gcc-4.9/gcc/config/alpha/alpha-modes.def new file mode 100644 index 000000000..dbfbed0b7 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha-modes.def @@ -0,0 +1,27 @@ +/* Alpha extra machine modes. + Copyright (C) 2003-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* 128-bit floating point. This gets reset in alpha_option_override + if VAX float format is in use. */ +FLOAT_MODE (TF, 16, ieee_quad_format); + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODE (INT, QI, 4); /* V4QI */ +VECTOR_MODE (INT, QI, 2); /* V2QI */ diff --git a/gcc-4.9/gcc/config/alpha/alpha-protos.h b/gcc-4.9/gcc/config/alpha/alpha-protos.h new file mode 100644 index 000000000..753a762a5 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha-protos.h @@ -0,0 +1,117 @@ +/* Prototypes for alpha.c functions used in the md file & elsewhere. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +extern int alpha_next_sequence_number; + +extern void literal_section (void); +extern int zap_mask (HOST_WIDE_INT); +extern int direct_return (void); + +extern int alpha_sa_size (void); +extern HOST_WIDE_INT alpha_initial_elimination_offset (unsigned int, + unsigned int); +extern void alpha_expand_prologue (void); +extern void alpha_expand_epilogue (void); +extern void alpha_output_filename (FILE *, const char *); + +extern bool alpha_legitimate_constant_p (enum machine_mode, rtx); +extern rtx alpha_legitimize_reload_address (rtx, enum machine_mode, + int, int, int); + +extern rtx split_small_symbolic_operand (rtx); + +extern void get_aligned_mem (rtx, rtx *, rtx *); +extern rtx get_unaligned_address (rtx); +extern rtx get_unaligned_offset (rtx, HOST_WIDE_INT); +extern enum reg_class alpha_preferred_reload_class (rtx, enum reg_class); + +extern void alpha_set_memflags (rtx, rtx); +extern bool alpha_split_const_mov (enum machine_mode, rtx *); +extern bool alpha_expand_mov (enum machine_mode, rtx *); +extern bool alpha_expand_mov_nobwx (enum machine_mode, rtx *); +extern void alpha_expand_movmisalign (enum machine_mode, rtx *); +extern void alpha_emit_floatuns (rtx[]); +extern rtx alpha_emit_conditional_move (rtx, enum machine_mode); +extern void alpha_split_tmode_pair (rtx[], enum machine_mode, bool); +extern void alpha_split_tfmode_frobsign (rtx[], rtx (*)(rtx, rtx, rtx)); +extern void alpha_expand_unaligned_load (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT, int); +extern void alpha_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT); +extern int alpha_expand_block_move (rtx []); +extern int alpha_expand_block_clear (rtx []); +extern rtx alpha_expand_zap_mask (HOST_WIDE_INT); +extern void alpha_expand_builtin_vector_binop (rtx (*)(rtx, rtx, rtx), + enum machine_mode, + rtx, rtx, rtx); +extern void alpha_expand_builtin_establish_vms_condition_handler (rtx, rtx); +extern void alpha_expand_builtin_revert_vms_condition_handler (rtx); + +extern rtx alpha_return_addr (int, rtx); +extern rtx alpha_gp_save_rtx (void); +extern void print_operand (FILE *, rtx, int); +extern void print_operand_address (FILE *, rtx); +extern void 
alpha_initialize_trampoline (rtx, rtx, rtx, int, int, int); + +extern rtx alpha_va_arg (tree, tree); +extern rtx function_value (const_tree, const_tree, enum machine_mode); + +extern void alpha_start_function (FILE *, const char *, tree); +extern void alpha_end_function (FILE *, const char *, tree); + +extern int alpha_find_lo_sum_using_gp (rtx); + +#ifdef REAL_VALUE_TYPE +extern int check_float_value (enum machine_mode, REAL_VALUE_TYPE *, int); +#endif + +#ifdef RTX_CODE +extern void alpha_emit_conditional_branch (rtx[], enum machine_mode); +extern bool alpha_emit_setcc (rtx[], enum machine_mode); +extern int alpha_split_conditional_move (enum rtx_code, rtx, rtx, rtx, rtx); +extern void alpha_emit_xfloating_arith (enum rtx_code, rtx[]); +extern void alpha_emit_xfloating_cvt (enum rtx_code, rtx[]); +extern void alpha_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, + enum memmodel); +extern void alpha_split_compare_and_swap (rtx op[]); +extern void alpha_expand_compare_and_swap_12 (rtx op[]); +extern void alpha_split_compare_and_swap_12 (rtx op[]); +extern void alpha_split_atomic_exchange (rtx op[]); +extern void alpha_expand_atomic_exchange_12 (rtx op[]); +extern void alpha_split_atomic_exchange_12 (rtx op[]); +#endif + +extern rtx alpha_use_linkage (rtx, bool, bool); + +#if TARGET_ABI_OPEN_VMS +extern enum avms_arg_type alpha_arg_type (enum machine_mode); +extern rtx alpha_arg_info_reg_val (CUMULATIVE_ARGS); +extern void avms_asm_output_external (FILE *, tree, const char *); +extern void vms_output_aligned_decl_common (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern HOST_WIDE_INT alpha_vms_initial_elimination_offset (unsigned int, + unsigned int); +#endif + +extern rtx unicosmk_add_call_info_word (rtx); + +extern int some_small_symbolic_operand_int (rtx *, void *); +extern int tls_symbolic_operand_1 (rtx, int, int); +extern rtx resolve_reload_operand (rtx); diff --git a/gcc-4.9/gcc/config/alpha/alpha.c b/gcc-4.9/gcc/config/alpha/alpha.c new file mode 100644 index 000000000..df4cc1b1c --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha.c @@ -0,0 +1,9898 @@ +/* Subroutines used for code generation on the DEC Alpha. + Copyright (C) 1992-2014 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "expr.h" +#include "optabs.h" +#include "reload.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "diagnostic-core.h" +#include "ggc.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "common/common-target.h" +#include "debug.h" +#include "langhooks.h" +#include "splay-tree.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "gimple-ssa.h" +#include "stringpool.h" +#include "tree-ssanames.h" +#include "tree-stdarg.h" +#include "tm-constrs.h" +#include "df.h" +#include "libfuncs.h" +#include "opts.h" +#include "params.h" + +/* Specify which cpu to schedule for. */ +enum processor_type alpha_tune; + +/* Which cpu we're generating code for. */ +enum processor_type alpha_cpu; + +static const char * const alpha_cpu_name[] = +{ + "ev4", "ev5", "ev6" +}; + +/* Specify how accurate floating-point traps need to be. */ + +enum alpha_trap_precision alpha_tp; + +/* Specify the floating-point rounding mode. */ + +enum alpha_fp_rounding_mode alpha_fprm; + +/* Specify which things cause traps. */ + +enum alpha_fp_trap_mode alpha_fptm; + +/* Nonzero if inside of a function, because the Alpha asm can't + handle .files inside of functions. */ + +static int inside_function = FALSE; + +/* The number of cycles of latency we should assume on memory reads. */ + +int alpha_memory_latency = 3; + +/* Whether the function needs the GP. */ + +static int alpha_function_needs_gp; + +/* The assembler name of the current function. */ + +static const char *alpha_fnname; + +/* The next explicit relocation sequence number. */ +extern GTY(()) int alpha_next_sequence_number; +int alpha_next_sequence_number = 1; + +/* The literal and gpdisp sequence numbers for this insn, as printed + by %# and %* respectively. */ +extern GTY(()) int alpha_this_literal_sequence_number; +extern GTY(()) int alpha_this_gpdisp_sequence_number; +int alpha_this_literal_sequence_number; +int alpha_this_gpdisp_sequence_number; + +/* Costs of various operations on the different architectures. 
*/ + +struct alpha_rtx_cost_data +{ + unsigned char fp_add; + unsigned char fp_mult; + unsigned char fp_div_sf; + unsigned char fp_div_df; + unsigned char int_mult_si; + unsigned char int_mult_di; + unsigned char int_shift; + unsigned char int_cmov; + unsigned short int_div; +}; + +static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] = +{ + { /* EV4 */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (34), /* fp_div_sf */ + COSTS_N_INSNS (63), /* fp_div_df */ + COSTS_N_INSNS (23), /* int_mult_si */ + COSTS_N_INSNS (23), /* int_mult_di */ + COSTS_N_INSNS (2), /* int_shift */ + COSTS_N_INSNS (2), /* int_cmov */ + COSTS_N_INSNS (97), /* int_div */ + }, + { /* EV5 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult */ + COSTS_N_INSNS (15), /* fp_div_sf */ + COSTS_N_INSNS (22), /* fp_div_df */ + COSTS_N_INSNS (8), /* int_mult_si */ + COSTS_N_INSNS (12), /* int_mult_di */ + COSTS_N_INSNS (1) + 1, /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (83), /* int_div */ + }, + { /* EV6 */ + COSTS_N_INSNS (4), /* fp_add */ + COSTS_N_INSNS (4), /* fp_mult */ + COSTS_N_INSNS (12), /* fp_div_sf */ + COSTS_N_INSNS (15), /* fp_div_df */ + COSTS_N_INSNS (7), /* int_mult_si */ + COSTS_N_INSNS (7), /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (2), /* int_cmov */ + COSTS_N_INSNS (86), /* int_div */ + }, +}; + +/* Similar but tuned for code size instead of execution latency. The + extra +N is fractional cost tuning based on latency. It's used to + encourage use of cheaper insns like shift, but only if there's just + one of them. */ + +static struct alpha_rtx_cost_data const alpha_rtx_cost_size = +{ + COSTS_N_INSNS (1), /* fp_add */ + COSTS_N_INSNS (1), /* fp_mult */ + COSTS_N_INSNS (1), /* fp_div_sf */ + COSTS_N_INSNS (1) + 1, /* fp_div_df */ + COSTS_N_INSNS (1) + 1, /* int_mult_si */ + COSTS_N_INSNS (1) + 2, /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (6), /* int_div */ +}; + +/* Get the number of args of a function in one of two ways. */ +#if TARGET_ABI_OPEN_VMS +#define NUM_ARGS crtl->args.info.num_args +#else +#define NUM_ARGS crtl->args.info +#endif + +#define REG_PV 27 +#define REG_RA 26 + +/* Declarations of static functions. */ +static struct machine_function *alpha_init_machine_status (void); +static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx); + +#if TARGET_ABI_OPEN_VMS +static void alpha_write_linkage (FILE *, const char *); +static bool vms_valid_pointer_mode (enum machine_mode); +#else +#define vms_patch_builtins() gcc_unreachable() +#endif + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +/* Implement TARGET_MANGLE_TYPE. */ + +static const char * +alpha_mangle_type (const_tree type) +{ + if (TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_LONG_DOUBLE_128) + return "g"; + + /* For all other types, use normal C++ mangling. */ + return NULL; +} +#endif + +/* Parse target option strings. */ + +static void +alpha_option_override (void) +{ + static const struct cpu_table { + const char *const name; + const enum processor_type processor; + const int flags; + const unsigned short line_size; /* in bytes */ + const unsigned short l1_size; /* in kb. */ + const unsigned short l2_size; /* in kb. */ + } cpu_table[] = { + /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches. + EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45 + had 64k to 8M 8-byte direct Bcache. 
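A brief sketch of how the fractional "+ 1" / "+ 2" entries in alpha_rtx_cost_size above work, assuming the usual rtl.h definition of the macro:

  /* rtl.h scales instruction counts by 4.  */
  #define COSTS_N_INSNS(N) ((N) * 4)

  /* So EV5's int_shift entry, COSTS_N_INSNS (1) + 1, evaluates to 5,
     i.e. "1.25 instructions": slightly dearer than a plain one-insn op,
     which is how the fractional latency tuning mentioned above is encoded.  */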
*/ + { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 }, + { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 }, + { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 }, + + /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2, + and 1M to 16M 64 byte L3 (not modeled). + PCA56 had 16k 64-byte cache; PCA57 had 32k Icache. + PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */ + { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 }, + { "21164", PROCESSOR_EV5, 0, 32, 8, 96 }, + { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 }, + { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 }, + { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, + { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, + { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, + + /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */ + { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 }, + { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 }, + { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX, + 64, 64, 16*1024 }, + { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX, + 64, 64, 16*1024 } + }; + + int const ct_size = ARRAY_SIZE (cpu_table); + int line_size = 0, l1_size = 0, l2_size = 0; + int i; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + /* Default to full IEEE compliance mode for Go language. */ + if (strcmp (lang_hooks.name, "GNU Go") == 0 + && !(target_flags_explicit & MASK_IEEE)) + target_flags |= MASK_IEEE; + + alpha_fprm = ALPHA_FPRM_NORM; + alpha_tp = ALPHA_TP_PROG; + alpha_fptm = ALPHA_FPTM_N; + + if (TARGET_IEEE) + { + alpha_tp = ALPHA_TP_INSN; + alpha_fptm = ALPHA_FPTM_SU; + } + if (TARGET_IEEE_WITH_INEXACT) + { + alpha_tp = ALPHA_TP_INSN; + alpha_fptm = ALPHA_FPTM_SUI; + } + + if (alpha_tp_string) + { + if (! strcmp (alpha_tp_string, "p")) + alpha_tp = ALPHA_TP_PROG; + else if (! strcmp (alpha_tp_string, "f")) + alpha_tp = ALPHA_TP_FUNC; + else if (! strcmp (alpha_tp_string, "i")) + alpha_tp = ALPHA_TP_INSN; + else + error ("bad value %qs for -mtrap-precision switch", alpha_tp_string); + } + + if (alpha_fprm_string) + { + if (! strcmp (alpha_fprm_string, "n")) + alpha_fprm = ALPHA_FPRM_NORM; + else if (! strcmp (alpha_fprm_string, "m")) + alpha_fprm = ALPHA_FPRM_MINF; + else if (! strcmp (alpha_fprm_string, "c")) + alpha_fprm = ALPHA_FPRM_CHOP; + else if (! strcmp (alpha_fprm_string,"d")) + alpha_fprm = ALPHA_FPRM_DYN; + else + error ("bad value %qs for -mfp-rounding-mode switch", + alpha_fprm_string); + } + + if (alpha_fptm_string) + { + if (strcmp (alpha_fptm_string, "n") == 0) + alpha_fptm = ALPHA_FPTM_N; + else if (strcmp (alpha_fptm_string, "u") == 0) + alpha_fptm = ALPHA_FPTM_U; + else if (strcmp (alpha_fptm_string, "su") == 0) + alpha_fptm = ALPHA_FPTM_SU; + else if (strcmp (alpha_fptm_string, "sui") == 0) + alpha_fptm = ALPHA_FPTM_SUI; + else + error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string); + } + + if (alpha_cpu_string) + { + for (i = 0; i < ct_size; i++) + if (! strcmp (alpha_cpu_string, cpu_table [i].name)) + { + alpha_tune = alpha_cpu = cpu_table[i].processor; + line_size = cpu_table[i].line_size; + l1_size = cpu_table[i].l1_size; + l2_size = cpu_table[i].l2_size; + target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX); + target_flags |= cpu_table[i].flags; + break; + } + if (i == ct_size) + error ("bad value %qs for -mcpu switch", alpha_cpu_string); + } + + if (alpha_tune_string) + { + for (i = 0; i < ct_size; i++) + if (! 
strcmp (alpha_tune_string, cpu_table [i].name)) + { + alpha_tune = cpu_table[i].processor; + line_size = cpu_table[i].line_size; + l1_size = cpu_table[i].l1_size; + l2_size = cpu_table[i].l2_size; + break; + } + if (i == ct_size) + error ("bad value %qs for -mtune switch", alpha_tune_string); + } + + if (line_size) + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size, + global_options.x_param_values, + global_options_set.x_param_values); + if (l1_size) + maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size, + global_options.x_param_values, + global_options_set.x_param_values); + if (l2_size) + maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Do some sanity checks on the above options. */ + + if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI) + && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6) + { + warning (0, "fp software completion requires -mtrap-precision=i"); + alpha_tp = ALPHA_TP_INSN; + } + + if (alpha_cpu == PROCESSOR_EV6) + { + /* Except for EV6 pass 1 (not released), we always have precise + arithmetic traps. Which means we can do software completion + without minding trap shadows. */ + alpha_tp = ALPHA_TP_PROG; + } + + if (TARGET_FLOAT_VAX) + { + if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN) + { + warning (0, "rounding mode not supported for VAX floats"); + alpha_fprm = ALPHA_FPRM_NORM; + } + if (alpha_fptm == ALPHA_FPTM_SUI) + { + warning (0, "trap mode not supported for VAX floats"); + alpha_fptm = ALPHA_FPTM_SU; + } + if (target_flags_explicit & MASK_LONG_DOUBLE_128) + warning (0, "128-bit long double not supported for VAX floats"); + target_flags &= ~MASK_LONG_DOUBLE_128; + } + + { + char *end; + int lat; + + if (!alpha_mlat_string) + alpha_mlat_string = "L1"; + + if (ISDIGIT ((unsigned char)alpha_mlat_string[0]) + && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0')) + ; + else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l') + && ISDIGIT ((unsigned char)alpha_mlat_string[1]) + && alpha_mlat_string[2] == '\0') + { + static int const cache_latency[][4] = + { + { 3, 30, -1 }, /* ev4 -- Bcache is a guess */ + { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */ + { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */ + }; + + lat = alpha_mlat_string[1] - '0'; + if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1) + { + warning (0, "L%d cache latency unknown for %s", + lat, alpha_cpu_name[alpha_tune]); + lat = 3; + } + else + lat = cache_latency[alpha_tune][lat-1]; + } + else if (! strcmp (alpha_mlat_string, "main")) + { + /* Most current memories have about 370ns latency. This is + a reasonable guess for a fast cpu. */ + lat = 150; + } + else + { + warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string); + lat = 3; + } + + alpha_memory_latency = lat; + } + + /* Default the definition of "small data" to 8 bytes. */ + if (!global_options_set.x_g_switch_value) + g_switch_value = 8; + + /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */ + if (flag_pic == 1) + target_flags |= MASK_SMALL_DATA; + else if (flag_pic == 2) + target_flags &= ~MASK_SMALL_DATA; + + /* Align labels and loops for optimal branching. */ + /* ??? Kludge these by not doing anything if we don't optimize. 
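Reading the -mmemory-latency handling above off the cache_latency table: with -mtune=ev5 (row { 2, 12, 38 }), -mmemory-latency=L2 sets alpha_memory_latency to 12 cycles, -mmemory-latency=main uses the fixed 150-cycle guess, and a bare number such as -mmemory-latency=40 is taken verbatim.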
*/ + if (optimize > 0) + { + if (align_loops <= 0) + align_loops = 16; + if (align_jumps <= 0) + align_jumps = 16; + } + if (align_functions <= 0) + align_functions = 16; + + /* Register variables and functions with the garbage collector. */ + + /* Set up function hooks. */ + init_machine_status = alpha_init_machine_status; + + /* Tell the compiler when we're using VAX floating point. */ + if (TARGET_FLOAT_VAX) + { + REAL_MODE_FORMAT (SFmode) = &vax_f_format; + REAL_MODE_FORMAT (DFmode) = &vax_g_format; + REAL_MODE_FORMAT (TFmode) = NULL; + } + +#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 + if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) + target_flags |= MASK_LONG_DOUBLE_128; +#endif +} + +/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */ + +int +zap_mask (HOST_WIDE_INT value) +{ + int i; + + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if ((value & 0xff) != 0 && (value & 0xff) != 0xff) + return 0; + + return 1; +} + +/* Return true if OP is valid for a particular TLS relocation. + We are already guaranteed that OP is a CONST. */ + +int +tls_symbolic_operand_1 (rtx op, int size, int unspec) +{ + op = XEXP (op, 0); + + if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec) + return 0; + op = XVECEXP (op, 0, 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + switch (SYMBOL_REF_TLS_MODEL (op)) + { + case TLS_MODEL_LOCAL_DYNAMIC: + return unspec == UNSPEC_DTPREL && size == alpha_tls_size; + case TLS_MODEL_INITIAL_EXEC: + return unspec == UNSPEC_TPREL && size == 64; + case TLS_MODEL_LOCAL_EXEC: + return unspec == UNSPEC_TPREL && size == alpha_tls_size; + default: + gcc_unreachable (); + } +} + +/* Used by aligned_memory_operand and unaligned_memory_operand to + resolve what reload is going to do with OP if it's a register. */ + +rtx +resolve_reload_operand (rtx op) +{ + if (reload_in_progress) + { + rtx tmp = op; + if (GET_CODE (tmp) == SUBREG) + tmp = SUBREG_REG (tmp); + if (REG_P (tmp) + && REGNO (tmp) >= FIRST_PSEUDO_REGISTER) + { + op = reg_equiv_memory_loc (REGNO (tmp)); + if (op == 0) + return 0; + } + } + return op; +} + +/* The scalar modes supported differs from the default check-what-c-supports + version in that sometimes TFmode is available even when long double + indicates only DFmode. */ + +static bool +alpha_scalar_mode_supported_p (enum machine_mode mode) +{ + switch (mode) + { + case QImode: + case HImode: + case SImode: + case DImode: + case TImode: /* via optabs.c */ + return true; + + case SFmode: + case DFmode: + return true; + + case TFmode: + return TARGET_HAS_XFLOATING_LIBS; + + default: + return false; + } +} + +/* Alpha implements a couple of integer vector mode operations when + TARGET_MAX is enabled. We do not check TARGET_MAX here, however, + which allows the vectorizer to operate on e.g. move instructions, + or when expand_vector_operations can do something useful. */ + +static bool +alpha_vector_mode_supported_p (enum machine_mode mode) +{ + return mode == V8QImode || mode == V4HImode || mode == V2SImode; +} + +/* Return 1 if this function can directly return via $26. */ + +int +direct_return (void) +{ + return (TARGET_ABI_OSF + && reload_completed + && alpha_sa_size () == 0 + && get_frame_size () == 0 + && crtl->outgoing_args_size == 0 + && crtl->args.pretend_args_size == 0); +} + +/* Return the TLS model to use for SYMBOL. 
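To make zap_mask above concrete, here is a standalone restatement of the same byte test (hypothetical helper name, 64-bit host sketch only):

  #include <stdint.h>

  /* Mirror of zap_mask: every byte of the value must be 0x00 or 0xff.  */
  static int is_zap_mask (uint64_t v)
  {
    for (int i = 0; i < 8; i++, v >>= 8)
      if ((v & 0xff) != 0 && (v & 0xff) != 0xff)
        return 0;
    return 1;
  }

  /* is_zap_mask (0x00000000ffffffff) == 1   low-half mask
     is_zap_mask (0xff00ff00ff00ff00) == 1   alternating bytes
     is_zap_mask (0x0000000000123456) == 0   partial bytes present  */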
*/ + +static enum tls_model +tls_symbolic_operand_type (rtx symbol) +{ + enum tls_model model; + + if (GET_CODE (symbol) != SYMBOL_REF) + return TLS_MODEL_NONE; + model = SYMBOL_REF_TLS_MODEL (symbol); + + /* Local-exec with a 64-bit size is the same code as initial-exec. */ + if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64) + model = TLS_MODEL_INITIAL_EXEC; + + return model; +} + +/* Return true if the function DECL will share the same GP as any + function in the current unit of translation. */ + +static bool +decl_has_samegp (const_tree decl) +{ + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (!(*targetm.binds_local_p) (decl)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT. */ + /* ??? Irritatingly, static functions not yet emitted are still + marked "external". Apply this to non-static functions only. */ + return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl); +} + +/* Return true if EXP should be placed in the small data section. */ + +static bool +alpha_in_small_data_p (const_tree exp) +{ + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never in the small data area. Duh. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); + if (strcmp (section, ".sdata") == 0 + || strcmp (section, ".sbss") == 0) + return true; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in sdata because it might be too big when completed. */ + if (size > 0 && size <= g_switch_value) + return true; + } + + return false; +} + +#if TARGET_ABI_OPEN_VMS +static bool +vms_valid_pointer_mode (enum machine_mode mode) +{ + return (mode == SImode || mode == DImode); +} + +static bool +alpha_linkage_symbol_p (const char *symname) +{ + int symlen = strlen (symname); + + if (symlen > 4) + return strcmp (&symname [symlen - 4], "..lk") == 0; + + return false; +} + +#define LINKAGE_SYMBOL_REF_P(X) \ + ((GET_CODE (X) == SYMBOL_REF \ + && alpha_linkage_symbol_p (XSTR (X, 0))) \ + || (GET_CODE (X) == CONST \ + && GET_CODE (XEXP (X, 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \ + && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0)))) +#endif + +/* legitimate_address_p recognizes an RTL expression that is a valid + memory address for an instruction. The MODE argument is the + machine mode for the MEM expression that wants to use this address. + + For Alpha, we have either a constant address or the sum of a + register and a constant address, or just a register. For DImode, + any of those forms can be surrounded with an AND that clear the + low-order three bits; this is an "unaligned" access. */ + +static bool +alpha_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + /* If this is an ldq_u type address, discard the outer AND. 
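Concretely, the shapes alpha_legitimate_address_p above accepts include a bare register, (plus reg (const_int 64)), a plain +/-32K constant address, and -- for DImode only -- the unaligned form (and (plus reg (const_int 6)) (const_int -8)) used by ldq_u; with explicit relocations, small-data symbols and (lo_sum reg sym) are additionally allowed for word-sized and smaller modes.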
*/ + if (mode == DImode + && GET_CODE (x) == AND + && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) == -8) + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Unadorned general registers are valid. */ + if (REG_P (x) + && (strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x))) + return true; + + /* Constant addresses (i.e. +/- 32k) are valid. */ + if (CONSTANT_ADDRESS_P (x)) + return true; + +#if TARGET_ABI_OPEN_VMS + if (LINKAGE_SYMBOL_REF_P (x)) + return true; +#endif + + /* Register plus a small constant offset is valid. */ + if (GET_CODE (x) == PLUS) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + if (REG_P (x)) + { + if (! strict + && NONSTRICT_REG_OK_FP_BASE_P (x) + && CONST_INT_P (ofs)) + return true; + if ((strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)) + && CONSTANT_ADDRESS_P (ofs)) + return true; + } + } + + /* If we're managing explicit relocations, LO_SUM is valid, as are small + data symbols. Avoid explicit relocations of modes larger than word + mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */ + else if (TARGET_EXPLICIT_RELOCS + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) + { + if (small_symbolic_operand (x, Pmode)) + return true; + + if (GET_CODE (x) == LO_SUM) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (GET_CODE (x) == SUBREG + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Must have a valid base register. */ + if (! (REG_P (x) + && (strict + ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)))) + return false; + + /* The symbol must be local. */ + if (local_symbolic_operand (ofs, Pmode) + || dtp32_symbolic_operand (ofs, Pmode) + || tp32_symbolic_operand (ofs, Pmode)) + return true; + } + } + + return false; +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +static rtx +get_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. */ + +static rtx +alpha_legitimize_address_1 (rtx x, rtx scratch, enum machine_mode mode) +{ + HOST_WIDE_INT addend; + + /* If the address is (plus reg const_int) and the CONST_INT is not a + valid offset, compute the high part of the constant and add it to + the register. Then our address is (plus temp low-part-const). */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && ! CONSTANT_ADDRESS_P (XEXP (x, 1))) + { + addend = INTVAL (XEXP (x, 1)); + x = XEXP (x, 0); + goto split_addend; + } + + /* If the address is (const (plus FOO const_int)), find the low-order + part of the CONST_INT. Then load FOO plus any high-order part of the + CONST_INT into a register. Our address is (plus reg low-part-const). + This is done to reduce the number of GOT entries. 
*/ + if (can_create_pseudo_p () + && GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + addend = INTVAL (XEXP (XEXP (x, 0), 1)); + x = force_reg (Pmode, XEXP (XEXP (x, 0), 0)); + goto split_addend; + } + + /* If we have a (plus reg const), emit the load as in (2), then add + the two registers, and finally generate (plus reg low-part-const) as + our address. */ + if (can_create_pseudo_p () + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1))) + { + addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1)); + x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0), + XEXP (XEXP (XEXP (x, 1), 0), 0), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + goto split_addend; + } + + /* If this is a local symbol, split the address into HIGH/LO_SUM parts. + Avoid modes larger than word mode since i.e. $LC0+8($1) can fold + around +/- 32k offset. */ + if (TARGET_EXPLICIT_RELOCS + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && symbolic_operand (x, Pmode)) + { + rtx r0, r16, eqv, tga, tp, insn, dest, seq; + + switch (tls_symbolic_operand_type (x)) + { + case TLS_MODEL_NONE: + break; + + case TLS_MODEL_GLOBAL_DYNAMIC: + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + dest = gen_reg_rtx (Pmode); + seq = GEN_INT (alpha_next_sequence_number++); + + emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq)); + insn = gen_call_value_osf_tlsgd (r0, tga, seq); + insn = emit_call_insn (insn); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + emit_libcall_block (insn, dest, r0, x); + return dest; + + case TLS_MODEL_LOCAL_DYNAMIC: + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + scratch = gen_reg_rtx (Pmode); + seq = GEN_INT (alpha_next_sequence_number++); + + emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq)); + insn = gen_call_value_osf_tlsldm (r0, tga, seq); + insn = emit_call_insn (insn); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLSLDM_CALL); + emit_libcall_block (insn, scratch, r0, eqv); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + + if (alpha_tls_size == 64) + { + dest = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, dest, eqv)); + emit_insn (gen_adddi3 (dest, dest, scratch)); + return dest; + } + if (alpha_tls_size == 32) + { + insn = gen_rtx_HIGH (Pmode, eqv); + insn = gen_rtx_PLUS (Pmode, scratch, insn); + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, insn)); + } + return gen_rtx_LO_SUM (Pmode, scratch, eqv); + + case TLS_MODEL_INITIAL_EXEC: + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + scratch = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + + emit_insn (gen_get_thread_pointerdi (tp)); + emit_insn (gen_rtx_SET (VOIDmode, scratch, eqv)); + emit_insn (gen_adddi3 (dest, tp, scratch)); + return dest; + + case TLS_MODEL_LOCAL_EXEC: + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + + emit_insn 
(gen_get_thread_pointerdi (tp)); + if (alpha_tls_size == 32) + { + insn = gen_rtx_HIGH (Pmode, eqv); + insn = gen_rtx_PLUS (Pmode, tp, insn); + tp = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, tp, insn)); + } + return gen_rtx_LO_SUM (Pmode, tp, eqv); + + default: + gcc_unreachable (); + } + + if (local_symbolic_operand (x, Pmode)) + { + if (small_symbolic_operand (x, Pmode)) + return x; + else + { + if (can_create_pseudo_p ()) + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, + gen_rtx_HIGH (Pmode, x))); + return gen_rtx_LO_SUM (Pmode, scratch, x); + } + } + } + + return NULL; + + split_addend: + { + HOST_WIDE_INT low, high; + + low = ((addend & 0xffff) ^ 0x8000) - 0x8000; + addend -= low; + high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000; + addend -= high; + + if (addend) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + if (high) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + + return plus_constant (Pmode, x, low); + } +} + + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. Return X or the new, valid address. */ + +static rtx +alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode); + return new_x ? new_x : x; +} + +/* Return true if ADDR has an effect that depends on the machine mode it + is used for. On the Alpha this is true only for the unaligned modes. + We can simplify the test since we know that the address must be valid. */ + +static bool +alpha_mode_dependent_address_p (const_rtx addr, + addr_space_t as ATTRIBUTE_UNUSED) +{ + return GET_CODE (addr) == AND; +} + +/* Primarily this is required for TLS symbols, but given that our move + patterns *ought* to be able to handle any symbol at any time, we + should never be spilling symbolic operands to the constant pool, ever. */ + +static bool +alpha_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + enum rtx_code code = GET_CODE (x); + return code == SYMBOL_REF || code == LABEL_REF || code == CONST; +} + +/* We do not allow indirect calls to be optimized into sibling calls, nor + can we allow a call to a function with a different GP to be optimized + into a sibcall. */ + +static bool +alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + /* Can't do indirect tail calls, since we don't know if the target + uses the same GP. */ + if (!decl) + return false; + + /* Otherwise, we can make a tail call if the target function shares + the same GP. */ + return decl_has_samegp (decl); +} + +int +some_small_symbolic_operand_int (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + return -1; + + return small_symbolic_operand (x, Pmode) != 0; +} + +static int +split_small_symbolic_operand_1 (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + return -1; + + if (small_symbolic_operand (x, Pmode)) + { + x = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x); + *px = x; + return -1; + } + + return 0; +} + +rtx +split_small_symbolic_operand (rtx x) +{ + x = copy_insn (x); + for_each_rtx (&x, split_small_symbolic_operand_1, NULL); + return x; +} + +/* Indicate that INSN cannot be duplicated. 
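The 16-bit splitting idiom used in split_addend above (and again in alpha_legitimize_reload_address below) is easiest to see with numbers; a minimal host-side sketch, with hypothetical helper names:

  #include <stdint.h>

  /* ((a & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits,
     which is exactly what an lda displacement can carry.  */
  static int64_t lo16 (int64_t a) { return ((a & 0xffff) ^ 0x8000) - 0x8000; }

  int main (void)
  {
    int64_t addend = 0x18000;            /* too big for one 16-bit displacement */
    int64_t low    = lo16 (addend);      /* -0x8000 */
    int64_t rest   = addend - low;       /*  0x20000 */
    int64_t high   = ((rest & 0xffffffff) ^ 0x80000000) - 0x80000000;  /* 0x20000 */

    /* The address is rebuilt as (base + 0x20000) + (-0x8000): one ldah of
       high >> 16 == 2, with the remaining -0x8000 left as the memory insn's
       16-bit displacement.  */
    return !(high + low == addend);
  }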
This is true for any insn + that we've marked with gpdisp relocs, since those have to stay in + 1-1 correspondence with one another. + + Technically we could copy them if we could set up a mapping from one + sequence number to another, across the set of insns to be duplicated. + This seems overly complicated and error-prone since interblock motion + from sched-ebb could move one of the pair of insns to a different block. + + Also cannot allow jsr insns to be duplicated. If they throw exceptions, + then they'll be in a different block from their ldgp. Which could lead + the bb reorder code to think that it would be ok to copy just the block + containing the call and branch to the block containing the ldgp. */ + +static bool +alpha_cannot_copy_insn_p (rtx insn) +{ + if (!reload_completed || !TARGET_EXPLICIT_RELOCS) + return false; + if (recog_memoized (insn) >= 0) + return get_attr_cannot_copy (insn); + else + return false; +} + + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and return the new rtx. */ + +rtx +alpha_legitimize_reload_address (rtx x, + enum machine_mode mode ATTRIBUTE_UNUSED, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + /* We must recognize output that we have already generated ourselves. */ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && CONST_INT_P (XEXP (x, 1))) + { + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + /* We wish to handle large displacements off a base register by + splitting the addend across an ldah and the mem insn. This + cuts number of extra insns needed from 3 to 1. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER + && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0))) + && GET_CODE (XEXP (x, 1)) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT high + = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; + + /* Check for 32-bit overflow. */ + if (high + low != val) + return NULL_RTX; + + /* Reload the high part into a base reg; leave the low part + in the mem directly. */ + x = gen_rtx_PLUS (GET_MODE (x), + gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), + GEN_INT (high)), + GEN_INT (low)); + + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + return x; + } + + return NULL_RTX; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total, + bool speed) +{ + enum machine_mode mode = GET_MODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + const struct alpha_rtx_cost_data *cost_data; + + if (!speed) + cost_data = &alpha_rtx_cost_size; + else + cost_data = &alpha_rtx_cost_data[alpha_tune]; + + switch (code) + { + case CONST_INT: + /* If this is an 8-bit constant, return zero since it can be used + nearly anywhere with no cost. If it is a valid operand for an + ADD or AND, likewise return 0 if we know it will be used in that + context. Otherwise, return 2 since it might be used there later. 
+ All other constants take at least two insns. */ + if (INTVAL (x) >= 0 && INTVAL (x) < 256) + { + *total = 0; + return true; + } + /* FALLTHRU */ + + case CONST_DOUBLE: + if (x == CONST0_RTX (mode)) + *total = 0; + else if ((outer_code == PLUS && add_operand (x, VOIDmode)) + || (outer_code == AND && and_operand (x, VOIDmode))) + *total = 0; + else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode)) + *total = 2; + else + *total = COSTS_N_INSNS (2); + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (outer_code != MEM); + else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (1 + (outer_code != MEM)); + else if (tls_symbolic_operand_type (x)) + /* Estimate of cost for call_pal rduniq. */ + /* ??? How many insns do we emit here? More than one... */ + *total = COSTS_N_INSNS (15); + else + /* Otherwise we do a load from the GOT. */ + *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); + return true; + + case HIGH: + /* This is effectively an add_operand. */ + *total = 2; + return true; + + case PLUS: + case MINUS: + if (float_mode_p) + *total = cost_data->fp_add; + else if (GET_CODE (XEXP (x, 0)) == MULT + && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + { + *total = (rtx_cost (XEXP (XEXP (x, 0), 0), + (enum rtx_code) outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), + (enum rtx_code) outer_code, opno, speed) + + COSTS_N_INSNS (1)); + return true; + } + return false; + + case MULT: + if (float_mode_p) + *total = cost_data->fp_mult; + else if (mode == DImode) + *total = cost_data->int_mult_di; + else + *total = cost_data->int_mult_si; + return false; + + case ASHIFT: + if (CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) <= 3) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ASHIFTRT: + case LSHIFTRT: + *total = cost_data->int_shift; + return false; + + case IF_THEN_ELSE: + if (float_mode_p) + *total = cost_data->fp_add; + else + *total = cost_data->int_cmov; + return false; + + case DIV: + case UDIV: + case MOD: + case UMOD: + if (!float_mode_p) + *total = cost_data->int_div; + else if (mode == SFmode) + *total = cost_data->fp_div_sf; + else + *total = cost_data->fp_div_df; + return false; + + case MEM: + *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); + return true; + + case NEG: + if (! float_mode_p) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ABS: + if (! float_mode_p) + { + *total = COSTS_N_INSNS (1) + cost_data->int_cmov; + return false; + } + /* FALLTHRU */ + + case FLOAT: + case UNSIGNED_FLOAT: + case FIX: + case UNSIGNED_FIX: + case FLOAT_TRUNCATE: + *total = cost_data->fp_add; + return false; + + case FLOAT_EXTEND: + if (MEM_P (XEXP (x, 0))) + *total = 0; + else + *total = cost_data->fp_add; + return false; + + default: + return false; + } +} + +/* REF is an alignable memory location. Place an aligned SImode + reference into *PALIGNED_MEM and the number of bits to shift into + *PBITNUM. SCRATCH is a free register for use in reloading out + of range stack slots. */ + +void +get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum) +{ + rtx base; + HOST_WIDE_INT disp, offset; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress + && ! 
memory_address_p (GET_MODE (ref), XEXP (ref, 0))) + { + base = find_replacement (&XEXP (ref, 0)); + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + else + disp = 0; + + /* Find the byte offset within an aligned word. If the memory itself is + claimed to be aligned, believe it. Otherwise, aligned_memory_operand + will have examined the base register and determined it is aligned, and + thus displacements from it are naturally alignable. */ + if (MEM_ALIGN (ref) >= 32) + offset = 0; + else + offset = disp & 3; + + /* The location should not cross aligned word boundary. */ + gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref)) + <= GET_MODE_SIZE (SImode)); + + /* Access the entire aligned word. */ + *paligned_mem = widen_memory_access (ref, SImode, -offset); + + /* Convert the byte offset within the word to a bit offset. */ + offset *= BITS_PER_UNIT; + *pbitnum = GEN_INT (offset); +} + +/* Similar, but just get the address. Handle the two reload cases. + Add EXTRA_OFFSET to the address we return. */ + +rtx +get_unaligned_address (rtx ref) +{ + rtx base; + HOST_WIDE_INT offset = 0; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress + && ! memory_address_p (GET_MODE (ref), XEXP (ref, 0))) + { + base = find_replacement (&XEXP (ref, 0)); + + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + + return plus_constant (Pmode, base, offset); +} + +/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7. + X is always returned in a register. */ + +rtx +get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs) +{ + if (GET_CODE (addr) == PLUS) + { + ofs += INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7), + NULL_RTX, 1, OPTAB_LIB_WIDEN); +} + +/* On the Alpha, all (non-symbolic) constants except zero go into + a floating-point register via memory. Note that we cannot + return anything that is not a subset of RCLASS, and that some + symbolic constants cannot be dropped to memory. */ + +enum reg_class +alpha_preferred_reload_class(rtx x, enum reg_class rclass) +{ + /* Zero is present in any register class. */ + if (x == CONST0_RTX (GET_MODE (x))) + return rclass; + + /* These sorts of constants we can easily drop to memory. */ + if (CONST_INT_P (x) + || GET_CODE (x) == CONST_DOUBLE + || GET_CODE (x) == CONST_VECTOR) + { + if (rclass == FLOAT_REGS) + return NO_REGS; + if (rclass == ALL_REGS) + return GENERAL_REGS; + return rclass; + } + + /* All other kinds of constants should not (and in the case of HIGH + cannot) be dropped to memory -- instead we use a GENERAL_REGS + secondary reload. */ + if (CONSTANT_P (x)) + return (rclass == ALL_REGS ? GENERAL_REGS : rclass); + + return rclass; +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. */ + +static reg_class_t +alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + enum reg_class rclass = (enum reg_class) rclass_i; + + /* Loading and storing HImode or QImode values to and from memory + usually requires a scratch register. 
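A worked case for get_aligned_mem above: a HImode reference at base+6 whose MEM_ALIGN is below 32 gives offset = 6 & 3 = 2, so *paligned_mem becomes an SImode access at base+4 (widen_memory_access with -offset) and *pbitnum is 16 -- the halfword is then picked out of bits 16..31 of that aligned longword.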
*/ + if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode)) + { + if (any_memory_operand (x, mode)) + { + if (in_p) + { + if (!aligned_memory_operand (x, mode)) + sri->icode = direct_optab_handler (reload_in_optab, mode); + } + else + sri->icode = direct_optab_handler (reload_out_optab, mode); + return NO_REGS; + } + } + + /* We also cannot do integral arithmetic into FP regs, as might result + from register elimination into a DImode fp register. */ + if (rclass == FLOAT_REGS) + { + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + return GENERAL_REGS; + if (in_p && INTEGRAL_MODE_P (mode) + && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x)) + return GENERAL_REGS; + } + + return NO_REGS; +} + +/* Subfunction of the following function. Update the flags of any MEM + found in part of X. */ + +static int +alpha_set_memflags_1 (rtx *xp, void *data) +{ + rtx x = *xp, orig = (rtx) data; + + if (!MEM_P (x)) + return 0; + + MEM_VOLATILE_P (x) = MEM_VOLATILE_P (orig); + MEM_NOTRAP_P (x) = MEM_NOTRAP_P (orig); + MEM_READONLY_P (x) = MEM_READONLY_P (orig); + + /* Sadly, we cannot use alias sets because the extra aliasing + produced by the AND interferes. Given that two-byte quantities + are the only thing we would be able to differentiate anyway, + there does not seem to be any point in convoluting the early + out of the alias check. */ + + return -1; +} + +/* Given SEQ, which is an INSN list, look for any MEMs in either + a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and + volatile flags from REF into each of the MEMs found. If REF is not + a MEM, don't do anything. */ + +void +alpha_set_memflags (rtx seq, rtx ref) +{ + rtx insn; + + if (!MEM_P (ref)) + return; + + /* This is only called from alpha.md, after having had something + generated from one of the insn patterns. So if everything is + zero, the pattern is already up-to-date. */ + if (!MEM_VOLATILE_P (ref) + && !MEM_NOTRAP_P (ref) + && !MEM_READONLY_P (ref)) + return; + + for (insn = seq; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + for_each_rtx (&PATTERN (insn), alpha_set_memflags_1, (void *) ref); + else + gcc_unreachable (); +} + +static rtx alpha_emit_set_const (rtx, enum machine_mode, HOST_WIDE_INT, + int, bool); + +/* Internal routine for alpha_emit_set_const to check for N or below insns. + If NO_OUTPUT is true, then we only check to see if N insns are possible, + and return pc_rtx if successful. */ + +static rtx +alpha_emit_set_const_1 (rtx target, enum machine_mode mode, + HOST_WIDE_INT c, int n, bool no_output) +{ + HOST_WIDE_INT new_const; + int i, bits; + /* Use a pseudo if highly optimizing and still generating RTL. */ + rtx subtarget + = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target); + rtx temp, insn; + + /* If this is a sign-extended 32-bit constant, we can do this in at most + three insns, so do it if we have enough insns left. We always have + a sign-extended 32-bit constant when compiling on a narrow machine. */ + + if (HOST_BITS_PER_WIDE_INT != 64 + || c >> 31 == -1 || c >> 31 == 0) + { + HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT tmp1 = c - low; + HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT extra = 0; + + /* If HIGH will be interpreted as negative but the constant is + positive, we must adjust it to do two ldha insns. 
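A worked case for the adjustment just described: c = 0x7fff8000 gives low = -0x8000, tmp1 = 0x80000000 and an initial high of -0x8000 -- negative even though c is positive -- so extra becomes 0x4000 and high is recomputed as 0x4000. The constant is then built from two ldah-style adds of 0x4000 plus an lda of -0x8000: 0x40000000 + 0x40000000 - 0x8000 == 0x7fff8000.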
*/ + + if ((high & 0x8000) != 0 && c >= 0) + { + extra = 0x4000; + tmp1 -= 0x40000000; + high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000); + } + + if (c == low || (low == 0 && extra == 0)) + { + /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode) + but that meant that we can't handle INT_MIN on 32-bit machines + (like NT/Alpha), because we recurse indefinitely through + emit_move_insn to gen_movdi. So instead, since we know exactly + what we want, create it explicitly. */ + + if (no_output) + return pc_rtx; + if (target == NULL) + target = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (c))); + return target; + } + else if (n >= 2 + (extra != 0)) + { + if (no_output) + return pc_rtx; + if (!can_create_pseudo_p ()) + { + emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (high << 16))); + temp = target; + } + else + temp = copy_to_suggested_reg (GEN_INT (high << 16), + subtarget, mode); + + /* As of 2002-02-23, addsi3 is only available when not optimizing. + This means that if we go through expand_binop, we'll try to + generate extensions, etc, which will require new pseudos, which + will fail during some split phases. The SImode add patterns + still exist, but are not named. So build the insns by hand. */ + + if (extra != 0) + { + if (! subtarget) + subtarget = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16)); + insn = gen_rtx_SET (VOIDmode, subtarget, insn); + emit_insn (insn); + temp = subtarget; + } + + if (target == NULL) + target = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (low)); + insn = gen_rtx_SET (VOIDmode, target, insn); + emit_insn (insn); + return target; + } + } + + /* If we couldn't do it that way, try some other methods. But if we have + no instructions left, don't bother. Likewise, if this is SImode and + we can't make pseudos, we can't do anything since the expand_binop + and expand_unop calls will widen and try to make pseudos. */ + + if (n == 1 || (mode == SImode && !can_create_pseudo_p ())) + return 0; + + /* Next, see if we can load a related constant and then shift and possibly + negate it to get the constant we want. Try this once each increasing + numbers of insns. */ + + for (i = 1; i < n; i++) + { + /* First, see if minus some low bits, we've an easy load of + high bits. */ + + new_const = ((c & 0xffff) ^ 0x8000) - 0x8000; + if (new_const != 0) + { + temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, add_optab, temp, GEN_INT (new_const), + target, 0, OPTAB_WIDEN); + } + } + + /* Next try complementing. */ + temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_unop (mode, one_cmpl_optab, temp, target, 0); + } + + /* Next try to form a constant and do a left shift. We can do this + if some low-order bits are zero; the exact_log2 call below tells + us that information. The bits we are shifting out could be any + value, but here we'll just try the 0- and sign-extended forms of + the constant. To try to increase the chance of having the same + constant in more than one insn, start at the highest number of + bits to shift, but try all possibilities in case a ZAPNOT will + be useful. 
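A worked case for the shift strategy just described: c = 0x123400000 has exact_log2 (c & -c) == 22 and c >> 22 == 0x48d, which a single lda can load, so the constant is recovered with one left shift of 22 -- two insns in total.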
*/ + + bits = exact_log2 (c & -c); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c >> bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp && c < 0) + { + new_const = (unsigned HOST_WIDE_INT)c >> bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashl_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + + /* Now try high-order zero bits. Here we try the shifted-in bits as + all zero and all ones. Be careful to avoid shifting outside the + mode and to avoid shifting outside the host wide int size. */ + /* On narrow hosts, don't shift a 1 into the high bit, since we'll + confuse the recursive call and set all of the high 32 bits. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (c) - 1 - (HOST_BITS_PER_WIDE_INT < 64)); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1); + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, lshr_optab, temp, GEN_INT (bits), + target, 1, OPTAB_WIDEN); + } + } + + /* Now try high-order 1 bits. We get that with a sign-extension. + But one bit isn't enough here. Be careful to avoid shifting outside + the mode and to avoid shifting outside the host wide int size. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (~ c) - 2); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | (((HOST_WIDE_INT) 1 << bits) - 1); + temp = alpha_emit_set_const (subtarget, mode, new_const, + i, no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashr_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + } + +#if HOST_BITS_PER_WIDE_INT == 64 + /* Finally, see if can load a value into the target that is the same as the + constant except that all bytes that are 0 are changed to be 0xff. If we + can, then we can do a ZAPNOT to obtain the desired constant. */ + + new_const = c; + for (i = 0; i < 64; i += 8) + if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0) + new_const |= (HOST_WIDE_INT) 0xff << i; + + /* We are only called for SImode and DImode. If this is SImode, ensure that + we are sign extended to a full word. */ + + if (mode == SImode) + new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000; + + if (new_const != c) + { + temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const), + target, 0, OPTAB_WIDEN); + } + } +#endif + + return 0; +} + +/* Try to output insns to set TARGET equal to the constant C if it can be + done in less than N insns. Do all computations in MODE. Returns the place + where the output has been placed if it can be done and the insns have been + emitted. If it would take more than N insns, zero is returned and no + insns and emitted. 
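One case where the byte-fill fallback above can pay off: c = 0x0000ffffffff0000. Turning its zero bytes into 0xff gives new_const == -1, a one-insn load, and the AND mask c | ~new_const is just c itself -- a zap_mask pattern -- so the constant can be materialized as an all-ones load followed by a single zapnot-style AND keeping bytes 2..5.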
*/ + +static rtx +alpha_emit_set_const (rtx target, enum machine_mode mode, + HOST_WIDE_INT c, int n, bool no_output) +{ + enum machine_mode orig_mode = mode; + rtx orig_target = target; + rtx result = 0; + int i; + + /* If we can't make any pseudos, TARGET is an SImode hard register, we + can't load this constant in one insn, do this in DImode. */ + if (!can_create_pseudo_p () && mode == SImode + && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER) + { + result = alpha_emit_set_const_1 (target, mode, c, 1, no_output); + if (result) + return result; + + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + else if (mode == V8QImode || mode == V4HImode || mode == V2SImode) + { + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + + /* Try 1 insn, then 2, then up to N. */ + for (i = 1; i <= n; i++) + { + result = alpha_emit_set_const_1 (target, mode, c, i, no_output); + if (result) + { + rtx insn, set; + + if (no_output) + return result; + + insn = get_last_insn (); + set = single_set (insn); + if (! CONSTANT_P (SET_SRC (set))) + set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c)); + break; + } + } + + /* Allow for the case where we changed the mode of TARGET. */ + if (result) + { + if (result == target) + result = orig_target; + else if (mode != orig_mode) + result = gen_lowpart (orig_mode, result); + } + + return result; +} + +/* Having failed to find a 3 insn sequence in alpha_emit_set_const, + fall back to a straight forward decomposition. We do this to avoid + exponential run times encountered when looking for longer sequences + with alpha_emit_set_const. */ + +static rtx +alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1, HOST_WIDE_INT c2) +{ + HOST_WIDE_INT d1, d2, d3, d4; + + /* Decompose the entire word */ +#if HOST_BITS_PER_WIDE_INT >= 64 + gcc_assert (c2 == -(c1 < 0)); + d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d1; + d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + c1 = (c1 - d2) >> 32; + d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d3; + d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c1 == d4); +#else + d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d1; + d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c1 == d2); + c2 += (d2 < 0); + d3 = ((c2 & 0xffff) ^ 0x8000) - 0x8000; + c2 -= d3; + d4 = ((c2 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c2 == d4); +#endif + + /* Construct the high word */ + if (d4) + { + emit_move_insn (target, GEN_INT (d4)); + if (d3) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3))); + } + else + emit_move_insn (target, GEN_INT (d3)); + + /* Shift it into place */ + emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32))); + + /* Add in the low bits. */ + if (d2) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2))); + if (d1) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1))); + + return target; +} + +/* Given an integral CONST_INT, CONST_DOUBLE, or CONST_VECTOR, return + the low 64 bits. 
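For the straight-line decomposition in alpha_emit_set_long_const above, take c1 = 0x1234567890abcdef on a 64-bit host: d1 = -0x3211, d2 = -0x6f540000, d3 = 0x5679, d4 = 0x12340000, and the emitted sequence computes ((d4 + d3) << 32) + d2 + d1, rebuilding the value in five insns (ldah, lda, sll, ldah, lda).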
*/ + +static void +alpha_extract_integer (rtx x, HOST_WIDE_INT *p0, HOST_WIDE_INT *p1) +{ + HOST_WIDE_INT i0, i1; + + if (GET_CODE (x) == CONST_VECTOR) + x = simplify_subreg (DImode, x, GET_MODE (x), 0); + + + if (CONST_INT_P (x)) + { + i0 = INTVAL (x); + i1 = -(i0 < 0); + } + else if (HOST_BITS_PER_WIDE_INT >= 64) + { + i0 = CONST_DOUBLE_LOW (x); + i1 = -(i0 < 0); + } + else + { + i0 = CONST_DOUBLE_LOW (x); + i1 = CONST_DOUBLE_HIGH (x); + } + + *p0 = i0; + *p1 = i1; +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which + we are willing to load the value into a register via a move pattern. + Normally this is all symbolic constants, integral constants that + take three or fewer instructions, and floating-point zero. */ + +bool +alpha_legitimate_constant_p (enum machine_mode mode, rtx x) +{ + HOST_WIDE_INT i0, i1; + + switch (GET_CODE (x)) + { + case LABEL_REF: + case HIGH: + return true; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) + x = XEXP (XEXP (x, 0), 0); + else + return true; + + if (GET_CODE (x) != SYMBOL_REF) + return true; + + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS symbols are never valid. */ + return SYMBOL_REF_TLS_MODEL (x) == 0; + + case CONST_DOUBLE: + if (x == CONST0_RTX (mode)) + return true; + if (FLOAT_MODE_P (mode)) + return false; + goto do_integer; + + case CONST_VECTOR: + if (x == CONST0_RTX (mode)) + return true; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return false; + if (GET_MODE_SIZE (mode) != 8) + return false; + goto do_integer; + + case CONST_INT: + do_integer: + if (TARGET_BUILD_CONSTANTS) + return true; + alpha_extract_integer (x, &i0, &i1); + if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == (-i0 < 0)) + return alpha_emit_set_const_1 (x, mode, i0, 3, true) != NULL; + return false; + + default: + return false; + } +} + +/* Operand 1 is known to be a constant, and should require more than one + instruction to load. Emit that multi-part load. */ + +bool +alpha_split_const_mov (enum machine_mode mode, rtx *operands) +{ + HOST_WIDE_INT i0, i1; + rtx temp = NULL_RTX; + + alpha_extract_integer (operands[1], &i0, &i1); + + if (HOST_BITS_PER_WIDE_INT >= 64 || i1 == -(i0 < 0)) + temp = alpha_emit_set_const (operands[0], mode, i0, 3, false); + + if (!temp && TARGET_BUILD_CONSTANTS) + temp = alpha_emit_set_long_const (operands[0], i0, i1); + + if (temp) + { + if (!rtx_equal_p (operands[0], temp)) + emit_move_insn (operands[0], temp); + return true; + } + + return false; +} + +/* Expand a move instruction; return true if all work is done. + We don't handle non-bwx subword loads here. */ + +bool +alpha_expand_mov (enum machine_mode mode, rtx *operands) +{ + rtx tmp; + + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + + /* Allow legitimize_address to perform some simplifications. */ + if (mode == Pmode && symbolic_operand (operands[1], mode)) + { + tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode); + if (tmp) + { + if (tmp == operands[0]) + return true; + operands[1] = tmp; + return false; + } + } + + /* Early out for non-constants and valid constants. */ + if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) + return false; + + /* Split large integers. 
*/ + if (CONST_INT_P (operands[1]) + || GET_CODE (operands[1]) == CONST_DOUBLE + || GET_CODE (operands[1]) == CONST_VECTOR) + { + if (alpha_split_const_mov (mode, operands)) + return true; + } + + /* Otherwise we've nothing left but to drop the thing to memory. */ + tmp = force_const_mem (mode, operands[1]); + + if (tmp == NULL_RTX) + return false; + + if (reload_in_progress) + { + emit_move_insn (operands[0], XEXP (tmp, 0)); + operands[1] = replace_equiv_address (tmp, operands[0]); + } + else + operands[1] = validize_mem (tmp); + return false; +} + +/* Expand a non-bwx QImode or HImode move instruction; + return true if all work is done. */ + +bool +alpha_expand_mov_nobwx (enum machine_mode mode, rtx *operands) +{ + rtx seq; + + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + + /* Handle four memory cases, unaligned and aligned for either the input + or the output. The only case where we can be called during reload is + for aligned loads; all other cases require temporaries. */ + + if (any_memory_operand (operands[1], mode)) + { + if (aligned_memory_operand (operands[1], mode)) + { + if (reload_in_progress) + { + if (mode == QImode) + seq = gen_reload_inqi_aligned (operands[0], operands[1]); + else + seq = gen_reload_inhi_aligned (operands[0], operands[1]); + emit_insn (seq); + } + else + { + rtx aligned_mem, bitnum; + rtx scratch = gen_reg_rtx (SImode); + rtx subtarget; + bool copyout; + + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + if (mode == QImode) + seq = gen_aligned_loadqi (subtarget, aligned_mem, + bitnum, scratch); + else + seq = gen_aligned_loadhi (subtarget, aligned_mem, + bitnum, scratch); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + } + else + { + /* Don't pass these as parameters since that makes the generated + code depend on parameter evaluation order which will cause + bootstrap failures. 
*/ + + rtx temp1, temp2, subtarget, ua; + bool copyout; + + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + ua = get_unaligned_address (operands[1]); + if (mode == QImode) + seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); + else + seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); + + alpha_set_memflags (seq, operands[1]); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + return true; + } + + if (any_memory_operand (operands[0], mode)) + { + if (aligned_memory_operand (operands[0], mode)) + { + rtx aligned_mem, bitnum; + rtx temp1 = gen_reg_rtx (SImode); + rtx temp2 = gen_reg_rtx (SImode); + + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + + emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, + temp1, temp2)); + } + else + { + rtx temp1 = gen_reg_rtx (DImode); + rtx temp2 = gen_reg_rtx (DImode); + rtx temp3 = gen_reg_rtx (DImode); + rtx ua = get_unaligned_address (operands[0]); + + if (mode == QImode) + seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3); + else + seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3); + + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + } + return true; + } + + return false; +} + +/* Implement the movmisalign patterns. One of the operands is a memory + that is not naturally aligned. Emit instructions to load it. */ + +void +alpha_expand_movmisalign (enum machine_mode mode, rtx *operands) +{ + /* Honor misaligned loads, for those we promised to do so. */ + if (MEM_P (operands[1])) + { + rtx tmp; + + if (register_operand (operands[0], mode)) + tmp = operands[0]; + else + tmp = gen_reg_rtx (mode); + + alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + } + else if (MEM_P (operands[0])) + { + if (!reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); + } + else + gcc_unreachable (); +} + +/* Generate an unsigned DImode to FP conversion. This is the same code + optabs would emit if we didn't have TFmode patterns. + + For SFmode, this is the only construction I've found that can pass + gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode + intermediates will work, because you'll get intermediate rounding + that ruins the end result. Some of this could be fixed by turning + on round-to-positive-infinity, but that requires diddling the fpsr, + which kills performance. I tried turning this around and converting + to a negative number, so that I could turn on /m, but either I did + it wrong or there's something else cause I wound up with the exact + same single-bit error. There is a branch-less form of this same code: + + srl $16,1,$1 + and $16,1,$2 + cmplt $16,0,$3 + or $1,$2,$2 + cmovge $16,$16,$2 + itoft $3,$f10 + itoft $2,$f11 + cvtqs $f11,$f11 + adds $f11,$f11,$f0 + fcmoveq $f10,$f11,$f0 + + I'm not using it because it's the same number of instructions as + this branch-full form, and it has more serialized long latency + instructions on the critical path. 
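   A host-side model of the branchy SFmode sequence described above, assuming only a
   signed conversion (cvtqs) is available: the high-bit-set case halves the input while
   folding the discarded bit back in as a sticky bit, so the final doubling rounds the
   same way a direct unsigned conversion would.  Sketch only; the helper name is
   illustrative:

     #include <stdint.h>

     static float u64_to_float (uint64_t x)
     {
       if ((int64_t) x >= 0)
         return (float) (int64_t) x;          // plain signed conversion path
       uint64_t half = (x >> 1) | (x & 1);    // srl + and + or: keep a sticky bit
       float f = (float) (int64_t) half;      // cvtqs on the now-positive value
       return f + f;                          // adds: scale back up
     }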
+ + For DFmode, we can avoid rounding errors by breaking up the word + into two pieces, converting them separately, and adding them back: + + LC0: .long 0,0x5f800000 + + itoft $16,$f11 + lda $2,LC0 + cmplt $16,0,$1 + cpyse $f11,$f31,$f10 + cpyse $f31,$f11,$f11 + s4addq $1,$2,$1 + lds $f12,0($1) + cvtqt $f10,$f10 + cvtqt $f11,$f11 + addt $f12,$f10,$f0 + addt $f0,$f11,$f0 + + This doesn't seem to be a clear-cut win over the optabs form. + It probably all depends on the distribution of numbers being + converted -- in the optabs form, all but high-bit-set has a + much lower minimum execution time. */ + +void +alpha_emit_floatuns (rtx operands[2]) +{ + rtx neglab, donelab, i0, i1, f0, in, out; + enum machine_mode mode; + + out = operands[0]; + in = force_reg (DImode, operands[1]); + mode = GET_MODE (out); + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + i0 = gen_reg_rtx (DImode); + i1 = gen_reg_rtx (DImode); + f0 = gen_reg_rtx (mode); + + emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); + + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); + emit_insn (gen_anddi3 (i1, in, const1_rtx)); + emit_insn (gen_iordi3 (i0, i0, i1)); + emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); + emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); + + emit_label (donelab); +} + +/* Generate the comparison for a conditional branch. */ + +void +alpha_emit_conditional_branch (rtx operands[], enum machine_mode cmp_mode) +{ + enum rtx_code cmp_code, branch_code; + enum machine_mode branch_mode = VOIDmode; + enum rtx_code code = GET_CODE (operands[0]); + rtx op0 = operands[1], op1 = operands[2]; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, branch_code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + cmp_code = reverse_condition (code), branch_code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* For FP, we swap them, for INT, we reverse them. */ + if (cmp_mode == DFmode) + { + cmp_code = swap_condition (code); + branch_code = NE; + tem = op0, op0 = op1, op1 = tem; + } + else + { + cmp_code = reverse_condition (code); + branch_code = EQ; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DFmode) + { + if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) + { + /* When we are not as concerned about non-finite values, and we + are comparing against zero, we can branch directly. */ + if (op1 == CONST0_RTX (DFmode)) + cmp_code = UNKNOWN, branch_code = code; + else if (op0 == CONST0_RTX (DFmode)) + { + /* Undo the swap we probably did just above. */ + tem = op0, op0 = op1, op1 = tem; + branch_code = swap_condition (cmp_code); + cmp_code = UNKNOWN; + } + } + else + { + /* ??? We mark the branch mode to be CCmode to prevent the + compare and branch from being combined, since the compare + insn follows IEEE rules that the branch does not. */ + branch_mode = CCmode; + } + } + else + { + /* The following optimizations are only for signed compares. 
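   One of these rewrites, for EQ/NE against a constant that does not fit the 8-bit
   compare immediate but whose negation is an lda/ldah-style constant (constraints I,
   K and L as I read the Alpha port: unsigned 8-bit, signed 16-bit, and 16-bit-shifted
   respectively), relies on nothing more than wraparound arithmetic:

     #include <stdint.h>

     static int eq_via_add (uint64_t x, uint64_t c)
     {
       uint64_t t = x + (0 - c);   // lda/ldah materializes -C, addq folds it in
       return t == 0;              // beq (or bne for the NE form)
     }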
*/ + if (code != LEU && code != LTU && code != GEU && code != GTU) + { + /* Whee. Compare and branch against 0 directly. */ + if (op1 == const0_rtx) + cmp_code = UNKNOWN, branch_code = code; + + /* If the constants doesn't fit into an immediate, but can + be generated by lda/ldah, we adjust the argument and + compare against zero, so we can use beq/bne directly. */ + /* ??? Don't do this when comparing against symbols, otherwise + we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will + be declared false out of hand (at least for non-weak). */ + else if (CONST_INT_P (op1) + && (code == EQ || code == NE) + && !(symbolic_operand (op0, VOIDmode) + || (REG_P (op0) && REG_POINTER (op0)))) + { + rtx n_op1 = GEN_INT (-INTVAL (op1)); + + if (! satisfies_constraint_I (op1) + && (satisfies_constraint_K (n_op1) + || satisfies_constraint_L (n_op1))) + cmp_code = PLUS, branch_code = code, op1 = n_op1; + } + } + + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + tem = op0; + if (cmp_code != UNKNOWN) + { + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)); + } + + /* Emit the branch instruction. */ + tem = gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (branch_code, + branch_mode, tem, + CONST0_RTX (cmp_mode)), + gen_rtx_LABEL_REF (VOIDmode, + operands[3]), + pc_rtx)); + emit_jump_insn (tem); +} + +/* Certain simplifications can be done to make invalid setcc operations + valid. Return the final comparison, or NULL if we can't work. */ + +bool +alpha_emit_setcc (rtx operands[], enum machine_mode cmp_mode) +{ + enum rtx_code cmp_code; + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = operands[2], op1 = operands[3]; + rtx tmp; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + if (cmp_mode == DFmode && !TARGET_FIX) + return 0; + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + + cmp_code = UNKNOWN; + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. */ + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + break; + + case NE: + if (cmp_mode == DImode && op1 == const0_rtx) + break; + /* FALLTHRU */ + + case ORDERED: + cmp_code = reverse_condition (code); + code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + tmp = op0, op0 = op1, op1 = tmp; + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!register_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + if (cmp_code != UNKNOWN) + { + tmp = gen_reg_rtx (cmp_mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); + + op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; + op1 = const0_rtx; + } + + /* Emit the setcc instruction. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (code, DImode, op0, op1))); + return true; +} + + +/* Rewrite a comparison against zero CMP of the form + (CODE (cc0) (const_int 0)) so it can be written validly in + a conditional move (if_then_else CMP ...). + If both of the operands that set cc0 are nonzero we must emit + an insn to perform the compare (it can't be done within + the conditional move). */ + +rtx +alpha_emit_conditional_move (rtx cmp, enum machine_mode mode) +{ + enum rtx_code code = GET_CODE (cmp); + enum rtx_code cmov_code = NE; + rtx op0 = XEXP (cmp, 0); + rtx op1 = XEXP (cmp, 1); + enum machine_mode cmp_mode + = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); + enum machine_mode cmov_mode = VOIDmode; + int local_fast_math = flag_unsafe_math_optimizations; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = alpha_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + gcc_assert (cmp_mode == DFmode || cmp_mode == DImode); + + if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) + { + enum rtx_code cmp_code; + + if (! TARGET_FIX) + return 0; + + /* If we have fp<->int register move instructions, do a cmov by + performing the comparison in fp registers, and move the + zero/nonzero value to integer registers, where we can then + use a normal cmov, or vice-versa. */ + + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + cmp_code = reverse_condition (code), code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + cmp_code = code, code = NE; + else + { + cmp_code = swap_condition (code); + code = NE; + tem = op0, op0 = op1, op1 = tem; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + tem = gen_reg_rtx (cmp_mode); + emit_insn (gen_rtx_SET (VOIDmode, tem, + gen_rtx_fmt_ee (cmp_code, cmp_mode, + op0, op1))); + + cmp_mode = cmp_mode == DImode ? DFmode : DImode; + op0 = gen_lowpart (cmp_mode, tem); + op1 = CONST0_RTX (cmp_mode); + cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + local_fast_math = 1; + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* We may be able to use a conditional move directly. + This avoids emitting spurious compares. */ + if (signed_comparison_operator (cmp, VOIDmode) + && (cmp_mode == DImode || local_fast_math) + && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) + return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + + /* We can't put the comparison inside the conditional move; + emit a compare instruction and put that inside the + conditional move. Make sure we emit only comparisons we have; + swap or reverse as necessary. */ + + if (!can_create_pseudo_p ()) + return NULL_RTX; + + switch (code) + { + case EQ: case LE: case LT: case LEU: case LTU: + case UNORDERED: + /* We have these compares: */ + break; + + case NE: + case ORDERED: + /* These must be reversed. 
*/ + code = reverse_condition (code); + cmov_code = EQ; + break; + + case GE: case GT: case GEU: case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + tem = op0, op0 = op1, op1 = tem; + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* ??? We mark the branch mode to be CCmode to prevent the compare + and cmov from being combined, since the compare insn follows IEEE + rules that the cmov does not. */ + if (cmp_mode == DFmode && !local_fast_math) + cmov_mode = CCmode; + + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); + return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); +} + +/* Simplify a conditional move of two constants into a setcc with + arithmetic. This is done with a splitter since combine would + just undo the work if done during code generation. It also catches + cases we wouldn't have before cse. */ + +int +alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, + rtx t_rtx, rtx f_rtx) +{ + HOST_WIDE_INT t, f, diff; + enum machine_mode mode; + rtx target, subtarget, tmp; + + mode = GET_MODE (dest); + t = INTVAL (t_rtx); + f = INTVAL (f_rtx); + diff = t - f; + + if (((code == NE || code == EQ) && diff < 0) + || (code == GE || code == GT)) + { + code = reverse_condition (code); + diff = t, t = f, f = diff; + diff = t - f; + } + + subtarget = target = dest; + if (mode != DImode) + { + target = gen_lowpart (DImode, dest); + if (can_create_pseudo_p ()) + subtarget = gen_reg_rtx (DImode); + else + subtarget = target; + } + /* Below, we must be careful to use copy_rtx on target and subtarget + in intermediate insns, as they may be a subreg rtx, which may not + be shared. */ + + if (f == 0 && exact_log2 (diff) > 0 + /* On EV6, we've got enough shifters to make non-arithmetic shifts + viable over a longer latency cmove. On EV5, the E0 slot is a + scarce resource, and on EV4 shift has the same latency as a cmove. */ + && (diff <= 8 || alpha_tune == PROCESSOR_EV6)) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), + GEN_INT (exact_log2 (t))); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else if (f == 0 && t == -1) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); + } + else if (diff == 1 || diff == 4 || diff == 8) + { + rtx add_op; + + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (subtarget), tmp)); + + if (diff == 1) + emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); + else + { + add_op = GEN_INT (f); + if (sext_add_operand (add_op, mode)) + { + tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget), + GEN_INT (diff)); + tmp = gen_rtx_PLUS (DImode, tmp, add_op); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + } + else + return 0; + } + } + else + return 0; + + return 1; +} + +/* Look up the function X_floating library function name for the + given operation. 
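   Host-side arithmetic equivalents of the three shapes alpha_split_conditional_move
   above handles, starting from a 0-or-1 compare result (illustrative helpers, not
   part of this file):

     #include <stdint.h>

     static int64_t sel_pow2 (int cond, int log2_t)         // cond ? (1 << log2_t) : 0
     { return (int64_t) (cond != 0) << log2_t; }

     static int64_t sel_all_ones (int cond)                 // cond ? -1 : 0
     { return -(int64_t) (cond != 0); }

     static int64_t sel_scaled (int cond, int64_t f, int64_t diff)  // diff is 1, 4 or 8
     { return (int64_t) (cond != 0) * diff + f; }           // the s4addq/s8addq shape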
*/ + +struct GTY(()) xfloating_op +{ + const enum rtx_code code; + const char *const GTY((skip)) osf_func; + const char *const GTY((skip)) vms_func; + rtx libcall; +}; + +static GTY(()) struct xfloating_op xfloating_ops[] = +{ + { PLUS, "_OtsAddX", "OTS$ADD_X", 0 }, + { MINUS, "_OtsSubX", "OTS$SUB_X", 0 }, + { MULT, "_OtsMulX", "OTS$MUL_X", 0 }, + { DIV, "_OtsDivX", "OTS$DIV_X", 0 }, + { EQ, "_OtsEqlX", "OTS$EQL_X", 0 }, + { NE, "_OtsNeqX", "OTS$NEQ_X", 0 }, + { LT, "_OtsLssX", "OTS$LSS_X", 0 }, + { LE, "_OtsLeqX", "OTS$LEQ_X", 0 }, + { GT, "_OtsGtrX", "OTS$GTR_X", 0 }, + { GE, "_OtsGeqX", "OTS$GEQ_X", 0 }, + { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 }, + { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 }, + { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 }, + { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 }, + { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 } +}; + +static GTY(()) struct xfloating_op vax_cvt_ops[] = +{ + { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 }, + { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 } +}; + +static rtx +alpha_lookup_xfloating_lib_func (enum rtx_code code) +{ + struct xfloating_op *ops = xfloating_ops; + long n = ARRAY_SIZE (xfloating_ops); + long i; + + gcc_assert (TARGET_HAS_XFLOATING_LIBS); + + /* How irritating. Nothing to key off for the main table. */ + if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) + { + ops = vax_cvt_ops; + n = ARRAY_SIZE (vax_cvt_ops); + } + + for (i = 0; i < n; ++i, ++ops) + if (ops->code == code) + { + rtx func = ops->libcall; + if (!func) + { + func = init_one_libfunc (TARGET_ABI_OPEN_VMS + ? ops->vms_func : ops->osf_func); + ops->libcall = func; + } + return func; + } + + gcc_unreachable (); +} + +/* Most X_floating operations take the rounding mode as an argument. + Compute that here. */ + +static int +alpha_compute_xfloating_mode_arg (enum rtx_code code, + enum alpha_fp_rounding_mode round) +{ + int mode; + + switch (round) + { + case ALPHA_FPRM_NORM: + mode = 2; + break; + case ALPHA_FPRM_MINF: + mode = 1; + break; + case ALPHA_FPRM_CHOP: + mode = 0; + break; + case ALPHA_FPRM_DYN: + mode = 4; + break; + default: + gcc_unreachable (); + + /* XXX For reference, round to +inf is mode = 3. */ + } + + if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N) + mode |= 0x10000; + + return mode; +} + +/* Emit an X_floating library function call. + + Note that these functions do not follow normal calling conventions: + TFmode arguments are passed in two integer registers (as opposed to + indirect); TFmode return values appear in R16+R17. + + FUNC is the function to call. + TARGET is where the output belongs. + OPERANDS are the inputs. + NOPERANDS is the count of inputs. + EQUIV is the expression equivalent for the function. 
+*/ + +static void +alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], + int noperands, rtx equiv) +{ + rtx usage = NULL_RTX, tmp, reg; + int regno = 16, i; + + start_sequence (); + + for (i = 0; i < noperands; ++i) + { + switch (GET_MODE (operands[i])) + { + case TFmode: + reg = gen_rtx_REG (TFmode, regno); + regno += 2; + break; + + case DFmode: + reg = gen_rtx_REG (DFmode, regno + 32); + regno += 1; + break; + + case VOIDmode: + gcc_assert (CONST_INT_P (operands[i])); + /* FALLTHRU */ + case DImode: + reg = gen_rtx_REG (DImode, regno); + regno += 1; + break; + + default: + gcc_unreachable (); + } + + emit_move_insn (reg, operands[i]); + use_reg (&usage, reg); + } + + switch (GET_MODE (target)) + { + case TFmode: + reg = gen_rtx_REG (TFmode, 16); + break; + case DFmode: + reg = gen_rtx_REG (DFmode, 32); + break; + case DImode: + reg = gen_rtx_REG (DImode, 0); + break; + default: + gcc_unreachable (); + } + + tmp = gen_rtx_MEM (QImode, func); + tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx, + const0_rtx, const0_rtx)); + CALL_INSN_FUNCTION_USAGE (tmp) = usage; + RTL_CONST_CALL_P (tmp) = 1; + + tmp = get_insns (); + end_sequence (); + + emit_libcall_block (tmp, target, reg, equiv); +} + +/* Emit an X_floating library function call for arithmetic (+,-,*,/). */ + +void +alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[]) +{ + rtx func; + int mode; + rtx out_operands[3]; + + func = alpha_lookup_xfloating_lib_func (code); + mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); + + out_operands[0] = operands[1]; + out_operands[1] = operands[2]; + out_operands[2] = GEN_INT (mode); + alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3, + gen_rtx_fmt_ee (code, TFmode, operands[1], + operands[2])); +} + +/* Emit an X_floating library function call for a comparison. */ + +static rtx +alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) +{ + enum rtx_code cmp_code, res_code; + rtx func, out, operands[2], note; + + /* X_floating library comparison functions return + -1 unordered + 0 false + 1 true + Convert the compare against the raw return value. */ + + cmp_code = *pcode; + switch (cmp_code) + { + case UNORDERED: + cmp_code = EQ; + res_code = LT; + break; + case ORDERED: + cmp_code = EQ; + res_code = GE; + break; + case NE: + res_code = NE; + break; + case EQ: + case LT: + case GT: + case LE: + case GE: + res_code = GT; + break; + default: + gcc_unreachable (); + } + *pcode = res_code; + + func = alpha_lookup_xfloating_lib_func (cmp_code); + + operands[0] = op0; + operands[1] = op1; + out = gen_reg_rtx (DImode); + + /* What's actually returned is -1,0,1, not a proper boolean value. */ + note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); + note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); + alpha_emit_xfloating_libcall (func, out, operands, 2, note); + + return out; +} + +/* Emit an X_floating library function call for a conversion. 
*/ + +void +alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) +{ + int noperands = 1, mode; + rtx out_operands[2]; + rtx func; + enum rtx_code code = orig_code; + + if (code == UNSIGNED_FIX) + code = FIX; + + func = alpha_lookup_xfloating_lib_func (code); + + out_operands[0] = operands[1]; + + switch (code) + { + case FIX: + mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + case FLOAT_TRUNCATE: + mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + default: + break; + } + + alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands, + gen_rtx_fmt_e (orig_code, + GET_MODE (operands[0]), + operands[1])); +} + +/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of + DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, + guarantee that the sequence + set (OP[0] OP[2]) + set (OP[1] OP[3]) + is valid. Naturally, output operand ordering is little-endian. + This is used by *movtf_internal and *movti_internal. */ + +void +alpha_split_tmode_pair (rtx operands[4], enum machine_mode mode, + bool fixup_overlap) +{ + switch (GET_CODE (operands[1])) + { + case REG: + operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); + break; + + case MEM: + operands[3] = adjust_address (operands[1], DImode, 8); + operands[2] = adjust_address (operands[1], DImode, 0); + break; + + case CONST_INT: + case CONST_DOUBLE: + gcc_assert (operands[1] == CONST0_RTX (mode)); + operands[2] = operands[3] = const0_rtx; + break; + + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[0])) + { + case REG: + operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + break; + + case MEM: + operands[1] = adjust_address (operands[0], DImode, 8); + operands[0] = adjust_address (operands[0], DImode, 0); + break; + + default: + gcc_unreachable (); + } + + if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) + { + rtx tmp; + tmp = operands[0], operands[0] = operands[1], operands[1] = tmp; + tmp = operands[2], operands[2] = operands[3], operands[3] = tmp; + } +} + +/* Implement negtf2 or abstf2. Op0 is destination, op1 is source, + op2 is a register containing the sign bit, operation is the + logical operation to be performed. */ + +void +alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) +{ + rtx high_bit = operands[2]; + rtx scratch; + int move; + + alpha_split_tmode_pair (operands, TFmode, false); + + /* Detect three flavors of operand overlap. */ + move = 1; + if (rtx_equal_p (operands[0], operands[2])) + move = 0; + else if (rtx_equal_p (operands[1], operands[2])) + { + if (rtx_equal_p (operands[0], high_bit)) + move = 2; + else + move = -1; + } + + if (move < 0) + emit_move_insn (operands[0], operands[2]); + + /* ??? If the destination overlaps both source tf and high_bit, then + assume source tf is dead in its entirety and use the other half + for a scratch register. Otherwise "scratch" is just the proper + destination register. */ + scratch = operands[move < 2 ? 
1 : 3]; + + emit_insn ((*operation) (scratch, high_bit, operands[3])); + + if (move > 0) + { + emit_move_insn (operands[0], operands[2]); + if (move > 1) + emit_move_insn (operands[1], scratch); + } +} + +/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting + unaligned data: + + unsigned: signed: + word: ldq_u r1,X(r11) ldq_u r1,X(r11) + ldq_u r2,X+1(r11) ldq_u r2,X+1(r11) + lda r3,X(r11) lda r3,X+2(r11) + extwl r1,r3,r1 extql r1,r3,r1 + extwh r2,r3,r2 extqh r2,r3,r2 + or r1.r2.r1 or r1,r2,r1 + sra r1,48,r1 + + long: ldq_u r1,X(r11) ldq_u r1,X(r11) + ldq_u r2,X+3(r11) ldq_u r2,X+3(r11) + lda r3,X(r11) lda r3,X(r11) + extll r1,r3,r1 extll r1,r3,r1 + extlh r2,r3,r2 extlh r2,r3,r2 + or r1.r2.r1 addl r1,r2,r1 + + quad: ldq_u r1,X(r11) + ldq_u r2,X+7(r11) + lda r3,X(r11) + extql r1,r3,r1 + extqh r2,r3,r2 + or r1.r2.r1 +*/ + +void +alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size, + HOST_WIDE_INT ofs, int sign) +{ + rtx meml, memh, addr, extl, exth, tmp, mema; + enum machine_mode mode; + + if (TARGET_BWX && size == 2) + { + meml = adjust_address (mem, QImode, ofs); + memh = adjust_address (mem, QImode, ofs+1); + extl = gen_reg_rtx (DImode); + exth = gen_reg_rtx (DImode); + emit_insn (gen_zero_extendqidi2 (extl, meml)); + emit_insn (gen_zero_extendqidi2 (exth, memh)); + exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8), + NULL, 1, OPTAB_LIB_WIDEN); + addr = expand_simple_binop (DImode, IOR, extl, exth, + NULL, 1, OPTAB_LIB_WIDEN); + + if (sign && GET_MODE (tgt) != HImode) + { + addr = gen_lowpart (HImode, addr); + emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0)); + } + else + { + if (GET_MODE (tgt) != DImode) + addr = gen_lowpart (GET_MODE (tgt), addr); + emit_move_insn (tgt, addr); + } + return; + } + + meml = gen_reg_rtx (DImode); + memh = gen_reg_rtx (DImode); + addr = gen_reg_rtx (DImode); + extl = gen_reg_rtx (DImode); + exth = gen_reg_rtx (DImode); + + mema = XEXP (mem, 0); + if (GET_CODE (mema) == LO_SUM) + mema = force_reg (Pmode, mema); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + + tmp = change_address (mem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, mema, ofs), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (meml, tmp); + + tmp = change_address (mem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, mema, + ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (memh, tmp); + + if (sign && size == 2) + { + emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2)); + + emit_insn (gen_extql (extl, meml, addr)); + emit_insn (gen_extqh (exth, memh, addr)); + + /* We must use tgt here for the target. Alpha-vms port fails if we use + addr for the target, because addr is marked as a pointer and combine + knows that pointers are always sign-extended 32-bit values. 
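   Stepping back from the BWX special case, a byte-level model of what the general
   ldq_u/extql/extqh/or sequence in this function computes for a full unaligned
   quadword load (little-endian; memcpy stands in for ldq_u, and like the real
   sequence it reads whole aligned quadwords around the address; helper name is
   illustrative):

     #include <stdint.h>
     #include <string.h>

     static uint64_t load_u64_unaligned (const unsigned char *p)
     {
       uintptr_t a = (uintptr_t) p;
       uint64_t lo, hi;
       memcpy (&lo, (const void *) (a & ~(uintptr_t) 7), 8);        // ldq_u X(r11)
       memcpy (&hi, (const void *) ((a + 7) & ~(uintptr_t) 7), 8);  // ldq_u X+7(r11)
       unsigned shift = (a & 7) * 8;
       if (shift == 0)
         return lo;               // aligned: both ldq_u hit the same quadword
       return (lo >> shift) | (hi << (64 - shift));   // extql, extqh, or
     }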
*/ + addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); + addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48), + addr, 1, OPTAB_WIDEN); + } + else + { + emit_move_insn (addr, plus_constant (Pmode, mema, ofs)); + emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr)); + switch ((int) size) + { + case 2: + emit_insn (gen_extwh (exth, memh, addr)); + mode = HImode; + break; + case 4: + emit_insn (gen_extlh (exth, memh, addr)); + mode = SImode; + break; + case 8: + emit_insn (gen_extqh (exth, memh, addr)); + mode = DImode; + break; + default: + gcc_unreachable (); + } + + addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), + gen_lowpart (mode, exth), gen_lowpart (mode, tgt), + sign, OPTAB_WIDEN); + } + + if (addr != tgt) + emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); +} + +/* Similarly, use ins and msk instructions to perform unaligned stores. */ + +void +alpha_expand_unaligned_store (rtx dst, rtx src, + HOST_WIDE_INT size, HOST_WIDE_INT ofs) +{ + rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; + + if (TARGET_BWX && size == 2) + { + if (src != const0_rtx) + { + dstl = gen_lowpart (QImode, src); + dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), + NULL, 1, OPTAB_LIB_WIDEN); + dsth = gen_lowpart (QImode, dsth); + } + else + dstl = dsth = const0_rtx; + + meml = adjust_address (dst, QImode, ofs); + memh = adjust_address (dst, QImode, ofs+1); + + emit_move_insn (meml, dstl); + emit_move_insn (memh, dsth); + return; + } + + dstl = gen_reg_rtx (DImode); + dsth = gen_reg_rtx (DImode); + insl = gen_reg_rtx (DImode); + insh = gen_reg_rtx (DImode); + + dsta = XEXP (dst, 0); + if (GET_CODE (dsta) == LO_SUM) + dsta = force_reg (Pmode, dsta); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + + meml = change_address (dst, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, dsta, ofs), + GEN_INT (-8))); + set_mem_alias_set (meml, 0); + + memh = change_address (dst, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, dsta, + ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (memh, 0); + + emit_move_insn (dsth, memh); + emit_move_insn (dstl, meml); + + addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); + + if (src != CONST0_RTX (GET_MODE (src))) + { + emit_insn (gen_insxh (insh, gen_lowpart (DImode, src), + GEN_INT (size*8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); + break; + case 4: + emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); + break; + case 8: + emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); + break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_mskwl (dstl, dstl, addr)); + break; + case 4: + emit_insn (gen_mskll (dstl, dstl, addr)); + break; + case 8: + emit_insn (gen_mskql (dstl, dstl, addr)); + break; + default: + gcc_unreachable (); + } + + if (src != CONST0_RTX (GET_MODE (src))) + { + dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); + dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); + } + + /* Must store high before low for degenerate case of aligned. 
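   The store path above is the mirror image: read both covering quadwords, clear the
   destination bytes (mskXl/mskXh), merge in the shifted source (insXl/insXh), and
   write the high quadword before the low one.  A little-endian host model for a
   4-byte store, with memcpy standing in for ldq_u and stq_u:

     #include <stdint.h>
     #include <string.h>

     static void store_u32_unaligned (unsigned char *p, uint32_t v)
     {
       uintptr_t a = (uintptr_t) p;
       unsigned char *lo = (unsigned char *) (a & ~(uintptr_t) 7);
       unsigned char *hi = (unsigned char *) ((a + 3) & ~(uintptr_t) 7);
       unsigned d = (a & 7) * 8;
       uint64_t dl, dh;
       memcpy (&dl, lo, 8);                                  // ldq_u both ends
       memcpy (&dh, hi, 8);
       uint64_t insl = (uint64_t) v << d;                    // insll
       uint64_t insh = d ? (uint64_t) v >> (64 - d) : 0;     // inslh
       uint64_t mskl = 0xffffffffull << d;                   // field in the low quadword
       uint64_t mskh = d ? 0xffffffffull >> (64 - d) : 0;    // spill into the high one
       dh = (dh & ~mskh) | insh;                             // mskXh, then or
       dl = (dl & ~mskl) | insl;                             // mskXl, then or
       memcpy (hi, &dh, 8);                                  // high before low, as noted
       memcpy (lo, &dl, 8);
     }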
*/ + emit_move_insn (memh, dsth); + emit_move_insn (meml, dstl); +} + +/* The block move code tries to maximize speed by separating loads and + stores at the expense of register pressure: we load all of the data + before we store it back out. There are two secondary effects worth + mentioning, that this speeds copying to/from aligned and unaligned + buffers, and that it makes the code significantly easier to write. */ + +#define MAX_MOVE_WORDS 8 + +/* Load an integral number of consecutive unaligned quadwords. */ + +static void +alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem, + HOST_WIDE_INT words, HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1]; + rtx sreg, areg, tmp, smema; + HOST_WIDE_INT i; + + smema = XEXP (smem, 0); + if (GET_CODE (smema) == LO_SUM) + smema = force_reg (Pmode, smema); + + /* Generate all the tmp registers we need. */ + for (i = 0; i < words; ++i) + { + data_regs[i] = out_regs[i]; + ext_tmps[i] = gen_reg_rtx (DImode); + } + data_regs[words] = gen_reg_rtx (DImode); + + if (ofs != 0) + smem = adjust_address (smem, GET_MODE (smem), ofs); + + /* Load up all of the source data. */ + for (i = 0; i < words; ++i) + { + tmp = change_address (smem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, smema, 8*i), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[i], tmp); + } + + tmp = change_address (smem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, smema, + 8*words - 1), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[words], tmp); + + /* Extract the half-word fragments. Unfortunately DEC decided to make + extxh with offset zero a noop instead of zeroing the register, so + we must take care of that edge condition ourselves with cmov. */ + + sreg = copy_addr_to_reg (smema); + areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, + 1, OPTAB_WIDEN); + for (i = 0; i < words; ++i) + { + emit_insn (gen_extql (data_regs[i], data_regs[i], sreg)); + emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg)); + emit_insn (gen_rtx_SET (VOIDmode, ext_tmps[i], + gen_rtx_IF_THEN_ELSE (DImode, + gen_rtx_EQ (DImode, areg, + const0_rtx), + const0_rtx, ext_tmps[i]))); + } + + /* Merge the half-words into whole words. */ + for (i = 0; i < words; ++i) + { + out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], + ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN); + } +} + +/* Store an integral number of consecutive unaligned quadwords. DATA_REGS + may be NULL to store zeros. */ + +static void +alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, + HOST_WIDE_INT words, HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx ins_tmps[MAX_MOVE_WORDS]; + rtx st_tmp_1, st_tmp_2, dreg; + rtx st_addr_1, st_addr_2, dmema; + HOST_WIDE_INT i; + + dmema = XEXP (dmem, 0); + if (GET_CODE (dmema) == LO_SUM) + dmema = force_reg (Pmode, dmema); + + /* Generate all the tmp registers we need. */ + if (data_regs != NULL) + for (i = 0; i < words; ++i) + ins_tmps[i] = gen_reg_rtx(DImode); + st_tmp_1 = gen_reg_rtx(DImode); + st_tmp_2 = gen_reg_rtx(DImode); + + if (ofs != 0) + dmem = adjust_address (dmem, GET_MODE (dmem), ofs); + + st_addr_2 = change_address (dmem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, dmema, + words*8 - 1), + im8)); + set_mem_alias_set (st_addr_2, 0); + + st_addr_1 = change_address (dmem, DImode, + gen_rtx_AND (DImode, dmema, im8)); + set_mem_alias_set (st_addr_1, 0); + + /* Load up the destination end bits. 
*/ + emit_move_insn (st_tmp_2, st_addr_2); + emit_move_insn (st_tmp_1, st_addr_1); + + /* Shift the input data into place. */ + dreg = copy_addr_to_reg (dmema); + if (data_regs != NULL) + { + for (i = words-1; i >= 0; --i) + { + emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); + emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); + } + for (i = words-1; i > 0; --i) + { + ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i], + ins_tmps[i-1], ins_tmps[i-1], 1, + OPTAB_WIDEN); + } + } + + /* Split and merge the ends with the destination data. */ + emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg)); + emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg)); + + if (data_regs != NULL) + { + st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1], + st_tmp_2, 1, OPTAB_WIDEN); + st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], + st_tmp_1, 1, OPTAB_WIDEN); + } + + /* Store it all. */ + emit_move_insn (st_addr_2, st_tmp_2); + for (i = words-1; i > 0; --i) + { + rtx tmp = change_address (dmem, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, + dmema, i*8), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx); + } + emit_move_insn (st_addr_1, st_tmp_1); +} + + +/* Expand string/block move operations. + + operands[0] is the pointer to the destination. + operands[1] is the pointer to the source. + operands[2] is the number of bytes to move. + operands[3] is the alignment. */ + +int +alpha_expand_block_move (rtx operands[]) +{ + rtx bytes_rtx = operands[2]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT dst_align = src_align; + rtx orig_src = operands[1]; + rtx orig_dst = operands[0]; + rtx data_regs[2 * MAX_MOVE_WORDS + 16]; + rtx tmp; + unsigned int i, words, ofs, nregs = 0; + + if (orig_bytes <= 0) + return 1; + else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for additional alignment information from recorded register info. 
*/ + + tmp = XEXP (orig_src, 0); + if (REG_P (tmp)) + src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > src_align) + { + if (a >= 64 && c % 8 == 0) + src_align = 64; + else if (a >= 32 && c % 4 == 0) + src_align = 32; + else if (a >= 16 && c % 2 == 0) + src_align = 16; + } + } + + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > dst_align) + { + if (a >= 64 && c % 8 == 0) + dst_align = 64; + else if (a >= 32 && c % 4 == 0) + dst_align = 32; + else if (a >= 16 && c % 2 == 0) + dst_align = 16; + } + } + + ofs = 0; + if (src_align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, DImode, ofs + i * 8)); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (src_align >= 32 && bytes >= 4) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (SImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + i * 4)); + + nregs += words; + bytes -= words * 4; + ofs += words * 4; + } + + if (bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words+1; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + alpha_expand_unaligned_load_words (data_regs + nregs, orig_src, + words, ofs); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (! TARGET_BWX && bytes >= 4) + { + data_regs[nregs++] = tmp = gen_reg_rtx (SImode); + alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (src_align >= 16) + { + do { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); + bytes -= 2; + ofs += 2; + } while (bytes >= 2); + } + else if (! TARGET_BWX) + { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + data_regs[nregs++] = tmp = gen_reg_rtx (QImode); + emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); + bytes -= 1; + ofs += 1; + } + + gcc_assert (nregs <= ARRAY_SIZE (data_regs)); + + /* Now save it back out again. */ + + i = 0, ofs = 0; + + /* Write out the data in whatever chunks reading the source allowed. */ + if (dst_align >= 64) + { + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + emit_move_insn (adjust_address (orig_dst, DImode, ofs), + data_regs[i]); + ofs += 8; + i++; + } + } + + if (dst_align >= 32) + { + /* If the source has remaining DImode regs, write them out in + two pieces. 
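   Concretely, on this little-endian target "two pieces" means the low half of the
   register goes out at OFS and the high half at OFS + 4; as a host sketch:

     #include <stdint.h>
     #include <string.h>

     static void store_di_as_two_si (unsigned char *dst, uint64_t v)
     {
       uint32_t lo = (uint32_t) v;          // gen_lowpart of the DImode reg
       uint32_t hi = (uint32_t) (v >> 32);  // the lshr by 32 in the loop below
       memcpy (dst, &lo, 4);
       memcpy (dst + 4, &hi, 4);
     }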
*/ + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + gen_lowpart (SImode, data_regs[i])); + emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), + gen_lowpart (SImode, tmp)); + ofs += 8; + i++; + } + + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + data_regs[i]); + ofs += 4; + i++; + } + } + + if (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + /* Write out a remaining block of words using unaligned methods. */ + + for (words = 1; i + words < nregs; words++) + if (GET_MODE (data_regs[i + words]) != DImode) + break; + + if (words == 1) + alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + else + alpha_expand_unaligned_store_words (data_regs + i, orig_dst, + words, ofs); + + i += words; + ofs += words * 8; + } + + /* Due to the above, this won't be aligned. */ + /* ??? If we have more than one of these, consider constructing full + words in registers and using alpha_expand_unaligned_store_words. */ + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); + ofs += 4; + i++; + } + + if (dst_align >= 16) + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); + i++; + ofs += 2; + } + else + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); + i++; + ofs += 2; + } + + /* The remainder must be byte copies. */ + while (i < nregs) + { + gcc_assert (GET_MODE (data_regs[i]) == QImode); + emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); + i++; + ofs += 1; + } + + return 1; +} + +int +alpha_expand_block_clear (rtx operands[]) +{ + rtx bytes_rtx = operands[1]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT alignofs = 0; + rtx orig_dst = operands[0]; + rtx tmp; + int i, words, ofs = 0; + + if (orig_bytes <= 0) + return 1; + if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for stricter alignment. */ + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS + && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > align) + { + if (a >= 64) + align = a, alignofs = 8 - c % 8; + else if (a >= 32) + align = a, alignofs = 4 - c % 4; + else if (a >= 16) + align = a, alignofs = 2 - c % 2; + } + } + + /* Handle an unaligned prefix first. */ + + if (alignofs > 0) + { +#if HOST_BITS_PER_WIDE_INT >= 64 + /* Given that alignofs is bounded by align, the only time BWX could + generate three stores is for a 7 byte fill. Prefer two individual + stores over a load/mask/store sequence. */ + if ((!TARGET_BWX || alignofs == 7) + && align >= 32 + && !(alignofs == 4 && bytes >= 4)) + { + enum machine_mode mode = (align >= 64 ? DImode : SImode); + int inv_alignofs = (align >= 64 ? 
8 : 4) - alignofs; + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, mode, ofs - inv_alignofs); + set_mem_alias_set (mem, 0); + + mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8)); + if (bytes < alignofs) + { + mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8); + ofs += bytes; + bytes = 0; + } + else + { + bytes -= alignofs; + ofs += alignofs; + } + alignofs = 0; + + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + } +#endif + + if (TARGET_BWX && (alignofs & 1) && bytes >= 1) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + alignofs -= 1; + } + if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx); + bytes -= 2; + ofs += 2; + alignofs -= 2; + } + if (alignofs == 4 && bytes >= 4) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + alignofs = 0; + } + + /* If we've not used the extra lead alignment information by now, + we won't be able to. Downgrade align to match what's left over. */ + if (alignofs > 0) + { + alignofs = alignofs & -alignofs; + align = MIN (align, alignofs * BITS_PER_UNIT); + } + } + + /* Handle a block of contiguous long-words. */ + + if (align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8), + const0_rtx); + + bytes -= words * 8; + ofs += words * 8; + } + + /* If the block is large and appropriately aligned, emit a single + store followed by a sequence of stq_u insns. */ + + if (align >= 32 && bytes > 16) + { + rtx orig_dsta; + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + + orig_dsta = XEXP (orig_dst, 0); + if (GET_CODE (orig_dsta) == LO_SUM) + orig_dsta = force_reg (Pmode, orig_dsta); + + words = bytes / 8; + for (i = 0; i < words; ++i) + { + rtx mem + = change_address (orig_dst, DImode, + gen_rtx_AND (DImode, + plus_constant (DImode, orig_dsta, + ofs + i*8), + GEN_INT (-8))); + set_mem_alias_set (mem, 0); + emit_move_insn (mem, const0_rtx); + } + + /* Depending on the alignment, the first stq_u may have overlapped + with the initial stl, which means that the last stq_u didn't + write as much as it would appear. Leave those questionable bytes + unaccounted for. */ + bytes -= words * 8 - 4; + ofs += words * 8 - 4; + } + + /* Handle a smaller block of aligned words. */ + + if ((align >= 64 && bytes == 4) + || (align == 32 && bytes >= 4)) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4), + const0_rtx); + + bytes -= words * 4; + ofs += words * 4; + } + + /* An unaligned block uses stq_u stores for as many as possible. */ + + if (bytes >= 8) + { + words = bytes / 8; + + alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); + + bytes -= words * 8; + ofs += words * 8; + } + + /* Next clean up any trailing pieces. */ + +#if HOST_BITS_PER_WIDE_INT >= 64 + /* Count the number of bits in BYTES for which aligned stores could + be emitted. */ + words = 0; + for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1) + if (bytes & i) + words += 1; + + /* If we have appropriate alignment (and it wouldn't take too many + instructions otherwise), mask out the bytes we need. */ + if (TARGET_BWX ? 
words > 2 : bytes > 0) + { + if (align >= 64) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, DImode, ofs); + set_mem_alias_set (mem, 0); + + mask = ~(HOST_WIDE_INT)0 << (bytes * 8); + + tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + else if (align >= 32 && bytes < 4) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, SImode, ofs); + set_mem_alias_set (mem, 0); + + mask = ~(HOST_WIDE_INT)0 << (bytes * 8); + + tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + } +#endif + + if (!TARGET_BWX && bytes >= 4) + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (align >= 16) + { + do { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), + const0_rtx); + bytes -= 2; + ofs += 2; + } while (bytes >= 2); + } + else if (! TARGET_BWX) + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + } + + return 1; +} + +/* Returns a mask so that zap(x, value) == x & mask. */ + +rtx +alpha_expand_zap_mask (HOST_WIDE_INT value) +{ + rtx result; + int i; + + if (HOST_BITS_PER_WIDE_INT >= 64) + { + HOST_WIDE_INT mask = 0; + + for (i = 7; i >= 0; --i) + { + mask <<= 8; + if (!((value >> i) & 1)) + mask |= 0xff; + } + + result = gen_int_mode (mask, DImode); + } + else + { + HOST_WIDE_INT mask_lo = 0, mask_hi = 0; + + gcc_assert (HOST_BITS_PER_WIDE_INT == 32); + + for (i = 7; i >= 4; --i) + { + mask_hi <<= 8; + if (!((value >> i) & 1)) + mask_hi |= 0xff; + } + + for (i = 3; i >= 0; --i) + { + mask_lo <<= 8; + if (!((value >> i) & 1)) + mask_lo |= 0xff; + } + + result = immed_double_const (mask_lo, mask_hi, DImode); + } + + return result; +} + +void +alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), + enum machine_mode mode, + rtx op0, rtx op1, rtx op2) +{ + op0 = gen_lowpart (mode, op0); + + if (op1 == const0_rtx) + op1 = CONST0_RTX (mode); + else + op1 = gen_lowpart (mode, op1); + + if (op2 == const0_rtx) + op2 = CONST0_RTX (mode); + else + op2 = gen_lowpart (mode, op2); + + emit_insn ((*gen) (op0, op1, op2)); +} + +/* A subroutine of the atomic operation splitters. Jump to LABEL if + COND is true. Mark the jump as unlikely to be taken. */ + +static void +emit_unlikely_jump (rtx cond, rtx label) +{ + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + rtx x; + + x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); + x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x)); + add_int_reg_note (x, REG_BR_PROB, very_unlikely); +} + +/* A subroutine of the atomic operation splitters. Emit a load-locked + instruction in MODE. */ + +static void +emit_load_locked (enum machine_mode mode, rtx reg, rtx mem) +{ + rtx (*fn) (rtx, rtx) = NULL; + if (mode == SImode) + fn = gen_load_locked_si; + else if (mode == DImode) + fn = gen_load_locked_di; + emit_insn (fn (reg, mem)); +} + +/* A subroutine of the atomic operation splitters. Emit a store-conditional + instruction in MODE. 
*/ + +static void +emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val) +{ + rtx (*fn) (rtx, rtx, rtx) = NULL; + if (mode == SImode) + fn = gen_store_conditional_si; + else if (mode == DImode) + fn = gen_store_conditional_di; + emit_insn (fn (res, mem, val)); +} + +/* Subroutines of the atomic operation splitters. Emit barriers + as needed for the memory MODEL. */ + +static void +alpha_pre_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, true)) + emit_insn (gen_memory_barrier ()); +} + +static void +alpha_post_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, false)) + emit_insn (gen_memory_barrier ()); +} + +/* A subroutine of the atomic operation splitters. Emit an insxl + instruction in MODE. */ + +static rtx +emit_insxl (enum machine_mode mode, rtx op1, rtx op2) +{ + rtx ret = gen_reg_rtx (DImode); + rtx (*fn) (rtx, rtx, rtx); + + switch (mode) + { + case QImode: + fn = gen_insbl; + break; + case HImode: + fn = gen_inswl; + break; + case SImode: + fn = gen_insll; + break; + case DImode: + fn = gen_insql; + break; + default: + gcc_unreachable (); + } + + op1 = force_reg (mode, op1); + emit_insn (fn (ret, op1, op2)); + + return ret; +} + +/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation + to perform. MEM is the memory on which to operate. VAL is the second + operand of the binary operator. BEFORE and AFTER are optional locations to + return the value of MEM either before of after the operation. SCRATCH is + a scratch register. */ + +void +alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before, + rtx after, rtx scratch, enum memmodel model) +{ + enum machine_mode mode = GET_MODE (mem); + rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch)); + + alpha_pre_atomic_barrier (model); + + label = gen_label_rtx (); + emit_label (label); + label = gen_rtx_LABEL_REF (DImode, label); + + if (before == NULL) + before = scratch; + emit_load_locked (mode, before, mem); + + if (code == NOT) + { + x = gen_rtx_AND (mode, before, val); + emit_insn (gen_rtx_SET (VOIDmode, val, x)); + + x = gen_rtx_NOT (mode, val); + } + else + x = gen_rtx_fmt_ee (code, mode, before, val); + if (after) + emit_insn (gen_rtx_SET (VOIDmode, after, copy_rtx (x))); + emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); + + emit_store_conditional (mode, cond, mem, scratch); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); + + alpha_post_atomic_barrier (model); +} + +/* Expand a compare and swap operation. 
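   Both alpha_split_atomic_op above and the compare-and-swap splitters below emit the
   same load-locked / store-conditional retry shape; modelled in C with hypothetical
   load_locked and store_conditional helpers standing in for the ldq_l/stq_c patterns:

     #include <stdint.h>

     extern uint64_t load_locked (uint64_t *mem);                  // hypothetical helper
     extern int store_conditional (uint64_t *mem, uint64_t val);   // hypothetical helper

     static uint64_t atomic_fetch_add_u64 (uint64_t *mem, uint64_t val)
     {
       uint64_t before, after;
       do
         {
           before = load_locked (mem);           // the BEFORE output
           after = before + val;                 // the operation (PLUS here)
         }
       while (!store_conditional (mem, after));  // retry on the unlikely failure
       return before;                            // AFTER is also available if wanted
     }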
*/ + +void +alpha_split_compare_and_swap (rtx operands[]) +{ + rtx cond, retval, mem, oldval, newval; + bool is_weak; + enum memmodel mod_s, mod_f; + enum machine_mode mode; + rtx label1, label2, x; + + cond = operands[0]; + retval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = (operands[5] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[6]); + mod_f = (enum memmodel) INTVAL (operands[7]); + mode = GET_MODE (mem); + + alpha_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + + emit_load_locked (mode, retval, mem); + + x = gen_lowpart (DImode, retval); + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, x, const0_rtx); + } + else + { + x = gen_rtx_EQ (DImode, x, oldval); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + + emit_move_insn (cond, newval); + emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond)); + + if (!is_weak) + { + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (XEXP (label2, 0)); + + alpha_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (XEXP (label2, 0)); +} + +void +alpha_expand_compare_and_swap_12 (rtx operands[]) +{ + rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f; + enum machine_mode mode; + rtx addr, align, wdst; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); + + cond = operands[0]; + dst = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + + /* We forced the address into a register via mem_noofs_operand. 
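   The QImode/HImode forms cannot be loaded locked directly, so the expander and
   splitter below operate on the containing aligned quadword: extract the field,
   compare it, and merge the new value back in before the conditional store.  A
   host-side model for a byte, using a hypothetical cas_u64 helper in place of the
   ldq_l/stq_c loop:

     #include <stdint.h>
     #include <string.h>

     extern int cas_u64 (uint64_t *mem, uint64_t expected, uint64_t desired);  // hypothetical

     static int cas_u8 (uint8_t *p, uint8_t oldval, uint8_t newval)
     {
       uintptr_t a = (uintptr_t) p;
       uint64_t *word = (uint64_t *) (a & ~(uintptr_t) 7);   // the 'align' operand
       unsigned shift = (a & 7) * 8;
       uint64_t old_word;
       memcpy (&old_word, word, 8);
       if (((old_word >> shift) & 0xff) != oldval)           // extbl, then the compare
         return 0;                                           // jump to the fail label
       uint64_t new_word = (old_word & ~(0xffull << shift))  // mskbl
                           | ((uint64_t) newval << shift);   // insbl, then or
       return cas_u64 (word, old_word, new_word);            // the stq_c-style commit
     }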
*/ + addr = XEXP (mem, 0); + gcc_assert (register_operand (addr, DImode)); + + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), + NULL_RTX, 1, OPTAB_DIRECT); + + oldval = convert_modes (DImode, mode, oldval, 1); + + if (newval != const0_rtx) + newval = emit_insxl (mode, newval, addr); + + wdst = gen_reg_rtx (DImode); + if (mode == QImode) + gen = gen_atomic_compare_and_swapqi_1; + else + gen = gen_atomic_compare_and_swaphi_1; + emit_insn (gen (cond, wdst, mem, oldval, newval, align, + is_weak, mod_s, mod_f)); + + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +alpha_split_compare_and_swap_12 (rtx operands[]) +{ + rtx cond, dest, orig_mem, oldval, newval, align, scratch; + enum machine_mode mode; + bool is_weak; + enum memmodel mod_s, mod_f; + rtx label1, label2, mem, addr, width, mask, x; + + cond = operands[0]; + dest = operands[1]; + orig_mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + align = operands[5]; + is_weak = (operands[6] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[7]); + mod_f = (enum memmodel) INTVAL (operands[8]); + scratch = operands[9]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + alpha_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + + emit_load_locked (DImode, scratch, mem); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, dest, const0_rtx); + } + else + { + x = gen_rtx_EQ (DImode, dest, oldval); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + + emit_insn (gen_mskxl (cond, scratch, mask, addr)); + + if (newval != const0_rtx) + emit_insn (gen_iordi3 (cond, cond, newval)); + + emit_store_conditional (DImode, cond, mem, cond); + + if (!is_weak) + { + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (XEXP (label2, 0)); + + alpha_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (XEXP (label2, 0)); +} + +/* Expand an atomic exchange operation. 
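+
+   (Editorial illustration, not part of the original GCC source: for
+   SImode the splitter below boils down to the retry loop
+
+	1:	ldl_l	retval,0(addr)
+		mov	val,scratch
+		stl_c	scratch,0(addr)
+		beq	scratch,1b
+
+   again with barriers according to MODEL; register names are
+   illustrative.)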
*/ + +void +alpha_split_atomic_exchange (rtx operands[]) +{ + rtx retval, mem, val, scratch; + enum memmodel model; + enum machine_mode mode; + rtx label, x, cond; + + retval = operands[0]; + mem = operands[1]; + val = operands[2]; + model = (enum memmodel) INTVAL (operands[3]); + scratch = operands[4]; + mode = GET_MODE (mem); + cond = gen_lowpart (DImode, scratch); + + alpha_pre_atomic_barrier (model); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_load_locked (mode, retval, mem); + emit_move_insn (scratch, val); + emit_store_conditional (mode, cond, mem, scratch); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); + + alpha_post_atomic_barrier (model); +} + +void +alpha_expand_atomic_exchange_12 (rtx operands[]) +{ + rtx dst, mem, val, model; + enum machine_mode mode; + rtx addr, align, wdst; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx); + + dst = operands[0]; + mem = operands[1]; + val = operands[2]; + model = operands[3]; + mode = GET_MODE (mem); + + /* We forced the address into a register via mem_noofs_operand. */ + addr = XEXP (mem, 0); + gcc_assert (register_operand (addr, DImode)); + + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), + NULL_RTX, 1, OPTAB_DIRECT); + + /* Insert val into the correct byte location within the word. */ + if (val != const0_rtx) + val = emit_insxl (mode, val, addr); + + wdst = gen_reg_rtx (DImode); + if (mode == QImode) + gen = gen_atomic_exchangeqi_1; + else + gen = gen_atomic_exchangehi_1; + emit_insn (gen (wdst, mem, val, align, model)); + + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +alpha_split_atomic_exchange_12 (rtx operands[]) +{ + rtx dest, orig_mem, addr, val, align, scratch; + rtx label, mem, width, mask, x; + enum machine_mode mode; + enum memmodel model; + + dest = operands[0]; + orig_mem = operands[1]; + val = operands[2]; + align = operands[3]; + model = (enum memmodel) INTVAL (operands[4]); + scratch = operands[5]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + alpha_pre_atomic_barrier (model); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_load_locked (DImode, scratch, mem); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + emit_insn (gen_mskxl (scratch, scratch, mask, addr)); + if (val != const0_rtx) + emit_insn (gen_iordi3 (scratch, scratch, val)); + + emit_store_conditional (DImode, scratch, mem, scratch); + + x = gen_rtx_EQ (DImode, scratch, const0_rtx); + emit_unlikely_jump (x, label); + + alpha_post_atomic_barrier (model); +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +static int +alpha_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type dep_insn_type; + + /* If the dependence is an anti-dependence, there is no cost. For an + output dependence, there is sometimes a cost, but it doesn't seem + worth handling those few cases. */ + if (REG_NOTE_KIND (link) != 0) + return cost; + + /* If we can't recognize the insns, we can't really do anything. 
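+
+   (Editorial note, not part of the original GCC source: recog_memoized
+   returns a negative insn code when a pattern is not recognized, so the
+   check below simply leaves COST untouched in that case; the only
+   adjustment made afterwards is adding alpha_memory_latency - 1 for
+   dependencies on load-type producers.)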
*/ + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + dep_insn_type = get_attr_type (dep_insn); + + /* Bring in the user-defined memory latency. */ + if (dep_insn_type == TYPE_ILD + || dep_insn_type == TYPE_FLD + || dep_insn_type == TYPE_LDSYM) + cost += alpha_memory_latency-1; + + /* Everything else handled in DFA bypasses now. */ + + return cost; +} + +/* The number of instructions that can be issued per cycle. */ + +static int +alpha_issue_rate (void) +{ + return (alpha_tune == PROCESSOR_EV4 ? 2 : 4); +} + +/* How many alternative schedules to try. This should be as wide as the + scheduling freedom in the DFA, but no wider. Making this value too + large results extra work for the scheduler. + + For EV4, loads can be issued to either IB0 or IB1, thus we have 2 + alternative schedules. For EV5, we can choose between E0/E1 and + FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */ + +static int +alpha_multipass_dfa_lookahead (void) +{ + return (alpha_tune == PROCESSOR_EV6 ? 4 : 2); +} + +/* Machine-specific function data. */ + +struct GTY(()) alpha_links; + +struct GTY(()) machine_function +{ + /* For OSF. */ + const char *some_ld_name; + + /* For flag_reorder_blocks_and_partition. */ + rtx gp_save_rtx; + + /* For VMS condition handlers. */ + bool uses_condition_handler; + + /* Linkage entries. */ + splay_tree GTY ((param1_is (char *), param2_is (struct alpha_links *))) + links; +}; + +/* How to allocate a 'struct machine_function'. */ + +static struct machine_function * +alpha_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Support for frame based VMS condition handlers. */ + +/* A VMS condition handler may be established for a function with a call to + __builtin_establish_vms_condition_handler, and cancelled with a call to + __builtin_revert_vms_condition_handler. + + The VMS Condition Handling Facility knows about the existence of a handler + from the procedure descriptor .handler field. As the VMS native compilers, + we store the user specified handler's address at a fixed location in the + stack frame and point the procedure descriptor at a common wrapper which + fetches the real handler's address and issues an indirect call. + + The indirection wrapper is "__gcc_shell_handler", provided by libgcc. + + We force the procedure kind to PT_STACK, and the fixed frame location is + fp+8, just before the register save area. We use the handler_data field in + the procedure descriptor to state the fp offset at which the installed + handler address can be found. */ + +#define VMS_COND_HANDLER_FP_OFFSET 8 + +/* Expand code to store the currently installed user VMS condition handler + into TARGET and install HANDLER as the new condition handler. */ + +void +alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler) +{ + rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx, + VMS_COND_HANDLER_FP_OFFSET); + + rtx handler_slot + = gen_rtx_MEM (DImode, handler_slot_address); + + emit_move_insn (target, handler_slot); + emit_move_insn (handler_slot, handler); + + /* Notify the start/prologue/epilogue emitters that the condition handler + slot is needed. In addition to reserving the slot space, this will force + the procedure kind to PT_STACK so ensure that the hard_frame_pointer_rtx + use above is correct. */ + cfun->machine->uses_condition_handler = true; +} + +/* Expand code to store the current VMS condition handler into TARGET and + nullify it. 
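+
+   (Editorial illustration, not part of the original GCC source: these
+   two expanders back the VMS builtins registered in alpha_init_builtins
+   further below, used at the C level roughly as
+
+	void *prev = __builtin_establish_vms_condition_handler (my_handler);
+	...
+	void *old = __builtin_revert_vms_condition_handler ();
+
+   where my_handler is a hypothetical user-supplied handler routine.)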
*/ + +void +alpha_expand_builtin_revert_vms_condition_handler (rtx target) +{ + /* We implement this by establishing a null condition handler, with the tiny + side effect of setting uses_condition_handler. This is a little bit + pessimistic if no actual builtin_establish call is ever issued, which is + not a real problem and expected never to happen anyway. */ + + alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx); +} + +/* Functions to save and restore alpha_return_addr_rtx. */ + +/* Start the ball rolling with RETURN_ADDR_RTX. */ + +rtx +alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, REG_RA); +} + +/* Return or create a memory slot containing the gp value for the current + function. Needed only if TARGET_LD_BUGGY_LDGP. */ + +rtx +alpha_gp_save_rtx (void) +{ + rtx seq, m = cfun->machine->gp_save_rtx; + + if (m == NULL) + { + start_sequence (); + + m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD); + m = validize_mem (m); + emit_move_insn (m, pic_offset_table_rtx); + + seq = get_insns (); + end_sequence (); + + /* We used to simply emit the sequence after entry_of_function. + However this breaks the CFG if the first instruction in the + first block is not the NOTE_INSN_BASIC_BLOCK, for example a + label. Emit the sequence properly on the edge. We are only + invoked from dw2_build_landing_pads and finish_eh_generation + will call commit_edge_insertions thanks to a kludge. */ + insert_insn_on_edge (seq, + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + + cfun->machine->gp_save_rtx = m; + } + + return m; +} + +static void +alpha_instantiate_decls (void) +{ + if (cfun->machine->gp_save_rtx != NULL_RTX) + instantiate_decl_rtl (cfun->machine->gp_save_rtx); +} + +static int +alpha_ra_ever_killed (void) +{ + rtx top; + + if (!has_hard_reg_initial_val (Pmode, REG_RA)) + return (int)df_regs_ever_live_p (REG_RA); + + push_topmost_sequence (); + top = get_insns (); + pop_topmost_sequence (); + + return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL_RTX); +} + + +/* Return the trap mode suffix applicable to the current + instruction, or NULL. */ + +static const char * +get_trap_mode_suffix (void) +{ + enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn); + + switch (s) + { + case TRAP_SUFFIX_NONE: + return NULL; + + case TRAP_SUFFIX_SU: + if (alpha_fptm >= ALPHA_FPTM_SU) + return "su"; + return NULL; + + case TRAP_SUFFIX_SUI: + if (alpha_fptm >= ALPHA_FPTM_SUI) + return "sui"; + return NULL; + + case TRAP_SUFFIX_V_SV: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "v"; + case ALPHA_FPTM_SU: + case ALPHA_FPTM_SUI: + return "sv"; + default: + gcc_unreachable (); + } + + case TRAP_SUFFIX_V_SV_SVI: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "v"; + case ALPHA_FPTM_SU: + return "sv"; + case ALPHA_FPTM_SUI: + return "svi"; + default: + gcc_unreachable (); + } + break; + + case TRAP_SUFFIX_U_SU_SUI: + switch (alpha_fptm) + { + case ALPHA_FPTM_N: + return NULL; + case ALPHA_FPTM_U: + return "u"; + case ALPHA_FPTM_SU: + return "su"; + case ALPHA_FPTM_SUI: + return "sui"; + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Return the rounding mode suffix applicable to the current + instruction, or NULL. 
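+
+   (Editorial note, not part of the original GCC source: together with
+   get_trap_mode_suffix above, this feeds the '/' operand code in
+   print_operand below, so that, for example, compiling with
+   -mfp-trap-mode=su -mfp-rounding-mode=d makes an addt come out as
+   addt/sud.)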
*/ + +static const char * +get_round_mode_suffix (void) +{ + enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); + + switch (s) + { + case ROUND_SUFFIX_NONE: + return NULL; + case ROUND_SUFFIX_NORMAL: + switch (alpha_fprm) + { + case ALPHA_FPRM_NORM: + return NULL; + case ALPHA_FPRM_MINF: + return "m"; + case ALPHA_FPRM_CHOP: + return "c"; + case ALPHA_FPRM_DYN: + return "d"; + default: + gcc_unreachable (); + } + break; + + case ROUND_SUFFIX_C: + return "c"; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Locate some local-dynamic symbol still in use by this function + so that we can print its name in some movdi_er_tlsldm pattern. */ + +static int +get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + if (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) + { + cfun->machine->some_ld_name = XSTR (x, 0); + return 1; + } + + return 0; +} + +static const char * +get_some_local_dynamic_name (void) +{ + rtx insn; + + if (cfun->machine->some_ld_name) + return cfun->machine->some_ld_name; + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) + return cfun->machine->some_ld_name; + + gcc_unreachable (); +} + +/* Print an operand. Recognize special options, documented below. */ + +void +print_operand (FILE *file, rtx x, int code) +{ + int i; + + switch (code) + { + case '~': + /* Print the assembler name of the current function. */ + assemble_name (file, alpha_fnname); + break; + + case '&': + assemble_name (file, get_some_local_dynamic_name ()); + break; + + case '/': + { + const char *trap = get_trap_mode_suffix (); + const char *round = get_round_mode_suffix (); + + if (trap || round) + fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : "")); + break; + } + + case ',': + /* Generates single precision instruction suffix. */ + fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file); + break; + + case '-': + /* Generates double precision instruction suffix. */ + fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file); + break; + + case '#': + if (alpha_this_literal_sequence_number == 0) + alpha_this_literal_sequence_number = alpha_next_sequence_number++; + fprintf (file, "%d", alpha_this_literal_sequence_number); + break; + + case '*': + if (alpha_this_gpdisp_sequence_number == 0) + alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++; + fprintf (file, "%d", alpha_this_gpdisp_sequence_number); + break; + + case 'H': + if (GET_CODE (x) == HIGH) + output_addr_const (file, XEXP (x, 0)); + else + output_operand_lossage ("invalid %%H value"); + break; + + case 'J': + { + const char *lituse; + + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsgd"; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsldm"; + } + else if (CONST_INT_P (x)) + lituse = "lituse_jsr"; + else + { + output_operand_lossage ("invalid %%J value"); + break; + } + + if (x != const0_rtx) + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + + case 'j': + { + const char *lituse; + +#ifdef HAVE_AS_JSRDIRECT_RELOCS + lituse = "lituse_jsrdirect"; +#else + lituse = "lituse_jsr"; +#endif + + gcc_assert (INTVAL (x) != 0); + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + case 'r': + /* If this operand is the constant zero, write it as "$31". 
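+
+   (Editorial note, not part of the original GCC source: $31 is the
+   architectural zero register in the integer file and $f31 its
+   floating-point counterpart, which is why %r and %R fold a constant
+   zero operand to those names.)  */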
*/ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$31"); + else + output_operand_lossage ("invalid %%r value"); + break; + + case 'R': + /* Similar, but for floating-point. */ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$f31"); + else + output_operand_lossage ("invalid %%R value"); + break; + + case 'N': + /* Write the 1's complement of a constant. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%N value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); + break; + + case 'P': + /* Write 1 << C, for a constant C. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%P value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) 1 << INTVAL (x)); + break; + + case 'h': + /* Write the high-order 16 bits of a constant, sign-extended. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%h value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16); + break; + + case 'L': + /* Write the low-order 16 bits of a constant, sign-extended. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%L value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000)); + break; + + case 'm': + /* Write mask for ZAP insn. */ + if (GET_CODE (x) == CONST_DOUBLE) + { + HOST_WIDE_INT mask = 0; + HOST_WIDE_INT value; + + value = CONST_DOUBLE_LOW (x); + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if (value & 0xff) + mask |= (1 << i); + + value = CONST_DOUBLE_HIGH (x); + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; + i++, value >>= 8) + if (value & 0xff) + mask |= (1 << (i + sizeof (int))); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask & 0xff); + } + + else if (CONST_INT_P (x)) + { + HOST_WIDE_INT mask = 0, value = INTVAL (x); + + for (i = 0; i < 8; i++, value >>= 8) + if (value & 0xff) + mask |= (1 << i); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask); + } + else + output_operand_lossage ("invalid %%m value"); + break; + + case 'M': + /* 'b', 'w', 'l', or 'q' as the value of the constant. */ + if (!CONST_INT_P (x) + || (INTVAL (x) != 8 && INTVAL (x) != 16 + && INTVAL (x) != 32 && INTVAL (x) != 64)) + output_operand_lossage ("invalid %%M value"); + + fprintf (file, "%s", + (INTVAL (x) == 8 ? "b" + : INTVAL (x) == 16 ? "w" + : INTVAL (x) == 32 ? "l" + : "q")); + break; + + case 'U': + /* Similar, except do it from the mask. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT value = INTVAL (x); + + if (value == 0xff) + { + fputc ('b', file); + break; + } + if (value == 0xffff) + { + fputc ('w', file); + break; + } + if (value == 0xffffffff) + { + fputc ('l', file); + break; + } + if (value == -1) + { + fputc ('q', file); + break; + } + } + else if (HOST_BITS_PER_WIDE_INT == 32 + && GET_CODE (x) == CONST_DOUBLE + && CONST_DOUBLE_LOW (x) == 0xffffffff + && CONST_DOUBLE_HIGH (x) == 0) + { + fputc ('l', file); + break; + } + output_operand_lossage ("invalid %%U value"); + break; + + case 's': + /* Write the constant value divided by 8. 
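+
+   (Editorial note, not part of the original GCC source: e.g. a
+   CONST_INT of 32 is printed as 4; the checks below only accept a
+   non-negative multiple of 8 below 64, i.e. a bit count expressed as a
+   whole number of bytes.)  */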
*/ + if (!CONST_INT_P (x) + || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 + || (INTVAL (x) & 7) != 0) + output_operand_lossage ("invalid %%s value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); + break; + + case 'S': + /* Same, except compute (64 - c) / 8 */ + + if (!CONST_INT_P (x) + && (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 + && (INTVAL (x) & 7) != 8) + output_operand_lossage ("invalid %%s value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8); + break; + + case 'C': case 'D': case 'c': case 'd': + /* Write out comparison name. */ + { + enum rtx_code c = GET_CODE (x); + + if (!COMPARISON_P (x)) + output_operand_lossage ("invalid %%C value"); + + else if (code == 'D') + c = reverse_condition (c); + else if (code == 'c') + c = swap_condition (c); + else if (code == 'd') + c = swap_condition (reverse_condition (c)); + + if (c == LEU) + fprintf (file, "ule"); + else if (c == LTU) + fprintf (file, "ult"); + else if (c == UNORDERED) + fprintf (file, "un"); + else + fprintf (file, "%s", GET_RTX_NAME (c)); + } + break; + + case 'E': + /* Write the divide or modulus operator. */ + switch (GET_CODE (x)) + { + case DIV: + fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q"); + break; + case UDIV: + fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q"); + break; + case MOD: + fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q"); + break; + case UMOD: + fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q"); + break; + default: + output_operand_lossage ("invalid %%E value"); + break; + } + break; + + case 'A': + /* Write "_u" for unaligned access. */ + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + fprintf (file, "_u"); + break; + + case 0: + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (MEM_P (x)) + output_address (XEXP (x, 0)); + else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) + { + switch (XINT (XEXP (x, 0), 1)) + { + case UNSPEC_DTPREL: + case UNSPEC_TPREL: + output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + break; + } + } + else + output_addr_const (file, x); + break; + + default: + output_operand_lossage ("invalid %%xn code"); + } +} + +void +print_operand_address (FILE *file, rtx addr) +{ + int basereg = 31; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (addr) == AND) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == PLUS + && CONST_INT_P (XEXP (addr, 1))) + { + offset = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) == LO_SUM) + { + const char *reloc16, *reloclo; + rtx op1 = XEXP (addr, 1); + + if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) + { + op1 = XEXP (op1, 0); + switch (XINT (op1, 1)) + { + case UNSPEC_DTPREL: + reloc16 = NULL; + reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello"); + break; + case UNSPEC_TPREL: + reloc16 = NULL; + reloclo = (alpha_tls_size == 16 ? 
"tprel" : "tprello"); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + return; + } + + output_addr_const (file, XVECEXP (op1, 0, 0)); + } + else + { + reloc16 = "gprel"; + reloclo = "gprellow"; + output_addr_const (file, op1); + } + + if (offset) + fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); + + addr = XEXP (addr, 0); + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + default: + gcc_unreachable (); + } + + fprintf (file, "($%d)\t\t!%s", basereg, + (basereg == 29 ? reloc16 : reloclo)); + return; + } + + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + case CONST_INT: + offset = INTVAL (addr); + break; + +#if TARGET_ABI_OPEN_VMS + case SYMBOL_REF: + fprintf (file, "%s", XSTR (addr, 0)); + return; + + case CONST: + gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF); + fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC, + XSTR (XEXP (XEXP (addr, 0), 0), 0), + INTVAL (XEXP (XEXP (addr, 0), 1))); + return; + +#endif + default: + gcc_unreachable (); + } + + fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx + for the static chain value for the function. */ + +static void +alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, word1, word2; + + fnaddr = XEXP (DECL_RTL (fndecl), 0); + +#ifdef POINTERS_EXTEND_UNSIGNED + fnaddr = convert_memory_address (Pmode, fnaddr); + chain_value = convert_memory_address (Pmode, chain_value); +#endif + + if (TARGET_ABI_OPEN_VMS) + { + const char *fnname; + char *trname; + + /* Construct the name of the trampoline entry point. */ + fnname = XSTR (fnaddr, 0); + trname = (char *) alloca (strlen (fnname) + 5); + strcpy (trname, fnname); + strcat (trname, "..tr"); + fnname = ggc_alloc_string (trname, strlen (trname) + 1); + word2 = gen_rtx_SYMBOL_REF (Pmode, fnname); + + /* Trampoline (or "bounded") procedure descriptor is constructed from + the function's procedure descriptor with certain fields zeroed IAW + the VMS calling standard. This is stored in the first quadword. */ + word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr)); + word1 = expand_and (DImode, word1, + GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)), + NULL); + } + else + { + /* These 4 instructions are: + ldq $1,24($27) + ldq $27,16($27) + jmp $31,($27),0 + nop + We don't bother setting the HINT field of the jump; the nop + is merely there for padding. */ + word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018)); + word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000)); + } + + /* Store the first two words, as computed above. */ + mem = adjust_address (m_tramp, DImode, 0); + emit_move_insn (mem, word1); + mem = adjust_address (m_tramp, DImode, 8); + emit_move_insn (mem, word2); + + /* Store function address and static chain value. 
*/ + mem = adjust_address (m_tramp, Pmode, 16); + emit_move_insn (mem, fnaddr); + mem = adjust_address (m_tramp, Pmode, 24); + emit_move_insn (mem, chain_value); + + if (TARGET_ABI_OSF) + { + emit_insn (gen_imb ()); +#ifdef HAVE_ENABLE_EXECUTE_STACK + emit_library_call (init_one_libfunc ("__enable_execute_stack"), + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); +#endif + } +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On Alpha the first 6 words of args are normally in registers + and the rest are pushed. */ + +static rtx +alpha_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int basereg; + int num_args; + + /* Don't get confused and pass small structures in FP registers. */ + if (type && AGGREGATE_TYPE_P (type)) + basereg = 16; + else + { +#ifdef ENABLE_CHECKING + /* With alpha_split_complex_arg, we shouldn't see any raw complex + values here. */ + gcc_assert (!COMPLEX_MODE_P (mode)); +#endif + + /* Set up defaults for FP operands passed in FP registers, and + integral operands passed in integer registers. */ + if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT) + basereg = 32 + 16; + else + basereg = 16; + } + + /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for + the two platforms, so we can't avoid conditional compilation. */ +#if TARGET_ABI_OPEN_VMS + { + if (mode == VOIDmode) + return alpha_arg_info_reg_val (*cum); + + num_args = cum->num_args; + if (num_args >= 6 + || targetm.calls.must_pass_in_stack (mode, type)) + return NULL_RTX; + } +#elif TARGET_ABI_OSF + { + if (*cum >= 6) + return NULL_RTX; + num_args = *cum; + + /* VOID is passed as a special flag for "last argument". */ + if (type == void_type_node) + basereg = 16; + else if (targetm.calls.must_pass_in_stack (mode, type)) + return NULL_RTX; + } +#else +#error Unhandled ABI +#endif + + return gen_rtx_REG (mode, num_args + basereg); +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ + +static void +alpha_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + bool onstack = targetm.calls.must_pass_in_stack (mode, type); + int increment = onstack ? 
6 : ALPHA_ARG_SIZE (mode, type, named); + +#if TARGET_ABI_OSF + *cum += increment; +#else + if (!onstack && cum->num_args < 6) + cum->atypes[cum->num_args] = alpha_arg_type (mode); + cum->num_args += increment; +#endif +} + +static int +alpha_arg_partial_bytes (cumulative_args_t cum_v, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + int words = 0; + CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v); + +#if TARGET_ABI_OPEN_VMS + if (cum->num_args < 6 + && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named)) + words = 6 - cum->num_args; +#elif TARGET_ABI_OSF + if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named)) + words = 6 - *cum; +#else +#error Unhandled ABI +#endif + + return words * UNITS_PER_WORD; +} + + +/* Return true if TYPE must be returned in memory, instead of in registers. */ + +static bool +alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + enum machine_mode mode = VOIDmode; + int size; + + if (type) + { + mode = TYPE_MODE (type); + + /* All aggregates are returned in memory, except on OpenVMS where + records that fit 64 bits should be returned by immediate value + as required by section 3.8.7.1 of the OpenVMS Calling Standard. */ + if (TARGET_ABI_OPEN_VMS + && TREE_CODE (type) != ARRAY_TYPE + && (unsigned HOST_WIDE_INT) int_size_in_bytes(type) <= 8) + return false; + + if (AGGREGATE_TYPE_P (type)) + return true; + } + + size = GET_MODE_SIZE (mode); + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_FLOAT: + /* Pass all float vectors in memory, like an aggregate. */ + return true; + + case MODE_COMPLEX_FLOAT: + /* We judge complex floats on the size of their element, + not the size of the whole type. */ + size = GET_MODE_UNIT_SIZE (mode); + break; + + case MODE_INT: + case MODE_FLOAT: + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + break; + + default: + /* ??? We get called on all sorts of random stuff from + aggregate_value_p. We must return something, but it's not + clear what's safe to return. Pretend it's a struct I + guess. */ + return true; + } + + /* Otherwise types must fit in one register. */ + return size > UNITS_PER_WORD; +} + +/* Return true if TYPE should be passed by invisible reference. */ + +static bool +alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + return mode == TFmode || mode == TCmode; +} + +/* Define how to find the value returned by a function. VALTYPE is the + data type of the value (as a tree). If the precise function being + called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. + MODE is set instead of VALTYPE for libcalls. + + On Alpha the value is found in $0 for integer functions and + $f0 for floating-point functions. */ + +rtx +function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + unsigned int regnum, dummy ATTRIBUTE_UNUSED; + enum mode_class mclass; + + gcc_assert (!valtype || !alpha_return_in_memory (valtype, func)); + + if (valtype) + mode = TYPE_MODE (valtype); + + mclass = GET_MODE_CLASS (mode); + switch (mclass) + { + case MODE_INT: + /* Do the same thing as PROMOTE_MODE except for libcalls on VMS, + where we have them returning both SImode and DImode. 
*/ + if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype))) + PROMOTE_MODE (mode, dummy, valtype); + /* FALLTHRU */ + + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + regnum = 0; + break; + + case MODE_FLOAT: + regnum = 32; + break; + + case MODE_COMPLEX_FLOAT: + { + enum machine_mode cmode = GET_MODE_INNER (mode); + + return gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32), + const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33), + GEN_INT (GET_MODE_SIZE (cmode))))); + } + + case MODE_RANDOM: + /* We should only reach here for BLKmode on VMS. */ + gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode); + regnum = 0; + break; + + default: + gcc_unreachable (); + } + + return gen_rtx_REG (mode, regnum); +} + +/* TCmode complex values are passed by invisible reference. We + should not split these values. */ + +static bool +alpha_split_complex_arg (const_tree type) +{ + return TYPE_MODE (type) != TCmode; +} + +static tree +alpha_build_builtin_va_list (void) +{ + tree base, ofs, space, record, type_decl; + + if (TARGET_ABI_OPEN_VMS) + return ptr_type_node; + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, + TYPE_DECL, get_identifier ("__va_list_tag"), record); + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + + /* C++? SET_IS_AGGR_TYPE (record, 1); */ + + /* Dummy field to prevent alignment warnings. */ + space = build_decl (BUILTINS_LOCATION, + FIELD_DECL, NULL_TREE, integer_type_node); + DECL_FIELD_CONTEXT (space) = record; + DECL_ARTIFICIAL (space) = 1; + DECL_IGNORED_P (space) = 1; + + ofs = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__offset"), + integer_type_node); + DECL_FIELD_CONTEXT (ofs) = record; + DECL_CHAIN (ofs) = space; + /* ??? This is a hack, __offset is marked volatile to prevent + DCE that confuses stdarg optimization and results in + gcc.c-torture/execute/stdarg-1.c failure. See PR 41089. */ + TREE_THIS_VOLATILE (ofs) = 1; + + base = build_decl (BUILTINS_LOCATION, + FIELD_DECL, get_identifier ("__base"), + ptr_type_node); + DECL_FIELD_CONTEXT (base) = record; + DECL_CHAIN (base) = ofs; + + TYPE_FIELDS (record) = base; + layout_type (record); + + va_list_gpr_counter_field = ofs; + return record; +} + +#if TARGET_ABI_OSF +/* Helper function for alpha_stdarg_optimize_hook. Skip over casts + and constant additions. */ + +static gimple +va_list_skip_additions (tree lhs) +{ + gimple stmt; + + for (;;) + { + enum tree_code code; + + stmt = SSA_NAME_DEF_STMT (lhs); + + if (gimple_code (stmt) == GIMPLE_PHI) + return stmt; + + if (!is_gimple_assign (stmt) + || gimple_assign_lhs (stmt) != lhs) + return NULL; + + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) + return stmt; + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) + && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST + || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt)))) + return stmt; + + lhs = gimple_assign_rhs1 (stmt); + } +} + +/* Check if LHS = RHS statement is + LHS = *(ap.__base + ap.__offset + cst) + or + LHS = *(ap.__base + + ((ap.__offset + cst <= 47) + ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2). + If the former, indicate that GPR registers are needed, + if the latter, indicate that FPR registers are needed. + + Also look for LHS = (*ptr).field, where ptr is one of the forms + listed above. 
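+
+   (Editorial note, not part of the original GCC source: in practice the
+   first form comes from integer va_arg reads such as va_arg (ap, int),
+   while the conditional second form is what the va_arg lowering in
+   alpha_gimplify_va_arg_1 generates for floating-point types such as
+   va_arg (ap, double), which read from the FP part of the register save
+   area.)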
+ + On alpha, cfun->va_list_gpr_size is used as size of the needed + regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR + registers are needed and bit 1 set if FPR registers are needed. + Return true if va_list references should not be scanned for the + current statement. */ + +static bool +alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt) +{ + tree base, offset, rhs; + int offset_arg = 1; + gimple base_stmt; + + if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) + != GIMPLE_SINGLE_RHS) + return false; + + rhs = gimple_assign_rhs1 (stmt); + while (handled_component_p (rhs)) + rhs = TREE_OPERAND (rhs, 0); + if (TREE_CODE (rhs) != MEM_REF + || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) + return false; + + stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); + if (stmt == NULL + || !is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) + return false; + + base = gimple_assign_rhs1 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + { + base = gimple_assign_rhs2 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt + && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + return false; + + offset_arg = 0; + } + + base = get_base_address (base); + if (TREE_CODE (base) != VAR_DECL + || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names)) + return false; + + offset = gimple_op (stmt, 1 + offset_arg); + if (TREE_CODE (offset) == SSA_NAME) + { + gimple offset_stmt = va_list_skip_additions (offset); + + if (offset_stmt + && gimple_code (offset_stmt) == GIMPLE_PHI) + { + HOST_WIDE_INT sub; + gimple arg1_stmt, arg2_stmt; + tree arg1, arg2; + enum tree_code code1, code2; + + if (gimple_phi_num_args (offset_stmt) != 2) + goto escapes; + + arg1_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); + arg2_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); + if (arg1_stmt == NULL + || !is_gimple_assign (arg1_stmt) + || arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt)) + goto escapes; + + code1 = gimple_assign_rhs_code (arg1_stmt); + code2 = gimple_assign_rhs_code (arg2_stmt); + if (code1 == COMPONENT_REF + && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) + /* Do nothing. 
*/; + else if (code2 == COMPONENT_REF + && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) + { + gimple tem = arg1_stmt; + code2 = code1; + arg1_stmt = arg2_stmt; + arg2_stmt = tem; + } + else + goto escapes; + + if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt))) + goto escapes; + + sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt)); + if (code2 == MINUS_EXPR) + sub = -sub; + if (sub < -48 || sub > -32) + goto escapes; + + arg1 = gimple_assign_rhs1 (arg1_stmt); + arg2 = gimple_assign_rhs1 (arg2_stmt); + if (TREE_CODE (arg2) == SSA_NAME) + { + arg2_stmt = va_list_skip_additions (arg2); + if (arg2_stmt == NULL + || !is_gimple_assign (arg2_stmt) + || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) + goto escapes; + arg2 = gimple_assign_rhs1 (arg2_stmt); + } + if (arg1 != arg2) + goto escapes; + + if (TREE_CODE (arg1) != COMPONENT_REF + || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field + || get_base_address (arg1) != base) + goto escapes; + + /* Need floating point regs. */ + cfun->va_list_fpr_size |= 2; + return false; + } + if (offset_stmt + && is_gimple_assign (offset_stmt) + && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) + offset = gimple_assign_rhs1 (offset_stmt); + } + if (TREE_CODE (offset) != COMPONENT_REF + || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field + || get_base_address (offset) != base) + goto escapes; + else + /* Need general regs. */ + cfun->va_list_fpr_size |= 1; + return false; + +escapes: + si->va_list_escapes = true; + return false; +} +#endif + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. */ + +static void +alpha_setup_incoming_varargs (cumulative_args_t pcum, enum machine_mode mode, + tree type, int *pretend_size, int no_rtl) +{ + CUMULATIVE_ARGS cum = *get_cumulative_args (pcum); + + /* Skip the current argument. */ + targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type, + true); + +#if TARGET_ABI_OPEN_VMS + /* For VMS, we allocate space for all 6 arg registers plus a count. + + However, if NO registers need to be saved, don't allocate any space. + This is not only because we won't need the space, but because AP + includes the current_pretend_args_size and we don't want to mess up + any ap-relative addresses already made. */ + if (cum.num_args < 6) + { + if (!no_rtl) + { + emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx); + emit_insn (gen_arg_home ()); + } + *pretend_size = 7 * UNITS_PER_WORD; + } +#else + /* On OSF/1 and friends, we allocate space for all 12 arg registers, but + only push those that are remaining. However, if NO registers need to + be saved, don't allocate any space. This is not only because we won't + need the space, but because AP includes the current_pretend_args_size + and we don't want to mess up any ap-relative addresses already made. + + If we are not to use the floating-point registers, save the integer + registers where we would put the floating-point registers. This is + not the most efficient way to implement varargs with just one register + class, but it isn't worth doing anything more efficient in this rare + case. */ + if (cum >= 6) + return; + + if (!no_rtl) + { + int count; + alias_set_type set = get_varargs_alias_set (); + rtx tmp; + + count = cfun->va_list_gpr_size / UNITS_PER_WORD; + if (count > 6 - cum) + count = 6 - cum; + + /* Detect whether integer registers or floating-point registers + are needed by the detected va_arg statements. See above for + how these values are computed. 
Note that the "escape" value + is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of + these bits set. */ + gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3); + + if (cfun->va_list_fpr_size & 1) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + (cum + 6) * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum, tmp, count); + } + + if (cfun->va_list_fpr_size & 2) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + cum * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count); + } + } + *pretend_size = 12 * UNITS_PER_WORD; +#endif +} + +static void +alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT offset; + tree t, offset_field, base_field; + + if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK) + return; + + /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base + up by 48, storing fp arg registers in the first 48 bytes, and the + integer arg registers in the next 48 bytes. This is only done, + however, if any integer registers need to be stored. + + If no integer registers need be stored, then we must subtract 48 + in order to account for the integer arg registers which are counted + in argsize above, but which are not actually stored on the stack. + Must further be careful here about structures straddling the last + integer argument register; that futzes with pretend_args_size, + which changes the meaning of AP. */ + + if (NUM_ARGS < 6) + offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD; + else + offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size; + + if (TARGET_ABI_OPEN_VMS) + { + t = make_tree (ptr_type_node, virtual_incoming_args_rtx); + t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + else + { + base_field = TYPE_FIELDS (TREE_TYPE (valist)); + offset_field = DECL_CHAIN (base_field); + + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), + valist, base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), + valist, offset_field, NULL_TREE); + + t = make_tree (ptr_type_node, virtual_incoming_args_rtx); + t = fold_build_pointer_plus_hwi (t, offset); + t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } +} + +static tree +alpha_gimplify_va_arg_1 (tree type, tree base, tree offset, + gimple_seq *pre_p) +{ + tree type_size, ptr_type, addend, t, addr; + gimple_seq internal_post; + + /* If the type could not be passed in registers, skip the block + reserved for the registers. 
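+
+   (Editorial note, not part of the original GCC source: the 6*8 constant
+   below is the 48-byte register save area, i.e. six 8-byte argument
+   registers; the MAX_EXPR pushes __offset past that area so such
+   arguments are fetched from the stack portion of the argument list,
+   while for REAL_TYPE arguments the conditional "offset - 48" further
+   down selects the FP half of the save area, which lives at negative
+   offsets from __base.)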
*/ + if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type)) + { + t = build_int_cst (TREE_TYPE (offset), 6*8); + gimplify_assign (offset, + build2 (MAX_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + } + + addend = offset; + ptr_type = build_pointer_type_for_mode (type, ptr_mode, true); + + if (TREE_CODE (type) == COMPLEX_TYPE) + { + tree real_part, imag_part, real_temp; + + real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, + offset, pre_p); + + /* Copy the value into a new temporary, lest the formal temporary + be reused out from under us. */ + real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base, + offset, pre_p); + + return build2 (COMPLEX_EXPR, type, real_temp, imag_part); + } + else if (TREE_CODE (type) == REAL_TYPE) + { + tree fpaddend, cond, fourtyeight; + + fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8); + fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend), + addend, fourtyeight); + cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight); + addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond, + fpaddend, addend); + } + + /* Build the final address and force that value into a temporary. */ + addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend); + internal_post = NULL; + gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); + gimple_seq_add_seq (pre_p, internal_post); + + /* Update the offset field. */ + type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); + if (type_size == NULL || TREE_OVERFLOW (type_size)) + t = size_zero_node; + else + { + t = size_binop (PLUS_EXPR, type_size, size_int (7)); + t = size_binop (TRUNC_DIV_EXPR, t, size_int (8)); + t = size_binop (MULT_EXPR, t, size_int (8)); + } + t = fold_convert (TREE_TYPE (offset), t); + gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + + return build_va_arg_indirect_ref (addr); +} + +static tree +alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + tree offset_field, base_field, offset, base, t, r; + bool indirect; + + if (TARGET_ABI_OPEN_VMS) + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + + base_field = TYPE_FIELDS (va_list_type_node); + offset_field = DECL_CHAIN (base_field); + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), + valist, base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), + valist, offset_field, NULL_TREE); + + /* Pull the fields of the structure out into temporaries. Since we never + modify the base field, we can use a formal temporary. Sign-extend the + offset field so that it's the proper width for pointer arithmetic. */ + base = get_formal_tmp_var (base_field, pre_p); + + t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field); + offset = get_initialized_tmp_var (t, pre_p, NULL); + + indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); + if (indirect) + type = build_pointer_type_for_mode (type, ptr_mode, true); + + /* Find the value. Note that this will be a stable indirection, or + a composite of stable indirections in the case of complex. */ + r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p); + + /* Stuff the offset temporary back into its field. */ + gimplify_assign (unshare_expr (offset_field), + fold_convert (TREE_TYPE (offset_field), offset), pre_p); + + if (indirect) + r = build_va_arg_indirect_ref (r); + + return r; +} + +/* Builtins. 
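+
+   (Editorial illustration, not part of the original GCC source: these
+   are the __builtin_alpha_* intrinsics; for example
+
+	unsigned long lo32 = __builtin_alpha_zapnot (v, 0x0f);
+
+   keeps the low four bytes of v and clears the rest, and the constant
+   folders later in this file evaluate such calls at compile time when
+   the operands are constants.)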
*/ + +enum alpha_builtin +{ + ALPHA_BUILTIN_CMPBGE, + ALPHA_BUILTIN_EXTBL, + ALPHA_BUILTIN_EXTWL, + ALPHA_BUILTIN_EXTLL, + ALPHA_BUILTIN_EXTQL, + ALPHA_BUILTIN_EXTWH, + ALPHA_BUILTIN_EXTLH, + ALPHA_BUILTIN_EXTQH, + ALPHA_BUILTIN_INSBL, + ALPHA_BUILTIN_INSWL, + ALPHA_BUILTIN_INSLL, + ALPHA_BUILTIN_INSQL, + ALPHA_BUILTIN_INSWH, + ALPHA_BUILTIN_INSLH, + ALPHA_BUILTIN_INSQH, + ALPHA_BUILTIN_MSKBL, + ALPHA_BUILTIN_MSKWL, + ALPHA_BUILTIN_MSKLL, + ALPHA_BUILTIN_MSKQL, + ALPHA_BUILTIN_MSKWH, + ALPHA_BUILTIN_MSKLH, + ALPHA_BUILTIN_MSKQH, + ALPHA_BUILTIN_UMULH, + ALPHA_BUILTIN_ZAP, + ALPHA_BUILTIN_ZAPNOT, + ALPHA_BUILTIN_AMASK, + ALPHA_BUILTIN_IMPLVER, + ALPHA_BUILTIN_RPCC, + ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, + ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, + + /* TARGET_MAX */ + ALPHA_BUILTIN_MINUB8, + ALPHA_BUILTIN_MINSB8, + ALPHA_BUILTIN_MINUW4, + ALPHA_BUILTIN_MINSW4, + ALPHA_BUILTIN_MAXUB8, + ALPHA_BUILTIN_MAXSB8, + ALPHA_BUILTIN_MAXUW4, + ALPHA_BUILTIN_MAXSW4, + ALPHA_BUILTIN_PERR, + ALPHA_BUILTIN_PKLB, + ALPHA_BUILTIN_PKWB, + ALPHA_BUILTIN_UNPKBL, + ALPHA_BUILTIN_UNPKBW, + + /* TARGET_CIX */ + ALPHA_BUILTIN_CTTZ, + ALPHA_BUILTIN_CTLZ, + ALPHA_BUILTIN_CTPOP, + + ALPHA_BUILTIN_max +}; + +static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = { + CODE_FOR_builtin_cmpbge, + CODE_FOR_extbl, + CODE_FOR_extwl, + CODE_FOR_extll, + CODE_FOR_extql, + CODE_FOR_extwh, + CODE_FOR_extlh, + CODE_FOR_extqh, + CODE_FOR_builtin_insbl, + CODE_FOR_builtin_inswl, + CODE_FOR_builtin_insll, + CODE_FOR_insql, + CODE_FOR_inswh, + CODE_FOR_inslh, + CODE_FOR_insqh, + CODE_FOR_mskbl, + CODE_FOR_mskwl, + CODE_FOR_mskll, + CODE_FOR_mskql, + CODE_FOR_mskwh, + CODE_FOR_msklh, + CODE_FOR_mskqh, + CODE_FOR_umuldi3_highpart, + CODE_FOR_builtin_zap, + CODE_FOR_builtin_zapnot, + CODE_FOR_builtin_amask, + CODE_FOR_builtin_implver, + CODE_FOR_builtin_rpcc, + CODE_FOR_builtin_establish_vms_condition_handler, + CODE_FOR_builtin_revert_vms_condition_handler, + + /* TARGET_MAX */ + CODE_FOR_builtin_minub8, + CODE_FOR_builtin_minsb8, + CODE_FOR_builtin_minuw4, + CODE_FOR_builtin_minsw4, + CODE_FOR_builtin_maxub8, + CODE_FOR_builtin_maxsb8, + CODE_FOR_builtin_maxuw4, + CODE_FOR_builtin_maxsw4, + CODE_FOR_builtin_perr, + CODE_FOR_builtin_pklb, + CODE_FOR_builtin_pkwb, + CODE_FOR_builtin_unpkbl, + CODE_FOR_builtin_unpkbw, + + /* TARGET_CIX */ + CODE_FOR_ctzdi2, + CODE_FOR_clzdi2, + CODE_FOR_popcountdi2 +}; + +struct alpha_builtin_def +{ + const char *name; + enum alpha_builtin code; + unsigned int target_mask; + bool is_const; +}; + +static struct alpha_builtin_def const zero_arg_builtins[] = { + { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true }, + { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false } +}; + +static struct alpha_builtin_def const one_arg_builtins[] = { + { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true }, + { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true }, + { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true }, + { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true }, + { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true }, + { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true }, + { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true }, + { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true } +}; + +static struct alpha_builtin_def const two_arg_builtins[] = { + { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true }, + { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true }, + { 
"__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true }, + { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true }, + { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true }, + { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true }, + { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true }, + { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true }, + { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true }, + { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true }, + { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true }, + { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true }, + { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true }, + { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true }, + { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true }, + { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true }, + { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true }, + { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true }, + { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true }, + { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true }, + { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true }, + { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true }, + { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true }, + { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true }, + { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true }, + { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true }, + { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true }, + { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true }, + { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true }, + { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true }, + { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true }, + { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true }, + { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true }, + { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true } +}; + +static GTY(()) tree alpha_dimode_u; +static GTY(()) tree alpha_v8qi_u; +static GTY(()) tree alpha_v8qi_s; +static GTY(()) tree alpha_v4hi_u; +static GTY(()) tree alpha_v4hi_s; + +static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max]; + +/* Return the alpha builtin for CODE. */ + +static tree +alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= ALPHA_BUILTIN_max) + return error_mark_node; + return alpha_builtins[code]; +} + +/* Helper function of alpha_init_builtins. Add the built-in specified + by NAME, TYPE, CODE, and ECF. */ + +static void +alpha_builtin_function (const char *name, tree ftype, + enum alpha_builtin code, unsigned ecf) +{ + tree decl = add_builtin_function (name, ftype, (int) code, + BUILT_IN_MD, NULL, NULL_TREE); + + if (ecf & ECF_CONST) + TREE_READONLY (decl) = 1; + if (ecf & ECF_NOTHROW) + TREE_NOTHROW (decl) = 1; + + alpha_builtins [(int) code] = decl; +} + +/* Helper function of alpha_init_builtins. Add the COUNT built-in + functions pointed to by P, with function type FTYPE. */ + +static void +alpha_add_builtins (const struct alpha_builtin_def *p, size_t count, + tree ftype) +{ + size_t i; + + for (i = 0; i < count; ++i, ++p) + if ((target_flags & p->target_mask) == p->target_mask) + alpha_builtin_function (p->name, ftype, p->code, + (p->is_const ? 
ECF_CONST : 0) | ECF_NOTHROW); +} + +static void +alpha_init_builtins (void) +{ + tree ftype; + + alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1); + alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8); + alpha_v8qi_s = build_vector_type (intQI_type_node, 8); + alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4); + alpha_v4hi_s = build_vector_type (intHI_type_node, 4); + + ftype = build_function_type_list (alpha_dimode_u, NULL_TREE); + alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype); + + ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE); + alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype); + + ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, + alpha_dimode_u, NULL_TREE); + alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype); + + if (TARGET_ABI_OPEN_VMS) + { + ftype = build_function_type_list (ptr_type_node, ptr_type_node, + NULL_TREE); + alpha_builtin_function ("__builtin_establish_vms_condition_handler", + ftype, + ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER, + 0); + + ftype = build_function_type_list (ptr_type_node, void_type_node, + NULL_TREE); + alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype, + ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0); + + vms_patch_builtins (); + } +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +alpha_expand_builtin (tree exp, rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ +#define MAX_ARGS 2 + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg; + call_expr_arg_iterator iter; + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + + if (fcode >= ALPHA_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + arity = 0; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + const struct insn_operand_data *insn_op; + + if (arg == error_mark_node) + return NULL_RTX; + if (arity > MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + arity++; + } + + if (nonvoid) + { + enum machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + + +/* Several bits below assume HWI >= 64 bits. This should be enforced + by config.gcc. 
*/ +#if HOST_BITS_PER_WIDE_INT < 64 +# error "HOST_WIDE_INT too small" +#endif + +/* Fold the builtin for the CMPBGE instruction. This is a vector comparison + with an 8-bit output vector. OPINT contains the integer operands; bit N + of OP_CONST is set if OPINT[N] is valid. */ + +static tree +alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const) +{ + if (op_const == 3) + { + int i, val; + for (i = 0, val = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff; + if (c0 >= c1) + val |= 1 << i; + } + return build_int_cst (alpha_dimode_u, val); + } + else if (op_const == 2 && opint[1] == 0) + return build_int_cst (alpha_dimode_u, 0xff); + return NULL; +} + +/* Fold the builtin for the ZAPNOT instruction. This is essentially a + specialized form of an AND operation. Other byte manipulation instructions + are defined in terms of this instruction, so this is also used as a + subroutine for other builtins. + + OP contains the tree operands; OPINT contains the extracted integer values. + Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only + OPINT may be considered. */ + +static tree +alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT mask = 0; + int i; + + for (i = 0; i < 8; ++i) + if ((opint[1] >> i) & 1) + mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8); + + if (op_const & 1) + return build_int_cst (alpha_dimode_u, opint[0] & mask); + + if (op) + return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0], + build_int_cst (alpha_dimode_u, mask)); + } + else if ((op_const & 1) && opint[0] == 0) + return build_int_cst (alpha_dimode_u, 0); + return NULL; +} + +/* Fold the builtins for the EXT family of instructions. */ + +static tree +alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + long zap_const = 2; + tree *zap_op = NULL; + + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + loc *= BITS_PER_UNIT; + + if (loc != 0) + { + if (op_const & 1) + { + unsigned HOST_WIDE_INT temp = opint[0]; + if (is_high) + temp <<= loc; + else + temp >>= loc; + opint[0] = temp; + zap_const = 3; + } + } + else + zap_op = op; + } + + opint[1] = bytemask; + return alpha_fold_builtin_zapnot (zap_op, opint, zap_const); +} + +/* Fold the builtins for the INS family of instructions. 
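As a quick host-side illustration of what the two folds above compute (a plain-C sketch, separate from the GCC sources): CMPBGE produces one result bit per byte lane, and ZAPNOT expands an 8-bit byte-select mask into a 64-bit AND mask.

#include <stdint.h>

/* Reference model mirroring alpha_fold_builtin_cmpbge: bit i of the
   result is set when byte i of A is (unsigned) >= byte i of B.  */
static uint64_t
ref_cmpbge (uint64_t a, uint64_t b)
{
  uint64_t res = 0;
  for (int i = 0; i < 8; i++)
    if (((a >> (i * 8)) & 0xff) >= ((b >> (i * 8)) & 0xff))
      res |= (uint64_t) 1 << i;
  return res;
}

/* Reference model mirroring alpha_fold_builtin_zapnot: each set bit in
   the low 8 bits of MASK keeps the corresponding byte of A.  */
static uint64_t
ref_zapnot (uint64_t a, uint64_t mask)
{
  uint64_t keep = 0;
  for (int i = 0; i < 8; i++)
    if ((mask >> i) & 1)
      keep |= (uint64_t) 0xff << (i * 8);
  return a & keep;
}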
*/ + +static tree +alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if ((op_const & 1) && opint[0] == 0) + return build_int_cst (alpha_dimode_u, 0); + + if (op_const & 2) + { + unsigned HOST_WIDE_INT temp, loc, byteloc; + tree *zap_op = NULL; + + loc = opint[1] & 7; + bytemask <<= loc; + + temp = opint[0]; + if (is_high) + { + byteloc = (64 - (loc * 8)) & 0x3f; + if (byteloc == 0) + zap_op = op; + else + temp >>= byteloc; + bytemask >>= 8; + } + else + { + byteloc = loc * 8; + if (byteloc == 0) + zap_op = op; + else + temp <<= byteloc; + } + + opint[0] = temp; + opint[1] = bytemask; + return alpha_fold_builtin_zapnot (zap_op, opint, op_const); + } + + return NULL; +} + +static tree +alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + bytemask <<= loc; + + if (is_high) + bytemask >>= 8; + + opint[1] = bytemask ^ 0xff; + } + + return alpha_fold_builtin_zapnot (op, opint, op_const); +} + +static tree +alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype) +{ + tree op0 = fold_convert (vtype, op[0]); + tree op1 = fold_convert (vtype, op[1]); + tree val = fold_build2 (code, vtype, op0, op1); + return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val); +} + +static tree +alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp = 0; + int i; + + if (op_const != 3) + return NULL; + + for (i = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff; + if (a >= b) + temp += a - b; + else + temp += b - a; + } + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 24) & 0xff00; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 8) & 0xff00; + temp |= (opint[0] >> 16) & 0xff0000; + temp |= (opint[0] >> 24) & 0xff000000; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0xff00) << 24; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0x0000ff00) << 8; + temp |= (opint[0] & 0x00ff0000) << 16; + temp |= (opint[0] & 0xff000000) << 24; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = exact_log2 (opint[0] & -opint[0]); + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned 
HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = 64 - floor_log2 (opint[0]) - 1; + + return build_int_cst (alpha_dimode_u, temp); +} + +static tree +alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp, op; + + if (op_const == 0) + return NULL; + + op = opint[0]; + temp = 0; + while (op) + temp++, op &= op - 1; + + return build_int_cst (alpha_dimode_u, temp); +} + +/* Fold one of our builtin functions. */ + +static tree +alpha_fold_builtin (tree fndecl, int n_args, tree *op, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned HOST_WIDE_INT opint[MAX_ARGS]; + long op_const = 0; + int i; + + if (n_args > MAX_ARGS) + return NULL; + + for (i = 0; i < n_args; i++) + { + tree arg = op[i]; + if (arg == error_mark_node) + return NULL; + + opint[i] = 0; + if (TREE_CODE (arg) == INTEGER_CST) + { + op_const |= 1L << i; + opint[i] = int_cst_value (arg); + } + } + + switch (DECL_FUNCTION_CODE (fndecl)) + { + case ALPHA_BUILTIN_CMPBGE: + return alpha_fold_builtin_cmpbge (opint, op_const); + + case ALPHA_BUILTIN_EXTBL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_EXTWL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_EXTLL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_EXTQL: + return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_EXTWH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_EXTLH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_EXTQH: + return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_INSBL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_INSWL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_INSLL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_INSQL: + return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_INSWH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_INSLH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_INSQH: + return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_MSKBL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false); + case ALPHA_BUILTIN_MSKWL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false); + case ALPHA_BUILTIN_MSKLL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false); + case ALPHA_BUILTIN_MSKQL: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false); + case ALPHA_BUILTIN_MSKWH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true); + case ALPHA_BUILTIN_MSKLH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true); + case ALPHA_BUILTIN_MSKQH: + return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true); + + case ALPHA_BUILTIN_UMULH: + return fold_build2 (MULT_HIGHPART_EXPR, alpha_dimode_u, op[0], op[1]); + + case ALPHA_BUILTIN_ZAP: + opint[1] ^= 0xff; + /* FALLTHRU */ + case ALPHA_BUILTIN_ZAPNOT: + return alpha_fold_builtin_zapnot (op, opint, op_const); + + case ALPHA_BUILTIN_MINUB8: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u); + case ALPHA_BUILTIN_MINSB8: + return 
alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s); + case ALPHA_BUILTIN_MINUW4: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u); + case ALPHA_BUILTIN_MINSW4: + return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s); + case ALPHA_BUILTIN_MAXUB8: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u); + case ALPHA_BUILTIN_MAXSB8: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s); + case ALPHA_BUILTIN_MAXUW4: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u); + case ALPHA_BUILTIN_MAXSW4: + return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s); + + case ALPHA_BUILTIN_PERR: + return alpha_fold_builtin_perr (opint, op_const); + case ALPHA_BUILTIN_PKLB: + return alpha_fold_builtin_pklb (opint, op_const); + case ALPHA_BUILTIN_PKWB: + return alpha_fold_builtin_pkwb (opint, op_const); + case ALPHA_BUILTIN_UNPKBL: + return alpha_fold_builtin_unpkbl (opint, op_const); + case ALPHA_BUILTIN_UNPKBW: + return alpha_fold_builtin_unpkbw (opint, op_const); + + case ALPHA_BUILTIN_CTTZ: + return alpha_fold_builtin_cttz (opint, op_const); + case ALPHA_BUILTIN_CTLZ: + return alpha_fold_builtin_ctlz (opint, op_const); + case ALPHA_BUILTIN_CTPOP: + return alpha_fold_builtin_ctpop (opint, op_const); + + case ALPHA_BUILTIN_AMASK: + case ALPHA_BUILTIN_IMPLVER: + case ALPHA_BUILTIN_RPCC: + /* None of these are foldable at compile-time. */ + default: + return NULL; + } +} + +/* This page contains routines that are used to determine what the function + prologue and epilogue code will do and write them out. */ + +/* Compute the size of the save area in the stack. */ + +/* These variables are used for communication between the following functions. + They indicate various things about the current function being compiled + that are used to tell what kind of prologue, epilogue and procedure + descriptor to generate. */ + +/* Nonzero if we need a stack procedure. */ +enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2}; +static enum alpha_procedure_types alpha_procedure_type; + +/* Register number (either FP or SP) that is used to unwind the frame. */ +static int vms_unwind_regno; + +/* Register number used to save FP. We need not have one for RA since + we don't modify it for register procedures. This is only defined + for register frame procedures. */ +static int vms_save_fp_regno; + +/* Register number used to reference objects off our PV. */ +static int vms_base_regno; + +/* Compute register masks for saved registers. */ + +static void +alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP) +{ + unsigned long imask = 0; + unsigned long fmask = 0; + unsigned int i; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. */ + if (cfun->is_thunk) + { + *imaskP = 0; + *fmaskP = 0; + return; + } + + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) + imask |= (1UL << HARD_FRAME_POINTER_REGNUM); + + /* One for every register we have to save. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (! fixed_regs[i] && ! call_used_regs[i] + && df_regs_ever_live_p (i) && i != REG_RA) + { + if (i < 32) + imask |= (1UL << i); + else + fmask |= (1UL << (i - 32)); + } + + /* We need to restore these for the handler. 
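The CTTZ and CTPOP folds defined just above follow the Alpha convention that a zero input yields 64. A host-side sketch of the same arithmetic, for illustration only:

#include <stdint.h>

/* Mirrors alpha_fold_builtin_cttz and alpha_fold_builtin_ctpop: count
   trailing zeros (with cttz(0) defined as 64) and population count via
   the classic clear-lowest-set-bit loop.  */
static unsigned
ref_cttz (uint64_t x)
{
  unsigned n = 0;
  if (x == 0)
    return 64;
  while ((x & 1) == 0)
    {
      x >>= 1;
      n++;
    }
  return n;
}

static unsigned
ref_ctpop (uint64_t x)
{
  unsigned n = 0;
  for (; x != 0; x &= x - 1)   /* clears the lowest set bit */
    n++;
  return n;
}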
*/ + if (crtl->calls_eh_return) + { + for (i = 0; ; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + imask |= 1UL << regno; + } + } + + /* If any register spilled, then spill the return address also. */ + /* ??? This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. */ + if (imask || fmask || alpha_ra_ever_killed ()) + imask |= (1UL << REG_RA); + + *imaskP = imask; + *fmaskP = fmask; +} + +int +alpha_sa_size (void) +{ + unsigned long mask[2]; + int sa_size = 0; + int i, j; + + alpha_sa_mask (&mask[0], &mask[1]); + + for (j = 0; j < 2; ++j) + for (i = 0; i < 32; ++i) + if ((mask[j] >> i) & 1) + sa_size++; + + if (TARGET_ABI_OPEN_VMS) + { + /* Start with a stack procedure if we make any calls (REG_RA used), or + need a frame pointer, with a register procedure if we otherwise need + at least a slot, and with a null procedure in other cases. */ + if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed) + alpha_procedure_type = PT_STACK; + else if (get_frame_size() != 0) + alpha_procedure_type = PT_REGISTER; + else + alpha_procedure_type = PT_NULL; + + /* Don't reserve space for saving FP & RA yet. Do that later after we've + made the final decision on stack procedure vs register procedure. */ + if (alpha_procedure_type == PT_STACK) + sa_size -= 2; + + /* Decide whether to refer to objects off our PV via FP or PV. + If we need FP for something else or if we receive a nonlocal + goto (which expects PV to contain the value), we must use PV. + Otherwise, start by assuming we can use FP. */ + + vms_base_regno + = (frame_pointer_needed + || cfun->has_nonlocal_label + || alpha_procedure_type == PT_STACK + || crtl->outgoing_args_size) + ? REG_PV : HARD_FRAME_POINTER_REGNUM; + + /* If we want to copy PV into FP, we need to find some register + in which to save FP. */ + + vms_save_fp_regno = -1; + if (vms_base_regno == HARD_FRAME_POINTER_REGNUM) + for (i = 0; i < 32; i++) + if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i)) + vms_save_fp_regno = i; + + /* A VMS condition handler requires a stack procedure in our + implementation. (not required by the calling standard). */ + if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER) + || cfun->machine->uses_condition_handler) + vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK; + else if (alpha_procedure_type == PT_NULL) + vms_base_regno = REG_PV; + + /* Stack unwinding should be done via FP unless we use it for PV. */ + vms_unwind_regno = (vms_base_regno == REG_PV + ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM); + + /* If this is a stack procedure, allow space for saving FP, RA and + a condition handler slot if needed. */ + if (alpha_procedure_type == PT_STACK) + sa_size += 2 + cfun->machine->uses_condition_handler; + } + else + { + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + } + + return sa_size * 8; +} + +/* Define the offset between two registers, one to be eliminated, + and the other its replacement, at the start of a routine. 
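For the non-VMS path, the size computation above reduces to one 8-byte slot per saved register, padded so the save area stays a multiple of 16 bytes. A sketch with hypothetical masks:

/* Host-side restatement of alpha_sa_size for the non-VMS case; IMASK and
   FMASK are the integer and FP save masks computed by alpha_sa_mask.  */
static int
example_sa_size (unsigned long imask, unsigned long fmask)
{
  int slots = 0;

  for (int i = 0; i < 32; i++)
    slots += ((imask >> i) & 1) + ((fmask >> i) & 1);

  if (slots & 1)
    slots++;            /* pad to an even slot count, i.e. 16 bytes */

  return slots * 8;     /* size in bytes */
}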
*/ + +HOST_WIDE_INT +alpha_initial_elimination_offset (unsigned int from, + unsigned int to ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT ret; + + ret = alpha_sa_size (); + ret += ALPHA_ROUND (crtl->outgoing_args_size); + + switch (from) + { + case FRAME_POINTER_REGNUM: + break; + + case ARG_POINTER_REGNUM: + ret += (ALPHA_ROUND (get_frame_size () + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size); + break; + + default: + gcc_unreachable (); + } + + return ret; +} + +#if TARGET_ABI_OPEN_VMS + +/* Worker function for TARGET_CAN_ELIMINATE. */ + +static bool +alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + /* We need the alpha_procedure_type to decide. Evaluate it now. */ + alpha_sa_size (); + + switch (alpha_procedure_type) + { + case PT_NULL: + /* NULL procedures have no frame of their own and we only + know how to resolve from the current stack pointer. */ + return to == STACK_POINTER_REGNUM; + + case PT_REGISTER: + case PT_STACK: + /* We always eliminate except to the stack pointer if there is no + usable frame pointer at hand. */ + return (to != STACK_POINTER_REGNUM + || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM); + } + + gcc_unreachable (); +} + +/* FROM is to be eliminated for TO. Return the offset so that TO+offset + designates the same location as FROM. */ + +HOST_WIDE_INT +alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to) +{ + /* The only possible attempts we ever expect are ARG or FRAME_PTR to + HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide + on the proper computations and will need the register save area size + in most cases. */ + + HOST_WIDE_INT sa_size = alpha_sa_size (); + + /* PT_NULL procedures have no frame of their own and we only allow + elimination to the stack pointer. This is the argument pointer and we + resolve the soft frame pointer to that as well. */ + + if (alpha_procedure_type == PT_NULL) + return 0; + + /* For a PT_STACK procedure the frame layout looks as follows + + -----> decreasing addresses + + < size rounded up to 16 | likewise > + --------------#------------------------------+++--------------+++-------# + incoming args # pretended args | "frame" | regs sa | PV | outgoing args # + --------------#---------------------------------------------------------# + ^ ^ ^ ^ + ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR + + + PT_REGISTER procedures are similar in that they may have a frame of their + own. They have no regs-sa/pv/outgoing-args area. + + We first compute offset to HARD_FRAME_PTR, then add what we need to get + to STACK_PTR if need be. */ + + { + HOST_WIDE_INT offset; + HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 
8 : 0; + + switch (from) + { + case FRAME_POINTER_REGNUM: + offset = ALPHA_ROUND (sa_size + pv_save_size); + break; + case ARG_POINTER_REGNUM: + offset = (ALPHA_ROUND (sa_size + pv_save_size + + get_frame_size () + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size); + break; + default: + gcc_unreachable (); + } + + if (to == STACK_POINTER_REGNUM) + offset += ALPHA_ROUND (crtl->outgoing_args_size); + + return offset; + } +} + +#define COMMON_OBJECT "common_object" + +static tree +common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED, + tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs ATTRIBUTE_UNUSED) +{ + tree decl = *node; + gcc_assert (DECL_P (decl)); + + DECL_COMMON (decl) = 1; + return NULL_TREE; +} + +static const struct attribute_spec vms_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + +void +vms_output_aligned_decl_common(FILE *file, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + tree attr = DECL_ATTRIBUTES (decl); + fprintf (file, "%s", COMMON_ASM_OP); + assemble_name (file, name); + fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size); + /* ??? Unlike on OSF/1, the alignment factor is not in log units. */ + fprintf (file, ",%u", align / BITS_PER_UNIT); + if (attr) + { + attr = lookup_attribute (COMMON_OBJECT, attr); + if (attr) + fprintf (file, ",%s", + IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr)))); + } + fputc ('\n', file); +} + +#undef COMMON_OBJECT + +#endif + +static int +find_lo_sum_using_gp (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx; +} + +int +alpha_find_lo_sum_using_gp (rtx insn) +{ + return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0; +} + +static int +alpha_does_function_need_gp (void) +{ + rtx insn; + + /* The GP being variable is an OSF abi thing. */ + if (! TARGET_ABI_OSF) + return 0; + + /* We need the gp to load the address of __mcount. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + return 1; + + /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */ + if (cfun->is_thunk) + return 1; + + /* The nonlocal receiver pattern assumes that the gp is valid for + the nested function. Reasonable because it's almost always set + correctly already. For the cases where that's wrong, make sure + the nested function loads its gp on entry. */ + if (crtl->has_nonlocal_goto) + return 1; + + /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first. + Even if we are a static function, we still need to do this in case + our address is taken and passed to something like qsort. */ + + push_topmost_sequence (); + insn = get_insns (); + pop_topmost_sequence (); + + for (; insn; insn = NEXT_INSN (insn)) + if (NONDEBUG_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER + && get_attr_usegp (insn)) + return 1; + + return 0; +} + + +/* Helper function to set RTX_FRAME_RELATED_P on instructions, including + sequences. 
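Tying the PT_STACK layout diagram above to numbers, here is a sketch of the offset arithmetic, under the assumption (stated by the surrounding comments) that ALPHA_ROUND rounds up to a 16-byte multiple:

/* Hypothetical restatement of alpha_vms_initial_elimination_offset for a
   PT_STACK procedure: FRAME_PTR is separated from HARD_FRAME_PTR by the
   register save area plus the 8-byte PV slot, ARG_PTR additionally by
   the local frame, and eliminating to STACK_PTR adds the outgoing-args
   area.  */
#define ROUND16(X) (((X) + 15) & ~15)

static long
example_vms_offset (long sa_size, long frame, long pretend, long outgoing,
                    int from_is_arg_ptr, int to_is_sp)
{
  long pv_save_size = 8;                        /* PT_STACK reserves PV */
  long offset = ROUND16 (sa_size + pv_save_size);

  if (from_is_arg_ptr)
    offset = ROUND16 (sa_size + pv_save_size + frame + pretend) - pretend;

  if (to_is_sp)
    offset += ROUND16 (outgoing);

  return offset;
}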
*/ + +static rtx +set_frame_related_p (void) +{ + rtx seq = get_insns (); + rtx insn; + + end_sequence (); + + if (!seq) + return NULL_RTX; + + if (INSN_P (seq)) + { + insn = seq; + while (insn != NULL_RTX) + { + RTX_FRAME_RELATED_P (insn) = 1; + insn = NEXT_INSN (insn); + } + seq = emit_insn (seq); + } + else + { + seq = emit_insn (seq); + RTX_FRAME_RELATED_P (seq) = 1; + } + return seq; +} + +#define FRP(exp) (start_sequence (), exp, set_frame_related_p ()) + +/* Generates a store with the proper unwind info attached. VALUE is + stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG + contains SP+FRAME_BIAS, and that is the unwind info that should be + generated. If FRAME_REG != VALUE, then VALUE is being stored on + behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */ + +static void +emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs, rtx frame_reg) +{ + rtx addr, mem, insn; + + addr = plus_constant (Pmode, base_reg, base_ofs); + mem = gen_frame_mem (DImode, addr); + + insn = emit_move_insn (mem, value); + RTX_FRAME_RELATED_P (insn) = 1; + + if (frame_bias || value != frame_reg) + { + if (frame_bias) + { + addr = plus_constant (Pmode, stack_pointer_rtx, + frame_bias + base_ofs); + mem = gen_rtx_MEM (DImode, addr); + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, mem, frame_reg)); + } +} + +static void +emit_frame_store (unsigned int regno, rtx base_reg, + HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs) +{ + rtx reg = gen_rtx_REG (DImode, regno); + emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); +} + +/* Compute the frame size. SIZE is the size of the "naked" frame + and SA_SIZE is the size of the register save area. */ + +static HOST_WIDE_INT +compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size) +{ + if (TARGET_ABI_OPEN_VMS) + return ALPHA_ROUND (sa_size + + (alpha_procedure_type == PT_STACK ? 8 : 0) + + size + + crtl->args.pretend_args_size); + else + return ALPHA_ROUND (crtl->outgoing_args_size) + + sa_size + + ALPHA_ROUND (size + + crtl->args.pretend_args_size); +} + +/* Write function prologue. */ + +/* On vms we have two kinds of functions: + + - stack frame (PROC_STACK) + these are 'normal' functions with local vars and which are + calling other functions + - register frame (PROC_REGISTER) + keeps all data in registers, needs no stack + + We must pass this to the assembler so it can generate the + proper pdsc (procedure descriptor) + This is done with the '.pdesc' command. + + On not-vms, we don't really differentiate between the two, as we can + simply allocate stack without saving registers. */ + +void +alpha_expand_prologue (void) +{ + /* Registers to save. */ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size, sa_bias; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size; + /* Probed stack size; it additionally includes the size of + the "reserve region" if any. */ + HOST_WIDE_INT probed_size; + /* Offset from base reg to register save area. 
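Spelled out with host arithmetic, compute_frame_size above combines the pieces as follows; this is a sketch that again assumes ALPHA_ROUND means round up to 16 bytes:

/* Restatement of compute_frame_size for both ABIs; all quantities are
   byte counts and the inputs are hypothetical.  */
#define ROUND16(X) (((X) + 15) & ~15)

static long
example_frame_size (int vms, int pt_stack, long size, long sa_size,
                    long pretend_args, long outgoing_args)
{
  if (vms)
    return ROUND16 (sa_size + (pt_stack ? 8 : 0) + size + pretend_args);

  return ROUND16 (outgoing_args) + sa_size + ROUND16 (size + pretend_args);
}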
*/ + HOST_WIDE_INT reg_offset; + rtx sa_reg; + int i; + + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (flag_stack_usage_info) + current_function_static_stack_size = frame_size; + + if (TARGET_ABI_OPEN_VMS) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + /* Emit an insn to reload GP, if needed. */ + if (TARGET_ABI_OSF) + { + alpha_function_needs_gp = alpha_does_function_need_gp (); + if (alpha_function_needs_gp) + emit_insn (gen_prologue_ldgp ()); + } + + /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert + the call to mcount ourselves, rather than having the linker do it + magically in response to -pg. Since _mcount has special linkage, + don't represent the call as a call. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + emit_insn (gen_prologue_mcount ()); + + /* Adjust the stack by the frame size. If the frame size is > 4096 + bytes, we need to be sure we probe somewhere in the first and last + 4096 bytes (we can probably get away without the latter test) and + every 8192 bytes in between. If the frame size is > 32768, we + do this in a loop. Otherwise, we generate the explicit probe + instructions. + + Note that we are only allowed to adjust sp once in the prologue. */ + + probed_size = frame_size; + if (flag_stack_check) + probed_size += STACK_CHECK_PROTECT; + + if (probed_size <= 32768) + { + if (probed_size > 4096) + { + int probed; + + for (probed = 4096; probed < probed_size; probed += 8192) + emit_insn (gen_probe_stack (GEN_INT (-probed))); + + /* We only have to do this probe if we aren't saving registers or + if we are probing beyond the frame because of -fstack-check. */ + if ((sa_size == 0 && probed_size > probed - 4096) + || flag_stack_check) + emit_insn (gen_probe_stack (GEN_INT (-probed_size))); + } + + if (frame_size != 0) + FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-frame_size)))); + } + else + { + /* Here we generate code to set R22 to SP + 4096 and set R23 to the + number of 8192 byte blocks to probe. We then probe each block + in the loop and then set SP to the proper location. If the + amount remaining is > 4096, we have to do one more probe if we + are not saving any registers or if we are probing beyond the + frame because of -fstack-check. */ + + HOST_WIDE_INT blocks = (probed_size + 4096) / 8192; + HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192; + rtx ptr = gen_rtx_REG (DImode, 22); + rtx count = gen_rtx_REG (DImode, 23); + rtx seq; + + emit_move_insn (count, GEN_INT (blocks)); + emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096))); + + /* Because of the difficulty in emitting a new basic block this + late in the compilation, generate the loop as a single insn. */ + emit_insn (gen_prologue_stack_probe_loop (count, ptr)); + + if ((leftover > 4096 && sa_size == 0) || flag_stack_check) + { + rtx last = gen_rtx_MEM (DImode, + plus_constant (Pmode, ptr, -leftover)); + MEM_VOLATILE_P (last) = 1; + emit_move_insn (last, const0_rtx); + } + + if (flag_stack_check) + { + /* If -fstack-check is specified we have to load the entire + constant into a register and subtract from the sp in one go, + because the probed stack size is not equal to the frame size. 
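To make the probing policy described above concrete, a sketch (separate from the patch) of the offsets the moderate-frame branch touches:

#include <stdio.h>

/* Probe schedule for probed_size <= 32768, mirroring the code above: one
   probe every 8192 bytes starting 4096 bytes into the frame, plus a
   final probe at the full size when no registers are saved or
   -fstack-check is in effect.  */
static void
example_probe_schedule (long probed_size, long sa_size, int stack_check)
{
  long probed = 4096;

  if (probed_size <= 4096)
    return;                       /* small frames need no explicit probe */

  for (; probed < probed_size; probed += 8192)
    printf ("probe at SP - %ld\n", probed);

  if ((sa_size == 0 && probed_size > probed - 4096) || stack_check)
    printf ("probe at SP - %ld\n", probed_size);
}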
*/ + HOST_WIDE_INT lo, hi; + lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + hi = frame_size - lo; + + emit_move_insn (ptr, GEN_INT (hi)); + emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo))); + seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, + ptr)); + } + else + { + seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr, + GEN_INT (-leftover))); + } + + /* This alternative is special, because the DWARF code cannot + possibly intuit through the loop above. So we invent this + note it looks at instead. */ + RTX_FRAME_RELATED_P (seq) = 1; + add_reg_note (seq, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -frame_size))); + } + + /* Cope with very large offsets to the register save area. */ + sa_bias = 0; + sa_reg = stack_pointer_rtx; + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + rtx sa_bias_rtx; + + if (low + sa_size <= 0x8000) + sa_bias = reg_offset - low, reg_offset = low; + else + sa_bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (DImode, 24); + sa_bias_rtx = GEN_INT (sa_bias); + + if (add_operand (sa_bias_rtx, DImode)) + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx)); + else + { + emit_move_insn (sa_reg, sa_bias_rtx); + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg)); + } + } + + /* Save regs in stack order. Beginning with VMS PV. */ + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK) + emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0); + + /* Save register RA next. */ + if (imask & (1UL << REG_RA)) + { + emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset); + imask &= ~(1UL << REG_RA); + reg_offset += 8; + } + + /* Now save any other registers required to be saved. */ + for (i = 0; i < 31; i++) + if (imask & (1UL << i)) + { + emit_frame_store (i, sa_reg, sa_bias, reg_offset); + reg_offset += 8; + } + + for (i = 0; i < 31; i++) + if (fmask & (1UL << i)) + { + emit_frame_store (i+32, sa_reg, sa_bias, reg_offset); + reg_offset += 8; + } + + if (TARGET_ABI_OPEN_VMS) + { + /* Register frame procedures save the fp. */ + if (alpha_procedure_type == PT_REGISTER) + { + rtx insn = emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno), + hard_frame_pointer_rtx); + add_reg_note (insn, REG_CFA_REGISTER, NULL); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV) + emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno), + gen_rtx_REG (DImode, REG_PV))); + + if (alpha_procedure_type != PT_NULL + && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM) + FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); + + /* If we have to allocate space for outgoing args, do it now. */ + if (crtl->outgoing_args_size != 0) + { + rtx seq + = emit_move_insn (stack_pointer_rtx, + plus_constant + (Pmode, hard_frame_pointer_rtx, + - (ALPHA_ROUND + (crtl->outgoing_args_size)))); + + /* Only set FRAME_RELATED_P on the stack adjustment we just emitted + if ! frame_pointer_needed. Setting the bit will change the CFA + computation rule to use sp again, which would be wrong if we had + frame_pointer_needed, as this means sp might move unpredictably + later on. + + Also, note that + frame_pointer_needed + => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM + and + crtl->outgoing_args_size != 0 + => alpha_procedure_type != PT_NULL, + + so when we are not setting the bit here, we are guaranteed to + have emitted an FRP frame pointer update just before. 
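The ((x & 0xffff) ^ 0x8000) - 0x8000 pattern used twice above peels a signed 16-bit displacement off an offset so the remainder can be added through a register. A small self-check, illustrative only:

#include <assert.h>
#include <stdint.h>

/* Splits OFFSET into a signed 16-bit low part and a 0x10000-aligned
   bias, the same decomposition the prologue applies to the save-area
   base register.  */
static void
example_split_offset (int64_t offset)
{
  int64_t low  = ((offset & 0xffff) ^ 0x8000) - 0x8000;
  int64_t bias = offset - low;

  assert (low >= -0x8000 && low <= 0x7fff);   /* fits a 16-bit displacement */
  assert ((bias & 0xffff) == 0);              /* bias is 0x10000-aligned */
  assert (bias + low == offset);              /* nothing lost */
}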
*/ + RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed; + } + } + else + { + /* If we need a frame pointer, set it from the stack pointer. */ + if (frame_pointer_needed) + { + if (TARGET_CAN_FAULT_IN_PROLOGUE) + FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx)); + else + /* This must always be the last instruction in the + prologue, thus we emit a special move + clobber. */ + FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx, + stack_pointer_rtx, sa_reg))); + } + } + + /* The ABIs for VMS and OSF/1 say that while we can schedule insns into + the prologue, for exception handling reasons, we cannot do this for + any insn that might fault. We could prevent this for mems with a + (clobber:BLK (scratch)), but this doesn't work for fp insns. So we + have to prevent all such scheduling with a blockage. + + Linux, on the other hand, never bothered to implement OSF/1's + exception handling, and so doesn't care about such things. Anyone + planning to use dwarf2 frame-unwind info can also omit the blockage. */ + + if (! TARGET_CAN_FAULT_IN_PROLOGUE) + emit_insn (gen_blockage ()); +} + +/* Count the number of .file directives, so that .loc is up to date. */ +int num_source_filenames = 0; + +/* Output the textual info surrounding the prologue. */ + +void +alpha_start_function (FILE *file, const char *fnname, + tree decl ATTRIBUTE_UNUSED) +{ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size; + /* Complete stack size needed. */ + unsigned HOST_WIDE_INT frame_size; + /* The maximum debuggable frame size. */ + unsigned HOST_WIDE_INT max_frame_size = 1UL << 31; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + char *entry_label = (char *) alloca (strlen (fnname) + 6); + char *tramp_label = (char *) alloca (strlen (fnname) + 6); + int i; + +#if TARGET_ABI_OPEN_VMS + vms_start_function (fnname); +#endif + + alpha_fnname = fnname; + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (TARGET_ABI_OPEN_VMS) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + /* Issue function start and label. */ + if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive) + { + fputs ("\t.ent ", file); + assemble_name (file, fnname); + putc ('\n', file); + + /* If the function needs GP, we'll write the "..ng" label there. + Otherwise, do it here. */ + if (TARGET_ABI_OSF + && ! alpha_function_needs_gp + && ! cfun->is_thunk) + { + putc ('$', file); + assemble_name (file, fnname); + fputs ("..ng:\n", file); + } + } + /* Nested functions on VMS that are potentially called via trampoline + get a special transfer entry point that loads the called functions + procedure descriptor and static chain. 
*/ + if (TARGET_ABI_OPEN_VMS + && !TREE_PUBLIC (decl) + && DECL_CONTEXT (decl) + && !TYPE_P (DECL_CONTEXT (decl)) + && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL) + { + strcpy (tramp_label, fnname); + strcat (tramp_label, "..tr"); + ASM_OUTPUT_LABEL (file, tramp_label); + fprintf (file, "\tldq $1,24($27)\n"); + fprintf (file, "\tldq $27,16($27)\n"); + } + + strcpy (entry_label, fnname); + if (TARGET_ABI_OPEN_VMS) + strcat (entry_label, "..en"); + + ASM_OUTPUT_LABEL (file, entry_label); + inside_function = TRUE; + + if (TARGET_ABI_OPEN_VMS) + fprintf (file, "\t.base $%d\n", vms_base_regno); + + if (TARGET_ABI_OSF + && TARGET_IEEE_CONFORMANT + && !flag_inhibit_size_directive) + { + /* Set flags in procedure descriptor to request IEEE-conformant + math-library routines. The value we set it to is PDSC_EXC_IEEE + (/usr/include/pdsc.h). */ + fputs ("\t.eflag 48\n", file); + } + + /* Set up offsets to alpha virtual arg/local debugging pointer. */ + alpha_auto_offset = -frame_size + crtl->args.pretend_args_size; + alpha_arg_offset = -frame_size + 48; + + /* Describe our frame. If the frame size is larger than an integer, + print it as zero to avoid an assembler error. We won't be + properly describing such a frame, but that's the best we can do. */ + if (TARGET_ABI_OPEN_VMS) + fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26," + HOST_WIDE_INT_PRINT_DEC "\n", + vms_unwind_regno, + frame_size >= (1UL << 31) ? 0 : frame_size, + reg_offset); + else if (!flag_inhibit_size_directive) + fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n", + (frame_pointer_needed + ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM), + frame_size >= max_frame_size ? 0 : frame_size, + crtl->args.pretend_args_size); + + /* Describe which registers were spilled. */ + if (TARGET_ABI_OPEN_VMS) + { + if (imask) + /* ??? Does VMS care if mask contains ra? The old code didn't + set it, so I don't here. */ + fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA)); + if (fmask) + fprintf (file, "\t.fmask 0x%lx,0\n", fmask); + if (alpha_procedure_type == PT_REGISTER) + fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno); + } + else if (!flag_inhibit_size_directive) + { + if (imask) + { + fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + + for (i = 0; i < 32; ++i) + if (imask & (1UL << i)) + reg_offset += 8; + } + + if (fmask) + fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + } + +#if TARGET_ABI_OPEN_VMS + /* If a user condition handler has been installed at some point, emit + the procedure descriptor bits to point the Condition Handling Facility + at the indirection wrapper, and state the fp offset at which the user + handler may be found. */ + if (cfun->machine->uses_condition_handler) + { + fprintf (file, "\t.handler __gcc_shell_handler\n"); + fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET); + } + +#ifdef TARGET_VMS_CRASH_DEBUG + /* Support of minimal traceback info. */ + switch_to_section (readonly_data_section); + fprintf (file, "\t.align 3\n"); + assemble_name (file, fnname); fputs ("..na:\n", file); + fputs ("\t.ascii \"", file); + assemble_name (file, fnname); + fputs ("\\0\"\n", file); + switch_to_section (text_section); +#endif +#endif /* TARGET_ABI_OPEN_VMS */ +} + +/* Emit the .prologue note at the scheduled end of the prologue. 
*/ + +static void +alpha_output_function_end_prologue (FILE *file) +{ + if (TARGET_ABI_OPEN_VMS) + fputs ("\t.prologue\n", file); + else if (!flag_inhibit_size_directive) + fprintf (file, "\t.prologue %d\n", + alpha_function_needs_gp || cfun->is_thunk); +} + +/* Write function epilogue. */ + +void +alpha_expand_epilogue (void) +{ + /* Registers to save. */ + unsigned long imask = 0; + unsigned long fmask = 0; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + int fp_is_frame_pointer, fp_offset; + rtx sa_reg, sa_reg_exp = NULL; + rtx sp_adj1, sp_adj2, mem, reg, insn; + rtx eh_ofs; + rtx cfa_restores = NULL_RTX; + int i; + + sa_size = alpha_sa_size (); + frame_size = compute_frame_size (get_frame_size (), sa_size); + + if (TARGET_ABI_OPEN_VMS) + { + if (alpha_procedure_type == PT_STACK) + reg_offset = 8 + 8 * cfun->machine->uses_condition_handler; + else + reg_offset = 0; + } + else + reg_offset = ALPHA_ROUND (crtl->outgoing_args_size); + + alpha_sa_mask (&imask, &fmask); + + fp_is_frame_pointer + = (TARGET_ABI_OPEN_VMS + ? alpha_procedure_type == PT_STACK + : frame_pointer_needed); + fp_offset = 0; + sa_reg = stack_pointer_rtx; + + if (crtl->calls_eh_return) + eh_ofs = EH_RETURN_STACKADJ_RTX; + else + eh_ofs = NULL_RTX; + + if (sa_size) + { + /* If we have a frame pointer, restore SP from it. */ + if (TARGET_ABI_OPEN_VMS + ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM + : frame_pointer_needed) + emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); + + /* Cope with very large offsets to the register save area. */ + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low + sa_size <= 0x8000) + bias = reg_offset - low, reg_offset = low; + else + bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (DImode, 22); + sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias); + + emit_move_insn (sa_reg, sa_reg_exp); + } + + /* Restore registers in order, excepting a true frame pointer. */ + + mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset)); + reg = gen_rtx_REG (DImode, REG_RA); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + + reg_offset += 8; + imask &= ~(1UL << REG_RA); + + for (i = 0; i < 31; ++i) + if (imask & (1UL << i)) + { + if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer) + fp_offset = reg_offset; + else + { + mem = gen_frame_mem (DImode, + plus_constant (Pmode, sa_reg, + reg_offset)); + reg = gen_rtx_REG (DImode, i); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, + cfa_restores); + } + reg_offset += 8; + } + + for (i = 0; i < 31; ++i) + if (fmask & (1UL << i)) + { + mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg, + reg_offset)); + reg = gen_rtx_REG (DFmode, i+32); + emit_move_insn (reg, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + reg_offset += 8; + } + } + + if (frame_size || eh_ofs) + { + sp_adj1 = stack_pointer_rtx; + + if (eh_ofs) + { + sp_adj1 = gen_rtx_REG (DImode, 23); + emit_move_insn (sp_adj1, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); + } + + /* If the stack size is large, begin computation into a temporary + register so as not to interfere with a potential fp restore, + which must be consecutive with an SP restore. 
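The code that follows picks one of three ways to add the frame size back onto SP, depending on how large the adjustment is; a sketch of that decision, with the thresholds copied from the code below:

/* Illustrative classification only; the strings merely describe the
   strategies used by alpha_expand_epilogue.  */
static const char *
example_sp_restore_strategy (long frame_size, int calls_alloca)
{
  if (frame_size < 32768 && !calls_alloca)
    return "add the constant directly";
  if (frame_size < 0x40007fffL)
    return "split into a register-held high part plus a 16-bit low part";
  return "materialize the full 64-bit constant piecewise, then add";
}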
*/ + if (frame_size < 32768 && !cfun->calls_alloca) + sp_adj2 = GEN_INT (frame_size); + else if (frame_size < 0x40007fffL) + { + int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + + sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low); + if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) + sp_adj1 = sa_reg; + else + { + sp_adj1 = gen_rtx_REG (DImode, 23); + emit_move_insn (sp_adj1, sp_adj2); + } + sp_adj2 = GEN_INT (low); + } + else + { + rtx tmp = gen_rtx_REG (DImode, 23); + sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false); + if (!sp_adj2) + { + /* We can't drop new things to memory this late, afaik, + so build it up by pieces. */ + sp_adj2 = alpha_emit_set_long_const (tmp, frame_size, + -(frame_size < 0)); + gcc_assert (sp_adj2); + } + } + + /* From now on, things must be in order. So emit blockages. */ + + /* Restore the frame pointer. */ + if (fp_is_frame_pointer) + { + emit_insn (gen_blockage ()); + mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, + fp_offset)); + emit_move_insn (hard_frame_pointer_rtx, mem); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + hard_frame_pointer_rtx, cfa_restores); + } + else if (TARGET_ABI_OPEN_VMS) + { + emit_insn (gen_blockage ()); + emit_move_insn (hard_frame_pointer_rtx, + gen_rtx_REG (DImode, vms_save_fp_regno)); + cfa_restores = alloc_reg_note (REG_CFA_RESTORE, + hard_frame_pointer_rtx, cfa_restores); + } + + /* Restore the stack pointer. */ + emit_insn (gen_blockage ()); + if (sp_adj2 == const0_rtx) + insn = emit_move_insn (stack_pointer_rtx, sp_adj1); + else + insn = emit_move_insn (stack_pointer_rtx, + gen_rtx_PLUS (DImode, sp_adj1, sp_adj2)); + REG_NOTES (insn) = cfa_restores; + add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + gcc_assert (cfa_restores == NULL); + + if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER) + { + emit_insn (gen_blockage ()); + insn = emit_move_insn (hard_frame_pointer_rtx, + gen_rtx_REG (DImode, vms_save_fp_regno)); + add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +} + +/* Output the rest of the textual info surrounding the epilogue. */ + +void +alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) +{ + rtx insn; + + /* We output a nop after noreturn calls at the very end of the function to + ensure that the return address always remains in the caller's code range, + as not doing so might confuse unwinding engines. */ + insn = get_last_insn (); + if (!INSN_P (insn)) + insn = prev_active_insn (insn); + if (insn && CALL_P (insn)) + output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); + +#if TARGET_ABI_OPEN_VMS + /* Write the linkage entries. */ + alpha_write_linkage (file, fnname); +#endif + + /* End the function. */ + if (TARGET_ABI_OPEN_VMS + || !flag_inhibit_size_directive) + { + fputs ("\t.end ", file); + assemble_name (file, fnname); + putc ('\n', file); + } + inside_function = FALSE; +} + +#if TARGET_ABI_OSF +/* Emit a tail call to FUNCTION after adjusting THIS by DELTA. + + In order to avoid the hordes of differences between generated code + with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating + lots of code loading up large constants, generate rtl and emit it + instead of going straight to text. + + Not sure why this idea hasn't been explored before... 
*/ + +static void +alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + HOST_WIDE_INT hi, lo; + rtx this_rtx, insn, funexp; + + /* We always require a valid GP. */ + emit_insn (gen_prologue_ldgp ()); + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Find the "this" pointer. If the function returns a structure, + the structure return pointer is in $16. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, 17); + else + this_rtx = gen_rtx_REG (Pmode, 16); + + /* Add DELTA. When possible we use ldah+lda. Otherwise load the + entire constant for the add. */ + lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; + hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == delta) + { + if (hi) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); + if (lo) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); + } + else + { + rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), + delta, -(delta < 0)); + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Add a delta stored in the vtable at VCALL_OFFSET. */ + if (vcall_offset) + { + rtx tmp, tmp2; + + tmp = gen_rtx_REG (Pmode, 0); + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); + + lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; + hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == vcall_offset) + { + if (hi) + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); + } + else + { + tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1), + vcall_offset, -(vcall_offset < 0)); + emit_insn (gen_adddi3 (tmp, tmp, tmp2)); + lo = 0; + } + if (lo) + tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); + else + tmp2 = tmp; + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); + + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Generate a tail call to the target function. */ + if (! TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. Note that use_thunk calls + assemble_start_function and assemble_end_function. */ + insn = get_insns (); + shorten_branches (insn); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); +} +#endif /* TARGET_ABI_OSF */ + +/* Debugging support. */ + +#include "gstab.h" + +/* Name of the file containing the current function. */ + +static const char *current_function_file = ""; + +/* Offsets to alpha virtual arg/local debugging pointers. */ + +long alpha_arg_offset; +long alpha_auto_offset; + +/* Emit a new filename to a stream. 
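The delta handling above reuses the same decomposition at 32-bit scale: lo is a signed 16-bit displacement and hi is the sign-extended, 0x10000-aligned remainder, so hi + lo == delta exactly when the two-add sequence suffices. A host-side check of that test, for illustration:

#include <stdint.h>

/* Mirrors the feasibility test in alpha_output_mi_thunk_osf: nonzero when
   DELTA can be added with the hi/lo pair, zero when the code must build
   the full constant instead.  */
static int
example_delta_fits_two_adds (int64_t delta)
{
  int64_t lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
  int64_t hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;

  return hi + lo == delta;
}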
*/ + +void +alpha_output_filename (FILE *stream, const char *name) +{ + static int first_time = TRUE; + + if (first_time) + { + first_time = FALSE; + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t%d ", num_source_filenames); + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } + + else if (name != current_function_file + && strcmp (name, current_function_file) != 0) + { + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t%d ", num_source_filenames); + + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } +} + +/* Structure to show the current status of registers and memory. */ + +struct shadow_summary +{ + struct { + unsigned int i : 31; /* Mask of int regs */ + unsigned int fp : 31; /* Mask of fp regs */ + unsigned int mem : 1; /* mem == imem | fpmem */ + } used, defd; +}; + +/* Summary the effects of expression X on the machine. Update SUM, a pointer + to the summary structure. SET is nonzero if the insn is setting the + object, otherwise zero. */ + +static void +summarize_insn (rtx x, struct shadow_summary *sum, int set) +{ + const char *format_ptr; + int i, j; + + if (x == 0) + return; + + switch (GET_CODE (x)) + { + /* ??? Note that this case would be incorrect if the Alpha had a + ZERO_EXTRACT in SET_DEST. */ + case SET: + summarize_insn (SET_SRC (x), sum, 0); + summarize_insn (SET_DEST (x), sum, 1); + break; + + case CLOBBER: + summarize_insn (XEXP (x, 0), sum, 1); + break; + + case USE: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case ASM_OPERANDS: + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) + summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0); + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + summarize_insn (XVECEXP (x, 0, i), sum, 0); + break; + + case SUBREG: + summarize_insn (SUBREG_REG (x), sum, 0); + break; + + case REG: + { + int regno = REGNO (x); + unsigned long mask = ((unsigned long) 1) << (regno % 32); + + if (regno == 31 || regno == 63) + break; + + if (set) + { + if (regno < 32) + sum->defd.i |= mask; + else + sum->defd.fp |= mask; + } + else + { + if (regno < 32) + sum->used.i |= mask; + else + sum->used.fp |= mask; + } + } + break; + + case MEM: + if (set) + sum->defd.mem = 1; + else + sum->used.mem = 1; + + /* Find the regs used in memory address computation: */ + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case CONST_INT: case CONST_DOUBLE: + case SYMBOL_REF: case LABEL_REF: case CONST: + case SCRATCH: case ASM_INPUT: + break; + + /* Handle common unary and binary ops for efficiency. 
*/ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + summarize_insn (XEXP (x, 0), sum, 0); + summarize_insn (XEXP (x, 1), sum, 0); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case 'e': + summarize_insn (XEXP (x, i), sum, 0); + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + summarize_insn (XVECEXP (x, i, j), sum, 0); + break; + + case 'i': + break; + + default: + gcc_unreachable (); + } + } +} + +/* Ensure a sufficient number of `trapb' insns are in the code when + the user requests code with a trap precision of functions or + instructions. + + In naive mode, when the user requests a trap-precision of + "instruction", a trapb is needed after every instruction that may + generate a trap. This ensures that the code is resumption safe but + it is also slow. + + When optimizations are turned on, we delay issuing a trapb as long + as possible. In this context, a trap shadow is the sequence of + instructions that starts with a (potentially) trap generating + instruction and extends to the next trapb or call_pal instruction + (but GCC never generates call_pal by itself). We can delay (and + therefore sometimes omit) a trapb subject to the following + conditions: + + (a) On entry to the trap shadow, if any Alpha register or memory + location contains a value that is used as an operand value by some + instruction in the trap shadow (live on entry), then no instruction + in the trap shadow may modify the register or memory location. + + (b) Within the trap shadow, the computation of the base register + for a memory load or store instruction may not involve using the + result of an instruction that might generate an UNPREDICTABLE + result. + + (c) Within the trap shadow, no register may be used more than once + as a destination register. (This is to make life easier for the + trap-handler.) + + (d) The trap shadow may not include any branch instructions. 
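A simplified model of how conditions (a) through (c) are enforced by alpha_handle_trap_shadows below; it is a sketch only, since the real code also folds each insn's summary into the running shadow and closes the shadow outright at branches, calls, and labels, which covers (d):

/* Bit masks of integer regs, FP regs, and a memory flag, in the spirit
   of struct shadow_summary above.  */
struct summary_bits { unsigned int i, fp, mem; };

/* Nonzero when the incoming insn's definitions force the trap shadow to
   be closed with a trapb.  */
static int
example_must_close_shadow (const struct summary_bits *shadow_used,
                           const struct summary_bits *shadow_defd,
                           const struct summary_bits *insn_defd)
{
  /* (c): no destination may be reused within the shadow.  */
  if ((insn_defd->i & shadow_defd->i) || (insn_defd->fp & shadow_defd->fp))
    return 1;

  /* (a) and (b): nothing the shadow has already read may be clobbered.  */
  if ((insn_defd->i & shadow_used->i)
      || (insn_defd->fp & shadow_used->fp)
      || (insn_defd->mem & shadow_used->mem))
    return 1;

  return 0;
}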
*/ + +static void +alpha_handle_trap_shadows (void) +{ + struct shadow_summary shadow; + int trap_pending, exception_nesting; + rtx i, n; + + trap_pending = 0; + exception_nesting = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + + for (i = get_insns (); i ; i = NEXT_INSN (i)) + { + if (NOTE_P (i)) + { + switch (NOTE_KIND (i)) + { + case NOTE_INSN_EH_REGION_BEG: + exception_nesting++; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EH_REGION_END: + exception_nesting--; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EPILOGUE_BEG: + if (trap_pending && alpha_tp >= ALPHA_TP_FUNC) + goto close_shadow; + break; + } + } + else if (trap_pending) + { + if (alpha_tp == ALPHA_TP_FUNC) + { + if (JUMP_P (i) + && GET_CODE (PATTERN (i)) == RETURN) + goto close_shadow; + } + else if (alpha_tp == ALPHA_TP_INSN) + { + if (optimize > 0) + { + struct shadow_summary sum; + + sum.used.i = 0; + sum.used.fp = 0; + sum.used.mem = 0; + sum.defd = sum.used; + + switch (GET_CODE (i)) + { + case INSN: + /* Annoyingly, get_attr_trap will die on these. */ + if (GET_CODE (PATTERN (i)) == USE + || GET_CODE (PATTERN (i)) == CLOBBER) + break; + + summarize_insn (PATTERN (i), &sum, 0); + + if ((sum.defd.i & shadow.defd.i) + || (sum.defd.fp & shadow.defd.fp)) + { + /* (c) would be violated */ + goto close_shadow; + } + + /* Combine shadow with summary of current insn: */ + shadow.used.i |= sum.used.i; + shadow.used.fp |= sum.used.fp; + shadow.used.mem |= sum.used.mem; + shadow.defd.i |= sum.defd.i; + shadow.defd.fp |= sum.defd.fp; + shadow.defd.mem |= sum.defd.mem; + + if ((sum.defd.i & shadow.used.i) + || (sum.defd.fp & shadow.used.fp) + || (sum.defd.mem & shadow.used.mem)) + { + /* (a) would be violated (also takes care of (b)) */ + gcc_assert (get_attr_trap (i) != TRAP_YES + || (!(sum.defd.i & sum.used.i) + && !(sum.defd.fp & sum.used.fp))); + + goto close_shadow; + } + break; + + case JUMP_INSN: + case CALL_INSN: + case CODE_LABEL: + goto close_shadow; + + default: + gcc_unreachable (); + } + } + else + { + close_shadow: + n = emit_insn_before (gen_trapb (), i); + PUT_MODE (n, TImode); + PUT_MODE (i, TImode); + trap_pending = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + } + } + } + + if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC) + && NONJUMP_INSN_P (i) + && GET_CODE (PATTERN (i)) != USE + && GET_CODE (PATTERN (i)) != CLOBBER + && get_attr_trap (i) == TRAP_YES) + { + if (optimize && !trap_pending) + summarize_insn (PATTERN (i), &shadow, 0); + trap_pending = 1; + } + } +} + +/* Alpha can only issue instruction groups simultaneously if they are + suitably aligned. This is very processor-specific. */ +/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe + that are marked "fake". These instructions do not exist on that target, + but it is possible to see these insns with deranged combinations of + command-line options, such as "-mtune=ev4 -mmax". Instead of aborting, + choose a result at random. 
*/ + +enum alphaev4_pipe { + EV4_STOP = 0, + EV4_IB0 = 1, + EV4_IB1 = 2, + EV4_IBX = 4 +}; + +enum alphaev5_pipe { + EV5_STOP = 0, + EV5_NONE = 1, + EV5_E01 = 2, + EV5_E0 = 4, + EV5_E1 = 8, + EV5_FAM = 16, + EV5_FA = 32, + EV5_FM = 64 +}; + +static enum alphaev4_pipe +alphaev4_insn_pipe (rtx insn) +{ + if (recog_memoized (insn) < 0) + return EV4_STOP; + if (get_attr_length (insn) != 4) + return EV4_STOP; + + switch (get_attr_type (insn)) + { + case TYPE_ILD: + case TYPE_LDSYM: + case TYPE_FLD: + case TYPE_LD_L: + return EV4_IBX; + + case TYPE_IADD: + case TYPE_ILOG: + case TYPE_ICMOV: + case TYPE_ICMP: + case TYPE_FST: + case TYPE_SHIFT: + case TYPE_IMUL: + case TYPE_FBR: + case TYPE_MVI: /* fake */ + return EV4_IB0; + + case TYPE_IST: + case TYPE_MISC: + case TYPE_IBR: + case TYPE_JSR: + case TYPE_CALLPAL: + case TYPE_FCPYS: + case TYPE_FCMOV: + case TYPE_FADD: + case TYPE_FDIV: + case TYPE_FMUL: + case TYPE_ST_C: + case TYPE_MB: + case TYPE_FSQRT: /* fake */ + case TYPE_FTOI: /* fake */ + case TYPE_ITOF: /* fake */ + return EV4_IB1; + + default: + gcc_unreachable (); + } +} + +static enum alphaev5_pipe +alphaev5_insn_pipe (rtx insn) +{ + if (recog_memoized (insn) < 0) + return EV5_STOP; + if (get_attr_length (insn) != 4) + return EV5_STOP; + + switch (get_attr_type (insn)) + { + case TYPE_ILD: + case TYPE_FLD: + case TYPE_LDSYM: + case TYPE_IADD: + case TYPE_ILOG: + case TYPE_ICMOV: + case TYPE_ICMP: + return EV5_E01; + + case TYPE_IST: + case TYPE_FST: + case TYPE_SHIFT: + case TYPE_IMUL: + case TYPE_MISC: + case TYPE_MVI: + case TYPE_LD_L: + case TYPE_ST_C: + case TYPE_MB: + case TYPE_FTOI: /* fake */ + case TYPE_ITOF: /* fake */ + return EV5_E0; + + case TYPE_IBR: + case TYPE_JSR: + case TYPE_CALLPAL: + return EV5_E1; + + case TYPE_FCPYS: + return EV5_FAM; + + case TYPE_FBR: + case TYPE_FCMOV: + case TYPE_FADD: + case TYPE_FDIV: + case TYPE_FSQRT: /* fake */ + return EV5_FA; + + case TYPE_FMUL: + return EV5_FM; + + default: + gcc_unreachable (); + } +} + +/* IN_USE is a mask of the slots currently filled within the insn group. + The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then + the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1. + + LEN is, of course, the length of the group in bytes. */ + +static rtx +alphaev4_next_group (rtx insn, int *pin_use, int *plen) +{ + int len, in_use; + + len = in_use = 0; + + if (! INSN_P (insn) + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == USE) + goto next_and_done; + + while (1) + { + enum alphaev4_pipe pipe; + + pipe = alphaev4_insn_pipe (insn); + switch (pipe) + { + case EV4_STOP: + /* Force complex instructions to start new groups. */ + if (in_use) + goto done; + + /* If this is a completely unrecognized insn, it's an asm. + We don't know how long it is, so record length as -1 to + signal a needed realignment. */ + if (recog_memoized (insn) < 0) + len = -1; + else + len = get_attr_length (insn); + goto next_and_done; + + case EV4_IBX: + if (in_use & EV4_IB0) + { + if (in_use & EV4_IB1) + goto done; + in_use |= EV4_IB1; + } + else + in_use |= EV4_IB0 | EV4_IBX; + break; + + case EV4_IB0: + if (in_use & EV4_IB0) + { + if (!(in_use & EV4_IBX) || (in_use & EV4_IB1)) + goto done; + in_use |= EV4_IB1; + } + in_use |= EV4_IB0; + break; + + case EV4_IB1: + if (in_use & EV4_IB1) + goto done; + in_use |= EV4_IB1; + break; + + default: + gcc_unreachable (); + } + len += 4; + + /* Haifa doesn't do well scheduling branches. 
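The slot bookkeeping in alphaev4_next_group above amounts to a small state machine over the IN_USE mask; here is a sketch of just that transition, not the scheduler itself:

enum { IB0 = 1, IB1 = 2, IBX = 4 };   /* mirrors alphaev4_pipe */

/* Try to add an insn of class PIPE to a group whose slots are IN_USE.
   Returns the new mask, or -1 when the insn must start a new group.  An
   IBX-class insn claims IB0 first but records, via the IBX bit, that the
   hardware may bump it into IB1 to make room for a later IB0 insn.  */
static int
example_ev4_add_to_group (int in_use, int pipe)
{
  switch (pipe)
    {
    case IBX:
      if (in_use & IB0)
        return (in_use & IB1) ? -1 : (in_use | IB1);
      return in_use | IB0 | IBX;

    case IB0:
      if (in_use & IB0)
        {
          if (!(in_use & IBX) || (in_use & IB1))
            return -1;
          return in_use | IB1;          /* the earlier IBX insn moves over */
        }
      return in_use | IB0;

    case IB1:
      return (in_use & IB1) ? -1 : (in_use | IB1);

    default:
      return -1;                        /* complex insns end the group */
    }
}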
*/ + if (JUMP_P (insn)) + goto next_and_done; + + next: + insn = next_nonnote_insn (insn); + + if (!insn || ! INSN_P (insn)) + goto done; + + /* Let Haifa tell us where it thinks insn group boundaries are. */ + if (GET_MODE (insn) == TImode) + goto done; + + if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) + goto next; + } + + next_and_done: + insn = next_nonnote_insn (insn); + + done: + *plen = len; + *pin_use = in_use; + return insn; +} + +/* IN_USE is a mask of the slots currently filled within the insn group. + The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then + the insn in EV5_E0 can be swapped by the hardware into EV5_E1. + + LEN is, of course, the length of the group in bytes. */ + +static rtx +alphaev5_next_group (rtx insn, int *pin_use, int *plen) +{ + int len, in_use; + + len = in_use = 0; + + if (! INSN_P (insn) + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == USE) + goto next_and_done; + + while (1) + { + enum alphaev5_pipe pipe; + + pipe = alphaev5_insn_pipe (insn); + switch (pipe) + { + case EV5_STOP: + /* Force complex instructions to start new groups. */ + if (in_use) + goto done; + + /* If this is a completely unrecognized insn, it's an asm. + We don't know how long it is, so record length as -1 to + signal a needed realignment. */ + if (recog_memoized (insn) < 0) + len = -1; + else + len = get_attr_length (insn); + goto next_and_done; + + /* ??? Most of the places below, we would like to assert never + happen, as it would indicate an error either in Haifa, or + in the scheduling description. Unfortunately, Haifa never + schedules the last instruction of the BB, so we don't have + an accurate TI bit to go off. */ + case EV5_E01: + if (in_use & EV5_E0) + { + if (in_use & EV5_E1) + goto done; + in_use |= EV5_E1; + } + else + in_use |= EV5_E0 | EV5_E01; + break; + + case EV5_E0: + if (in_use & EV5_E0) + { + if (!(in_use & EV5_E01) || (in_use & EV5_E1)) + goto done; + in_use |= EV5_E1; + } + in_use |= EV5_E0; + break; + + case EV5_E1: + if (in_use & EV5_E1) + goto done; + in_use |= EV5_E1; + break; + + case EV5_FAM: + if (in_use & EV5_FA) + { + if (in_use & EV5_FM) + goto done; + in_use |= EV5_FM; + } + else + in_use |= EV5_FA | EV5_FAM; + break; + + case EV5_FA: + if (in_use & EV5_FA) + goto done; + in_use |= EV5_FA; + break; + + case EV5_FM: + if (in_use & EV5_FM) + goto done; + in_use |= EV5_FM; + break; + + case EV5_NONE: + break; + + default: + gcc_unreachable (); + } + len += 4; + + /* Haifa doesn't do well scheduling branches. */ + /* ??? If this is predicted not-taken, slotting continues, except + that no more IBR, FBR, or JSR insns may be slotted. */ + if (JUMP_P (insn)) + goto next_and_done; + + next: + insn = next_nonnote_insn (insn); + + if (!insn || ! INSN_P (insn)) + goto done; + + /* Let Haifa tell us where it thinks insn group boundaries are. 
*/ + if (GET_MODE (insn) == TImode) + goto done; + + if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE) + goto next; + } + + next_and_done: + insn = next_nonnote_insn (insn); + + done: + *plen = len; + *pin_use = in_use; + return insn; +} + +static rtx +alphaev4_next_nop (int *pin_use) +{ + int in_use = *pin_use; + rtx nop; + + if (!(in_use & EV4_IB0)) + { + in_use |= EV4_IB0; + nop = gen_nop (); + } + else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX) + { + in_use |= EV4_IB1; + nop = gen_nop (); + } + else if (TARGET_FP && !(in_use & EV4_IB1)) + { + in_use |= EV4_IB1; + nop = gen_fnop (); + } + else + nop = gen_unop (); + + *pin_use = in_use; + return nop; +} + +static rtx +alphaev5_next_nop (int *pin_use) +{ + int in_use = *pin_use; + rtx nop; + + if (!(in_use & EV5_E1)) + { + in_use |= EV5_E1; + nop = gen_nop (); + } + else if (TARGET_FP && !(in_use & EV5_FA)) + { + in_use |= EV5_FA; + nop = gen_fnop (); + } + else if (TARGET_FP && !(in_use & EV5_FM)) + { + in_use |= EV5_FM; + nop = gen_fnop (); + } + else + nop = gen_unop (); + + *pin_use = in_use; + return nop; +} + +/* The instruction group alignment main loop. */ + +static void +alpha_align_insns (unsigned int max_align, + rtx (*next_group) (rtx, int *, int *), + rtx (*next_nop) (int *)) +{ + /* ALIGN is the known alignment for the insn group. */ + unsigned int align; + /* OFS is the offset of the current insn in the insn group. */ + int ofs; + int prev_in_use, in_use, len, ldgp; + rtx i, next; + + /* Let shorten branches care for assigning alignments to code labels. */ + shorten_branches (get_insns ()); + + if (align_functions < 4) + align = 4; + else if ((unsigned int) align_functions < max_align) + align = align_functions; + else + align = max_align; + + ofs = prev_in_use = 0; + i = get_insns (); + if (NOTE_P (i)) + i = next_nonnote_insn (i); + + ldgp = alpha_function_needs_gp ? 8 : 0; + + while (i) + { + next = (*next_group) (i, &in_use, &len); + + /* When we see a label, resync alignment etc. */ + if (LABEL_P (i)) + { + unsigned int new_align = 1 << label_to_alignment (i); + + if (new_align >= align) + { + align = new_align < max_align ? new_align : max_align; + ofs = 0; + } + + else if (ofs & (new_align-1)) + ofs = (ofs | (new_align-1)) + 1; + gcc_assert (!len); + } + + /* Handle complex instructions special. */ + else if (in_use == 0) + { + /* Asms will have length < 0. This is a signal that we have + lost alignment knowledge. Assume, however, that the asm + will not mis-align instructions. */ + if (len < 0) + { + ofs = 0; + align = 4; + len = 0; + } + } + + /* If the known alignment is smaller than the recognized insn group, + realign the output. */ + else if ((int) align < len) + { + unsigned int new_log_align = len > 8 ? 4 : 3; + rtx prev, where; + + where = prev = prev_nonnote_insn (i); + if (!where || !LABEL_P (where)) + where = i; + + /* Can't realign between a call and its gp reload. */ + if (! (TARGET_EXPLICIT_RELOCS + && prev && CALL_P (prev))) + { + emit_insn_before (gen_realign (GEN_INT (new_log_align)), where); + align = 1 << new_log_align; + ofs = 0; + } + } + + /* We may not insert padding inside the initial ldgp sequence. */ + else if (ldgp > 0) + ldgp -= len; + + /* If the group won't fit in the same INT16 as the previous, + we need to add padding to keep the group together. Rather + than simply leaving the insn filling to the assembler, we + can make use of the knowledge of what sorts of instructions + were issued in the previous group to make sure that all of + the added nops are really free. 
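+ + For instance (purely illustrative), if the previous EV5 group left only the E1 and FA slots unused, the first two pads requested from the next_nop callback below come back as an integer nop for E1 and an fnop for FA, so the padding can dual-issue with work already in that group rather than costing an extra cycle.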
*/ + else if (ofs + len > (int) align) + { + int nop_count = (align - ofs) / 4; + rtx where; + + /* Insert nops before labels, branches, and calls to truly merge + the execution of the nops with the previous instruction group. */ + where = prev_nonnote_insn (i); + if (where) + { + if (LABEL_P (where)) + { + rtx where2 = prev_nonnote_insn (where); + if (where2 && JUMP_P (where2)) + where = where2; + } + else if (NONJUMP_INSN_P (where)) + where = i; + } + else + where = i; + + do + emit_insn_before ((*next_nop)(&prev_in_use), where); + while (--nop_count); + ofs = 0; + } + + ofs = (ofs + len) & (align - 1); + prev_in_use = in_use; + i = next; + } +} + +/* Insert an unop between sibcall or noreturn function call and GP load. */ + +static void +alpha_pad_function_end (void) +{ + rtx insn, next; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!CALL_P (insn) + || !(SIBLING_CALL_P (insn) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))) + continue; + + /* Make sure we do not split a call and its corresponding + CALL_ARG_LOCATION note. */ + next = NEXT_INSN (insn); + if (next == NULL) + continue; + if (BARRIER_P (next)) + { + next = NEXT_INSN (next); + if (next == NULL) + continue; + } + if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION) + insn = next; + + next = next_active_insn (insn); + if (next) + { + rtx pat = PATTERN (next); + + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE + && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1) + emit_insn_after (gen_unop (), insn); + } + } +} + +/* Machine dependent reorg pass. */ + +static void +alpha_reorg (void) +{ + /* Workaround for a linker error that triggers when an exception + handler immediately follows a sibcall or a noreturn function. + +In the sibcall case: + + The instruction stream from an object file: + + 1d8: 00 00 fb 6b jmp (t12) + 1dc: 00 00 ba 27 ldah gp,0(ra) + 1e0: 00 00 bd 23 lda gp,0(gp) + 1e4: 00 00 7d a7 ldq t12,0(gp) + 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec> + + was converted in the final link pass to: + + 12003aa88: 67 fa ff c3 br 120039428 <...> + 12003aa8c: 00 00 fe 2f unop + 12003aa90: 00 00 fe 2f unop + 12003aa94: 48 83 7d a7 ldq t12,-31928(gp) + 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec> + +And in the noreturn case: + + The instruction stream from an object file: + + 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58> + 58: 00 00 ba 27 ldah gp,0(ra) + 5c: 00 00 bd 23 lda gp,0(gp) + 60: 00 00 7d a7 ldq t12,0(gp) + 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68> + + was converted in the final link pass to: + + fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8> + fdb28: 00 00 fe 2f unop + fdb2c: 00 00 fe 2f unop + fdb30: 30 82 7d a7 ldq t12,-32208(gp) + fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68> + + GP load instructions were wrongly cleared by the linker relaxation + pass. This workaround prevents removal of GP loads by inserting + an unop instruction between a sibcall or noreturn function call and + exception handler prologue. */ + + if (current_function_has_exception_handlers ()) + alpha_pad_function_end (); + + if (alpha_tp != ALPHA_TP_PROG || flag_exceptions) + alpha_handle_trap_shadows (); + + /* Due to the number of extra trapb insns, don't bother fixing up + alignment when trap precision is instruction. Moreover, we can + only do our job when sched2 is run.
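+ + (The alignment argument passed below reflects the issue width being tuned for: two instructions, 8 bytes, on EV4 and four instructions, 16 bytes, on EV5.)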
*/ + if (optimize && !optimize_size + && alpha_tp != ALPHA_TP_INSN + && flag_schedule_insns_after_reload) + { + if (alpha_tune == PROCESSOR_EV4) + alpha_align_insns (8, alphaev4_next_group, alphaev4_next_nop); + else if (alpha_tune == PROCESSOR_EV5) + alpha_align_insns (16, alphaev5_next_group, alphaev5_next_nop); + } +} + +static void +alpha_file_start (void) +{ + default_file_start (); + + fputs ("\t.set noreorder\n", asm_out_file); + fputs ("\t.set volatile\n", asm_out_file); + if (TARGET_ABI_OSF) + fputs ("\t.set noat\n", asm_out_file); + if (TARGET_EXPLICIT_RELOCS) + fputs ("\t.set nomacro\n", asm_out_file); + if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX) + { + const char *arch; + + if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX) + arch = "ev6"; + else if (TARGET_MAX) + arch = "pca56"; + else if (TARGET_BWX) + arch = "ev56"; + else if (alpha_cpu == PROCESSOR_EV5) + arch = "ev5"; + else + arch = "ev4"; + + fprintf (asm_out_file, "\t.arch %s\n", arch); + } +} + +/* Since we don't have a .dynbss section, we should not allow global + relocations in the .rodata section. */ + +static int +alpha_elf_reloc_rw_mask (void) +{ + return flag_pic ? 3 : 2; +} + +/* Return a section for X. The only special thing we do here is to + honor small data. */ + +static section * +alpha_elf_select_rtx_section (enum machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) + /* ??? Consider using mergeable sdata sections. */ + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); +} + +static unsigned int +alpha_elf_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = 0; + + if (strcmp (name, ".sdata") == 0 + || strncmp (name, ".sdata.", 7) == 0 + || strncmp (name, ".gnu.linkonce.s.", 16) == 0 + || strcmp (name, ".sbss") == 0 + || strncmp (name, ".sbss.", 6) == 0 + || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) + flags = SECTION_SMALL; + + flags |= default_section_type_flags (decl, name, reloc); + return flags; +} + +/* Structure to collect function names for final output in link section. */ +/* Note that items marked with GTY can't be ifdef'ed out. */ + +enum reloc_kind +{ + KIND_LINKAGE, + KIND_CODEADDR +}; + +struct GTY(()) alpha_links +{ + rtx func; + rtx linkage; + enum reloc_kind rkind; +}; + +#if TARGET_ABI_OPEN_VMS + +/* Return the VMS argument type corresponding to MODE. */ + +enum avms_arg_type +alpha_arg_type (enum machine_mode mode) +{ + switch (mode) + { + case SFmode: + return TARGET_FLOAT_VAX ? FF : FS; + case DFmode: + return TARGET_FLOAT_VAX ? FD : FT; + default: + return I64; + } +} + +/* Return an rtx for an integer representing the VMS Argument Information + register value. */ + +rtx +alpha_arg_info_reg_val (CUMULATIVE_ARGS cum) +{ + unsigned HOST_WIDE_INT regval = cum.num_args; + int i; + + for (i = 0; i < 6; i++) + regval |= ((int) cum.atypes[i]) << (i * 3 + 8); + + return GEN_INT (regval); +} + + +/* Return a SYMBOL_REF representing the reference to the .linkage entry + of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if + this is the reference to the linkage pointer value, 0 if this is the + reference to the function entry value. RFLAG is 1 if this a reduced + reference (code address only), 0 if this is a full reference. 
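+ + For instance (with a hypothetical function FOO), a reference made from function number 3 uses the linkage symbol "$3..FOO..lk" constructed below; with LFLAG set the result is a memory reference to the second quadword of that linkage pair (offset 8), otherwise the linkage symbol itself is returned.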
*/ + +rtx +alpha_use_linkage (rtx func, bool lflag, bool rflag) +{ + struct alpha_links *al = NULL; + const char *name = XSTR (func, 0); + + if (cfun->machine->links) + { + splay_tree_node lnode; + + /* Is this name already defined? */ + lnode = splay_tree_lookup (cfun->machine->links, (splay_tree_key) name); + if (lnode) + al = (struct alpha_links *) lnode->value; + } + else + cfun->machine->links = splay_tree_new_ggc + ((splay_tree_compare_fn) strcmp, + ggc_alloc_splay_tree_str_alpha_links_splay_tree_s, + ggc_alloc_splay_tree_str_alpha_links_splay_tree_node_s); + + if (al == NULL) + { + size_t buf_len; + char *linksym; + tree id; + + if (name[0] == '*') + name++; + + /* Follow transparent alias, as this is used for CRTL translations. */ + id = maybe_get_identifier (name); + if (id) + { + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + name = IDENTIFIER_POINTER (id); + } + + buf_len = strlen (name) + 8 + 9; + linksym = (char *) alloca (buf_len); + snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name); + + al = ggc_alloc_alpha_links (); + al->func = func; + al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym)); + + splay_tree_insert (cfun->machine->links, + (splay_tree_key) ggc_strdup (name), + (splay_tree_value) al); + } + + al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE; + + if (lflag) + return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8)); + else + return al->linkage; +} + +static int +alpha_write_one_linkage (splay_tree_node node, void *data) +{ + const char *const name = (const char *) node->key; + struct alpha_links *link = (struct alpha_links *) node->value; + FILE *stream = (FILE *) data; + + ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0)); + if (link->rkind == KIND_CODEADDR) + { + /* External and used, request code address. */ + fprintf (stream, "\t.code_address "); + } + else + { + if (!SYMBOL_REF_EXTERNAL_P (link->func) + && SYMBOL_REF_LOCAL_P (link->func)) + { + /* Locally defined, build linkage pair. */ + fprintf (stream, "\t.quad %s..en\n", name); + fprintf (stream, "\t.quad "); + } + else + { + /* External, request linkage pair. */ + fprintf (stream, "\t.linkage "); + } + } + assemble_name (stream, name); + fputs ("\n", stream); + + return 0; +} + +static void +alpha_write_linkage (FILE *stream, const char *funname) +{ + fprintf (stream, "\t.link\n"); + fprintf (stream, "\t.align 3\n"); + in_section = NULL; + +#ifdef TARGET_VMS_CRASH_DEBUG + fputs ("\t.name ", stream); + assemble_name (stream, funname); + fputs ("..na\n", stream); +#endif + + ASM_OUTPUT_LABEL (stream, funname); + fprintf (stream, "\t.pdesc "); + assemble_name (stream, funname); + fprintf (stream, "..en,%s\n", + alpha_procedure_type == PT_STACK ? "stack" + : alpha_procedure_type == PT_REGISTER ? "reg" : "null"); + + if (cfun->machine->links) + { + splay_tree_foreach (cfun->machine->links, alpha_write_one_linkage, stream); + /* splay_tree_delete (func->links); */ + } +} + +/* Switch to an arbitrary section NAME with attributes as specified + by FLAGS. ALIGN specifies any known alignment requirements for + the section; 0 if the default should be used. */ + +static void +vms_asm_named_section (const char *name, unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + fputc ('\n', asm_out_file); + fprintf (asm_out_file, ".section\t%s", name); + + if (flags & SECTION_DEBUG) + fprintf (asm_out_file, ",NOWRT"); + + fputc ('\n', asm_out_file); +} + +/* Record an element in the table of global constructors. 
SYMBOL is + a SYMBOL_REF of the function to be called; PRIORITY is a number + between 0 and MAX_INIT_PRIORITY. + + Differs from default_ctors_section_asm_out_constructor in that the + width of the .ctors entry is always 64 bits, rather than the 32 bits + used by a normal pointer. */ + +static void +vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + switch_to_section (ctors_section); + assemble_align (BITS_PER_WORD); + assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); +} + +static void +vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + switch_to_section (dtors_section); + assemble_align (BITS_PER_WORD); + assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1); +} +#else +rtx +alpha_use_linkage (rtx func ATTRIBUTE_UNUSED, + bool lflag ATTRIBUTE_UNUSED, + bool rflag ATTRIBUTE_UNUSED) +{ + return NULL_RTX; +} + +#endif /* TARGET_ABI_OPEN_VMS */ + +static void +alpha_init_libfuncs (void) +{ + if (TARGET_ABI_OPEN_VMS) + { + /* Use the VMS runtime library functions for division and + remainder. */ + set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); + set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); + set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); + set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); + set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); + set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); + set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); + set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); + abort_libfunc = init_one_libfunc ("decc$abort"); + memcmp_libfunc = init_one_libfunc ("decc$memcmp"); +#ifdef MEM_LIBFUNCS_INIT + MEM_LIBFUNCS_INIT; +#endif + } +} + +/* On the Alpha, we use this to disable the floating-point registers + when they don't exist. */ + +static void +alpha_conditional_register_usage (void) +{ + int i; + if (! TARGET_FPREGS) + for (i = 32; i < 63; i++) + fixed_regs[i] = call_used_regs[i] = 1; +} + +/* Canonicalize a comparison from one we don't have to one we do have. */ + +static void +alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + if (!op0_preserve_value + && (*code == GE || *code == GT || *code == GEU || *code == GTU) + && (REG_P (*op1) || *op1 == const0_rtx)) + { + rtx tem = *op0; + *op0 = *op1; + *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } + + if ((*code == LT || *code == LTU) + && CONST_INT_P (*op1) && INTVAL (*op1) == 256) + { + *code = *code == LT ? LE : LEU; + *op1 = GEN_INT (255); + } +} + +/* Initialize the GCC target structure. */ +#if TARGET_ABI_OPEN_VMS +# undef TARGET_ATTRIBUTE_TABLE +# define TARGET_ATTRIBUTE_TABLE vms_attribute_table +# undef TARGET_CAN_ELIMINATE +# define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate +#endif + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +/* Default unaligned ops are provided for ELF systems. To get unaligned + data for non-ELF systems, we have to turn off auto alignment. 
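+ The VMS definitions below do this by prefixing each directive with ".align 0", so the assembler does not realign the unaligned datum.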
*/ +#if TARGET_ABI_OPEN_VMS +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t" +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t" +#endif + +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS alpha_init_libfuncs + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START alpha_file_start + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + alpha_multipass_dfa_lookahead + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS HAVE_AS_TLS + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL alpha_builtin_decl +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS alpha_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN alpha_expand_builtin +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN alpha_fold_builtin + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem + +#if TARGET_ABI_OSF +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true +#undef TARGET_STDARG_OPTIMIZE_HOOK +#define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook +#endif + +/* Use 16-bits anchor. 
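+ That is, keep section anchor offsets within the signed 16-bit displacement range of the memory-format instructions, -0x8000 through 0x7fff, as the two definitions below specify.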
*/ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1 +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 0x7fff +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS alpha_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY alpha_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED +#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG alpha_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT alpha_trampoline_init + +#undef TARGET_INSTANTIATE_DECLS +#define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD alpha_secondary_reload + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start + +/* The Alpha architecture does not require sequential consistency. See + http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html + for an example of how it can be violated in practice. 
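+ + In particular, even data-dependent loads may be satisfied out of order: a reader doing "p = shared; v = *p;" can observe a stale *p unless a memory barrier separates the two loads.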
*/ +#undef TARGET_RELAXED_ORDERING +#define TARGET_RELAXED_ORDERING true + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE alpha_option_override + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE alpha_mangle_type +#endif + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage + +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison + +struct gcc_target targetm = TARGET_INITIALIZER; + + +#include "gt-alpha.h" diff --git a/gcc-4.9/gcc/config/alpha/alpha.h b/gcc-4.9/gcc/config/alpha/alpha.h new file mode 100644 index 000000000..0ff793f14 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha.h @@ -0,0 +1,1074 @@ +/* Definitions of target machine for GNU compiler, for DEC Alpha. + Copyright (C) 1992-2014 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__alpha"); \ + builtin_define ("__alpha__"); \ + builtin_assert ("cpu=alpha"); \ + builtin_assert ("machine=alpha"); \ + if (TARGET_CIX) \ + { \ + builtin_define ("__alpha_cix__"); \ + builtin_assert ("cpu=cix"); \ + } \ + if (TARGET_FIX) \ + { \ + builtin_define ("__alpha_fix__"); \ + builtin_assert ("cpu=fix"); \ + } \ + if (TARGET_BWX) \ + { \ + builtin_define ("__alpha_bwx__"); \ + builtin_assert ("cpu=bwx"); \ + } \ + if (TARGET_MAX) \ + { \ + builtin_define ("__alpha_max__"); \ + builtin_assert ("cpu=max"); \ + } \ + if (alpha_cpu == PROCESSOR_EV6) \ + { \ + builtin_define ("__alpha_ev6__"); \ + builtin_assert ("cpu=ev6"); \ + } \ + else if (alpha_cpu == PROCESSOR_EV5) \ + { \ + builtin_define ("__alpha_ev5__"); \ + builtin_assert ("cpu=ev5"); \ + } \ + else /* Presumably ev4. */ \ + { \ + builtin_define ("__alpha_ev4__"); \ + builtin_assert ("cpu=ev4"); \ + } \ + if (TARGET_IEEE || TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP"); \ + if (TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP_INEXACT"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + \ + /* Macros dependent on the C dialect. 
*/ \ + SUBTARGET_LANGUAGE_CPP_BUILTINS(); \ +} while (0) + +#ifndef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() \ + do \ + { \ + if (preprocessing_asm_p ()) \ + builtin_define_std ("LANGUAGE_ASSEMBLY"); \ + else if (c_dialect_cxx ()) \ + { \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS"); \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); \ + } \ + else \ + builtin_define_std ("LANGUAGE_C"); \ + if (c_dialect_objc ()) \ + { \ + builtin_define ("__LANGUAGE_OBJECTIVE_C"); \ + builtin_define ("__LANGUAGE_OBJECTIVE_C__"); \ + } \ + } \ + while (0) +#endif + +/* Run-time compilation parameters selecting different hardware subsets. */ + +/* Which processor to schedule for. The cpu attribute defines a list that + mirrors this list, so changes to alpha.md must be made at the same time. */ + +enum processor_type +{ + PROCESSOR_EV4, /* 2106[46]{a,} */ + PROCESSOR_EV5, /* 21164{a,pc,} */ + PROCESSOR_EV6, /* 21264 */ + PROCESSOR_MAX +}; + +extern enum processor_type alpha_cpu; +extern enum processor_type alpha_tune; + +enum alpha_trap_precision +{ + ALPHA_TP_PROG, /* No precision (default). */ + ALPHA_TP_FUNC, /* Trap contained within originating function. */ + ALPHA_TP_INSN /* Instruction accuracy and code is resumption safe. */ +}; + +enum alpha_fp_rounding_mode +{ + ALPHA_FPRM_NORM, /* Normal rounding mode. */ + ALPHA_FPRM_MINF, /* Round towards minus-infinity. */ + ALPHA_FPRM_CHOP, /* Chopped rounding mode (towards 0). */ + ALPHA_FPRM_DYN /* Dynamic rounding mode. */ +}; + +enum alpha_fp_trap_mode +{ + ALPHA_FPTM_N, /* Normal trap mode. */ + ALPHA_FPTM_U, /* Underflow traps enabled. */ + ALPHA_FPTM_SU, /* Software completion, w/underflow traps */ + ALPHA_FPTM_SUI /* Software completion, w/underflow & inexact traps */ +}; + +extern enum alpha_trap_precision alpha_tp; +extern enum alpha_fp_rounding_mode alpha_fprm; +extern enum alpha_fp_trap_mode alpha_fptm; + +/* Invert the easy way to make options work. */ +#define TARGET_FP (!TARGET_SOFT_FP) + +/* These are for target os support and cannot be changed at runtime. */ +#define TARGET_ABI_OPEN_VMS 0 +#define TARGET_ABI_OSF (!TARGET_ABI_OPEN_VMS) + +#ifndef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 0 +#endif +#ifndef TARGET_HAS_XFLOATING_LIBS +#define TARGET_HAS_XFLOATING_LIBS TARGET_LONG_DOUBLE_128 +#endif +#ifndef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 0 +#endif +#ifndef TARGET_FIXUP_EV5_PREFETCH +#define TARGET_FIXUP_EV5_PREFETCH 0 +#endif +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + +#define TARGET_DEFAULT MASK_FPREGS + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +#ifndef TARGET_DEFAULT_EXPLICIT_RELOCS +#ifdef HAVE_AS_EXPLICIT_RELOCS +#define TARGET_DEFAULT_EXPLICIT_RELOCS MASK_EXPLICIT_RELOCS +#define TARGET_SUPPORT_ARCH 1 +#else +#define TARGET_DEFAULT_EXPLICIT_RELOCS 0 +#endif +#endif + +#ifndef TARGET_SUPPORT_ARCH +#define TARGET_SUPPORT_ARCH 0 +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified. + --with-tune is ignored if -mtune is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:-mtune=%(VALUE)}" } + + +/* target machine storage layout */ + +/* Define the size of `int'. The default is the same as the word size. */ +#define INT_TYPE_SIZE 32 + +/* Define the size of `long long'. The default is the twice the word size. 
*/ +#define LONG_LONG_TYPE_SIZE 64 + +/* The two floating-point formats we support are S-floating, which is + 4 bytes, and T-floating, which is 8 bytes. `float' is S and `double' + and `long double' are T. */ + +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +/* Define this to set long double type size to use in libgcc2.c, which can + not depend on target_flags. */ +#ifdef __LONG_DOUBLE_128__ +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128 +#else +#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64 +#endif + +/* Work around target_flags dependency in ada/targtyps.c. */ +#define WIDEST_HARDWARE_FP_SIZE 64 + +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. + + For Alpha, we always store objects in a full register. 32-bit integers + are always sign-extended, but smaller objects retain their signedness. + + Note that small vector types can get mapped onto integer modes at the + whim of not appearing in alpha-modes.def. We never promoted these + values before; don't do so now that we've trimmed the set of modes to + those actually implemented in the backend. */ + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ + { \ + if ((MODE) == SImode) \ + (UNSIGNEDP) = 0; \ + (MODE) = DImode; \ + } + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. + + There are no such instructions on the Alpha, but the documentation + is little endian. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + This is false on the Alpha. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is lowest + numbered. + + For Alpha we can decide arbitrarily since there are no machine instructions + for them. Might as well be consistent with bytes. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 8 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +#define POINTER_SIZE 64 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 64 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 128 + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 64 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#undef PCC_BITFILED_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 128 + +/* For atomic access to objects, must have at least 32-bit alignment + unless the machine has byte operations. */ +#define MINIMUM_ATOMIC_ALIGNMENT ((unsigned int) (TARGET_BWX ? 
8 : 32)) + +/* Align all constants and variables to at least a word boundary so + we can pick up pieces of them faster. */ +/* ??? Only if block-move stuff knows about different source/destination + alignment. */ +#if 0 +#define CONSTANT_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) +#define DATA_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) +#endif + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. + + Since we get an error message when we do one, call them invalid. */ + +#define STRICT_ALIGNMENT 1 + +/* Set this nonzero if unaligned move instructions are extremely slow. + + On the Alpha, they trap. */ + +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + We define all 32 integer registers, even though $31 is always zero, + and all 32 floating-point registers, even though $f31 is also + always zero. We do not bother defining the FP status register and + there are no other registers. + + Since $31 is always zero, we will use register number 31 as the + argument pointer. It will never appear in the generated code + because we will always be eliminating it in favor of the stack + pointer or hardware frame pointer. + + Likewise, we use $f31 for the frame pointer, which will always + be eliminated in favor of the hardware frame pointer or the + stack pointer. */ + +#define FIRST_PSEUDO_REGISTER 64 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ + +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ + {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. 
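+ + As the inline comments below indicate, call-clobbered ("nonsaved") registers are handed out first, so values that do not live across calls stay out of the call-saved registers and avoid extra prologue and epilogue saves.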
*/ + +#define REG_ALLOC_ORDER { \ + 1, 2, 3, 4, 5, 6, 7, 8, /* nonsaved integer registers */ \ + 22, 23, 24, 25, 28, /* likewise */ \ + 0, /* likewise, but return value */ \ + 21, 20, 19, 18, 17, 16, /* likewise, but input args */ \ + 27, /* likewise, but OSF procedure value */ \ + \ + 42, 43, 44, 45, 46, 47, /* nonsaved floating-point registers */ \ + 54, 55, 56, 57, 58, 59, /* likewise */ \ + 60, 61, 62, /* likewise */ \ + 32, 33, /* likewise, but return values */ \ + 53, 52, 51, 50, 49, 48, /* likewise, but input args */ \ + \ + 9, 10, 11, 12, 13, 14, /* saved integer registers */ \ + 26, /* return address */ \ + 15, /* hard frame pointer */ \ + \ + 34, 35, 36, 37, 38, 39, /* saved floating-point registers */ \ + 40, 41, /* likewise */ \ + \ + 29, 30, 31, 63 /* gp, sp, ap, sfp */ \ +} + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. + On Alpha, the integer registers can hold any mode. The floating-point + registers can hold 64-bit integers as well, but not smaller values. */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (IN_RANGE ((REGNO), 32, 62) \ + ? (MODE) == SFmode || (MODE) == DFmode || (MODE) == DImode \ + || (MODE) == SCmode || (MODE) == DCmode \ + : 1) + +/* A C expression that is nonzero if a value of mode + MODE1 is accessible in mode MODE2 without copying. + + This asymmetric test is true when MODE1 could be put + in an FP register but MODE2 could not. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (HARD_REGNO_MODE_OK (32, (MODE1)) \ + ? HARD_REGNO_MODE_OK (32, (MODE2)) \ + : 1) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Alpha pc isn't overloaded on a register that the compiler knows about. */ +/* #define PC_REGNUM */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 30 + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 15 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 31 + +/* Base register for access to local variables of function. */ +#define FRAME_POINTER_REGNUM 63 + +/* Register in which static-chain is passed to a function. + + For the Alpha, this is based on an example; the calling sequence + doesn't seem to specify this. */ +#define STATIC_CHAIN_REGNUM 1 + +/* The register number of the register used to address a table of + static data addresses in memory. */ +#define PIC_OFFSET_TABLE_REGNUM 29 + +/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' + is clobbered by calls. */ +/* ??? It is and it isn't. It's required to be valid for a given + function when the function returns. It isn't clobbered by + current_file functions. Moreover, we do not expose the ldgp + until after reload, so we're probably safe. */ +/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. 
+ + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +enum reg_class { + NO_REGS, R0_REG, R24_REG, R25_REG, R27_REG, + GENERAL_REGS, FLOAT_REGS, ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ + {"NO_REGS", "R0_REG", "R24_REG", "R25_REG", "R27_REG", \ + "GENERAL_REGS", "FLOAT_REGS", "ALL_REGS" } + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ {0x00000000, 0x00000000}, /* NO_REGS */ \ + {0x00000001, 0x00000000}, /* R0_REG */ \ + {0x01000000, 0x00000000}, /* R24_REG */ \ + {0x02000000, 0x00000000}, /* R25_REG */ \ + {0x08000000, 0x00000000}, /* R27_REG */ \ + {0xffffffff, 0x80000000}, /* GENERAL_REGS */ \ + {0x00000000, 0x7fffffff}, /* FLOAT_REGS */ \ + {0xffffffff, 0xffffffff} } + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == 0 ? R0_REG \ + : (REGNO) == 24 ? R24_REG \ + : (REGNO) == 25 ? R25_REG \ + : (REGNO) == 27 ? R27_REG \ + : IN_RANGE ((REGNO), 32, 62) ? FLOAT_REGS \ + : GENERAL_REGS) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS NO_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS alpha_preferred_reload_class + +/* If we are copying between general and FP registers, we need a memory + location unless the FIX extension is available. */ + +#define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE) \ + (! TARGET_FIX && (((CLASS1) == FLOAT_REGS && (CLASS2) != FLOAT_REGS) \ + || ((CLASS2) == FLOAT_REGS && (CLASS1) != FLOAT_REGS))) + +/* Specify the mode to be used for memory when a secondary memory + location is needed. If MODE is floating-point, use it. Otherwise, + widen to a word like the default. This is needed because we always + store integers in FP registers in quadword format. This whole + area is very tricky! */ +#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \ + (GET_MODE_CLASS (MODE) == MODE_FLOAT ? (MODE) \ + : GET_MODE_SIZE (MODE) >= 4 ? (MODE) \ + : mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (MODE), 0)) + +/* Return the class of registers that cannot change mode from FROM to TO. */ + +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) \ + ? reg_classes_intersect_p (FLOAT_REGS, CLASS) : 0) + +/* Define the cost of moving between registers of various classes. Moving + between FLOAT_REGS and anything else except float regs is expensive. 
+ In fact, we make it quite expensive because we really don't want to + do these moves unless it is clearly worth it. Optimizations may + reduce the impact of not being able to allocate a pseudo to a + hard register. */ + +#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \ + (((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) ? 2 \ + : TARGET_FIX ? ((CLASS1) == FLOAT_REGS ? 6 : 8) \ + : 4+2*alpha_memory_latency) + +/* A C expression returning the cost of moving data of MODE from a register to + or from memory. + + On the Alpha, bump this up a bit. */ + +extern int alpha_memory_latency; +#define MEMORY_MOVE_COST(MODE,CLASS,IN) (2*alpha_memory_latency) + +/* Provide the cost of a branch. Exact meaning under development. */ +#define BRANCH_COST(speed_p, predictable_p) 5 + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +/* #define FRAME_GROWS_DOWNWARD 0 */ + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ + +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. + On Alpha, don't define this because there are no push insns. */ +/* #define PUSH_ROUNDING(BYTES) */ + +/* Define this to be nonzero if stack checking is built into the ABI. */ +#define STACK_CHECK_BUILTIN 1 + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset of first parameter from the argument pointer register value. */ + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Definitions for register eliminations. + + We have two registers that can be eliminated on the Alpha. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} + +/* Round up to a multiple of 16 bytes. */ +#define ALPHA_ROUND(X) (((X) + 15) & ~ 15) + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = alpha_initial_elimination_offset(FROM, TO)) + +/* Define this if stack space is still allocated for a parameter passed + in a register. */ +/* #define REG_PARM_STACK_SPACE */ + +/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. + + On Alpha the value is found in $0 for integer functions and + $f0 for floating-point functions. */ + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + function_value (VALTYPE, FUNC, VOIDmode) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +#define LIBCALL_VALUE(MODE) \ + function_value (NULL, NULL, MODE) + +/* 1 if N is a possible register number for a function value + as seen by the caller. */ + +#define FUNCTION_VALUE_REGNO_P(N) \ + ((N) == 0 || (N) == 1 || (N) == 32 || (N) == 33) + +/* 1 if N is a possible register number for function argument passing. + On Alpha, these are $16-$21 and $f16-$f21. */ + +#define FUNCTION_ARG_REGNO_P(N) \ + (IN_RANGE ((N), 16, 21) || ((N) >= 16 + 32 && (N) <= 21 + 32)) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On Alpha, this is a single integer, which is a number of words + of arguments scanned so far. + Thus 6 or more means all following args should go on the stack. */ + +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM) = 0 + +/* Define intermediate macro to compute the size (in registers) of an argument + for the Alpha. */ + +#define ALPHA_ARG_SIZE(MODE, TYPE, NAMED) \ + ((MODE) == TFmode || (MODE) == TCmode ? 1 \ + : (((MODE) == BLKmode ? int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) \ + + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD) + +/* Make (or fake) .linkage entry for function call. + IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */ + +/* This macro defines the start of an assembly comment. */ + +#define ASM_COMMENT_START " #" + +/* This macro produces the initial definition of a function. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \ + alpha_start_function(FILE,NAME,DECL); + +/* This macro closes up a function definition for the assembler. */ + +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE,NAME,DECL) \ + alpha_end_function(FILE,NAME,DECL) + +/* Output any profiling code before the prologue. */ + +#define PROFILE_BEFORE_PROLOGUE 1 + +/* Never use profile counters. */ + +#define NO_PROFILE_COUNTERS 1 + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. Under OSF/1, profiling is enabled + by simply passing -pg to the assembler and linker. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + +/* Define registers used by the epilogue and return instruction. */ + +#define EPILOGUE_USES(REGNO) ((REGNO) == 26) + +/* Length in units of the trampoline for entering a nested function. */ + +#define TRAMPOLINE_SIZE 32 + +/* The alignment of a trampoline, in bits. 
*/ + +#define TRAMPOLINE_ALIGNMENT 64 + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. + FRAMEADDR is the frame pointer of the COUNT frame, or the frame pointer of + the COUNT-1 frame if RETURN_ADDR_IN_PREVIOUS_FRAME is defined. */ + +#define RETURN_ADDR_RTX alpha_return_addr + +/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders + can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same + as the default definition in dwarf2out.c. */ +#undef DWARF_FRAME_REGNUM +#define DWARF_FRAME_REGNUM(REG) DBX_REGISTER_NUMBER (REG) + +/* Before the prologue, RA lives in $26. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 26) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26) +#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64) +#define DWARF_ZERO_REG 31 + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 16 : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 28) +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (Pmode, stack_pointer_rtx, \ + crtl->outgoing_args_size)) + +/* Addressing modes, and classification of registers for them. */ + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) 0 +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32 \ + || (REGNO) == 63 || reg_renumber[REGNO] == 63) + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* Recognize any constant value that is a valid address. For the Alpha, + the only valid constant addresses are small integer constants, since we + want to use LDA to load any symbolic addresses into registers. */ + +#define CONSTANT_ADDRESS_P(X) \ + (CONST_INT_P (X) \ + && (unsigned HOST_WIDE_INT) (INTVAL (X) + 0x8000) < 0x10000) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) 0 + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define NONSTRICT_REG_OK_FOR_BASE_P(X) \ + (REGNO (X) < 32 || REGNO (X) == 63 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* ??? Nonzero if X is the frame pointer, or some virtual register + that may eliminate to the frame pointer. These will be allowed to + have offsets greater than 32K.
This is done because register + elimination offsets will change the hi/lo split, and if we split + before reload, we will require additional instructions. */ +#define NONSTRICT_REG_OK_FP_BASE_P(X) \ + (REGNO (X) == 31 || REGNO (X) == 63 \ + || (REGNO (X) >= FIRST_PSEUDO_REGISTER \ + && REGNO (X) < LAST_VIRTUAL_POINTER_REGISTER)) + +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define STRICT_REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) STRICT_REG_OK_FOR_BASE_P (X) +#else +#define REG_OK_FOR_BASE_P(X) NONSTRICT_REG_OK_FOR_BASE_P (X) +#endif + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \ +do { \ + rtx new_x = alpha_legitimize_reload_address (X, MODE, OPNUM, TYPE, IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ +} while (0) + + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + + Do not define this if the table should contain absolute addresses. + On the Alpha, the table is really GP-relative, not relative to the PC + of the table, but we pretend that it is PC-relative; this should be OK, + but we should try to find some better way sometime. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Max number of bytes we can move to or from memory + in one reasonably fast instruction. */ + +#define MOVE_MAX 8 + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. + + Without byte/word accesses, we want no more than four instructions; + with, several single byte accesses are better. */ + +#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2) + +/* Largest number of bytes of an object that can be placed in a register. + On the Alpha we have plenty of registers, so use TImode. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* Nonzero if access to memory by bytes is no faster than for words. + Also nonzero if doing byte operations (specifically shifts) in registers + is undesirable. + + On the Alpha, we want to not use the byte operation and instead use + masking operations to access fields; these will save instructions. */ + +#define SLOW_BYTE_ACCESS 1 + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ((MODE) == SImode ? SIGN_EXTEND : ZERO_EXTEND) + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. 
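+ + On the Alpha this always holds: a narrower value is simply the low-order bits of the 64-bit register that holds it, hence the constant 1 below.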
*/ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* The CIX ctlz and cttz instructions return 64 for zero. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, TARGET_CIX) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, TARGET_CIX) + +/* Define the value returned by a floating-point comparison instruction. */ + +#define FLOAT_STORE_FLAG_VALUE(MODE) \ + REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE)) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode DImode + +/* Mode of a function address in a call instruction (for indexing purposes). */ + +#define FUNCTION_MODE Pmode + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. + Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. + + We define this on the Alpha so that gen_call and gen_call_value + get to see the SYMBOL_REF (for the hint field of the jsr). It will + then copy it into a register, thus actually letting the address be + cse'ed. */ + +#define NO_FUNCTION_CSE + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Control the assembler format that we output. */ + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON (TARGET_EXPLICIT_RELOCS ? "\t.set\tmacro\n" : "") + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF (TARGET_EXPLICIT_RELOCS ? "\t.set\tnomacro\n" : "") + +#define TEXT_SECTION_ASM_OP "\t.text" + +/* Output before writable data. */ + +#define DATA_SECTION_ASM_OP "\t.data" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ + +#define REGISTER_NAMES \ +{"$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", \ + "$9", "$10", "$11", "$12", "$13", "$14", "$15", \ + "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", \ + "$24", "$25", "$26", "$27", "$28", "$29", "$30", "AP", \ + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", \ + "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", \ + "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",\ + "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "FP"} + +/* Strip name encoding when emitting labels. */ + +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ +do { \ + const char *name_ = NAME; \ + if (*name_ == '@' || *name_ == '%') \ + name_ += 2; \ + if (*name_ == '*') \ + name_++; \ + else \ + fputs (user_label_prefix, STREAM); \ + fputs (name_, STREAM); \ +} while (0) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/* Use dollar signs rather than periods in special g++ assembler names. */ + +#undef NO_DOLLAR_IN_LABEL + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf ((LABEL), "*$%s%ld", (PREFIX), (long)(NUM)) + +/* This is how to output an element of a case-vector that is relative. 
*/ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + fprintf (FILE, "\t.gprel32 $L%d\n", (VALUE)) + + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +#define PRINT_OPERAND(FILE, X, CODE) print_operand (FILE, X, CODE) + +/* Determine which codes are valid without a following integer. These must + not be alphabetic. + + ~ Generates the name of the current function. + + / Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX + attributes are examined to determine what is appropriate. + + , Generates single precision suffix for floating point + instructions (s for IEEE, f for VAX) + + - Generates double precision suffix for floating point + instructions (t for IEEE, g for VAX) + */ + +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \ + ((CODE) == '/' || (CODE) == ',' || (CODE) == '-' || (CODE) == '~' \ + || (CODE) == '#' || (CODE) == '*' || (CODE) == '&') + +/* Print a memory address as an operand to reference that memory location. */ + +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ + print_operand_address((FILE), (ADDR)) + +/* If we use NM, pass -g to it so it only lists globals. */ +#define NM_FLAGS "-pg" + +/* Definitions for debugging. */ + +/* Correct the offset of automatic variables and arguments. Note that + the Alpha debug format wants all automatic variables and arguments + to be in terms of two different offsets from the virtual frame pointer, + which is the stack pointer before any adjustment in the function. + The offset for the argument pointer is fixed for the native compiler, + it is either zero (for the no arguments case) or large enough to hold + all argument registers. + The offset for the auto pointer is the fourth argument to the .frame + directive (local_offset). + To stay compatible with the native tools we use the same offsets + from the virtual frame pointer and adjust the debugger arg/auto offsets + accordingly. These debugger offsets are set up in output_prolog. */ + +extern long alpha_arg_offset; +extern long alpha_auto_offset; +#define DEBUGGER_AUTO_OFFSET(X) \ + ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) + alpha_auto_offset) +#define DEBUGGER_ARG_OFFSET(OFFSET, X) (OFFSET + alpha_arg_offset) + +#define ASM_OUTPUT_SOURCE_FILENAME(STREAM, NAME) \ + alpha_output_filename (STREAM, NAME) + +/* By default, turn on GDB extensions. */ +#define DEFAULT_GDB_EXTENSIONS 1 + +/* The system headers under Alpha systems are generally C++-aware. */ +#define NO_IMPLICIT_EXTERN_C diff --git a/gcc-4.9/gcc/config/alpha/alpha.md b/gcc-4.9/gcc/config/alpha/alpha.md new file mode 100644 index 000000000..795b4df3f --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha.md @@ -0,0 +1,6113 @@ +;; Machine description for DEC Alpha for GNU C compiler +;; Copyright (C) 1992-2014 Free Software Foundation, Inc. +;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; Uses of UNSPEC in this file: + +(define_c_enum "unspec" [ + UNSPEC_XFLT_COMPARE + UNSPEC_ARG_HOME + UNSPEC_LDGP1 + UNSPEC_INSXH + UNSPEC_MSKXH + UNSPEC_CVTQL + UNSPEC_CVTLQ + UNSPEC_LDGP2 + UNSPEC_LITERAL + UNSPEC_LITUSE + UNSPEC_SIBCALL + UNSPEC_SYMBOL + + ;; TLS Support + UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL + UNSPEC_TLSGD + UNSPEC_TLSLDM + UNSPEC_DTPREL + UNSPEC_TPREL + UNSPEC_TP + + ;; Builtins + UNSPEC_CMPBGE + UNSPEC_ZAP + UNSPEC_AMASK + UNSPEC_IMPLVER + UNSPEC_PERR + UNSPEC_COPYSIGN + + ;; Atomic operations + UNSPEC_MB + UNSPEC_ATOMIC + UNSPEC_CMPXCHG + UNSPEC_XCHG +]) + +;; UNSPEC_VOLATILE: + +(define_c_enum "unspecv" [ + UNSPECV_IMB + UNSPECV_BLOCKAGE + UNSPECV_SETJMPR ; builtin_setjmp_receiver + UNSPECV_LONGJMP ; builtin_longjmp + UNSPECV_TRAPB + UNSPECV_PSPL ; prologue_stack_probe_loop + UNSPECV_REALIGN + UNSPECV_EHR ; exception_receiver + UNSPECV_MCOUNT + UNSPECV_FORCE_MOV + UNSPECV_LDGP1 + UNSPECV_PLDGP2 ; prologue ldgp + UNSPECV_SET_TP + UNSPECV_RPCC + UNSPECV_SETJMPR_ER ; builtin_setjmp_receiver fragment + UNSPECV_LL ; load-locked + UNSPECV_SC ; store-conditional + UNSPECV_CMPXCHG +]) + +;; On non-BWX targets, CQImode must be handled the similarly to HImode +;; when generating reloads. +(define_mode_iterator RELOAD12 [QI HI CQI]) +(define_mode_attr reloadmode [(QI "qi") (HI "hi") (CQI "hi")]) + +;; Other mode iterators +(define_mode_iterator IMODE [QI HI SI DI]) +(define_mode_iterator I12MODE [QI HI]) +(define_mode_iterator I124MODE [QI HI SI]) +(define_mode_iterator I24MODE [HI SI]) +(define_mode_iterator I248MODE [HI SI DI]) +(define_mode_iterator I48MODE [SI DI]) + +(define_mode_attr DWI [(SI "DI") (DI "TI")]) +(define_mode_attr modesuffix [(QI "b") (HI "w") (SI "l") (DI "q") + (V8QI "b8") (V4HI "w4") + (SF "%,") (DF "%-")]) +(define_mode_attr vecmodesuffix [(QI "b8") (HI "w4")]) + +(define_code_iterator any_maxmin [smax smin umax umin]) + +(define_code_attr maxmin [(smax "maxs") (smin "mins") + (umax "maxu") (umin "minu")]) + +;; Where necessary, the suffixes _le and _be are used to distinguish between +;; little-endian and big-endian patterns. +;; +;; Note that the Unicos/Mk assembler does not support the following +;; opcodes: mov, fmov, nop, fnop, unop. + +;; Processor type -- this attribute must exactly match the processor_type +;; enumeration in alpha.h. + +(define_attr "tune" "ev4,ev5,ev6" + (const (symbol_ref "((enum attr_tune) alpha_tune)"))) + +;; Define an insn type attribute. This is used in function unit delay +;; computations, among other purposes. For the most part, we use the names +;; defined in the EV4 documentation, but add a few that we have to know about +;; separately. + +(define_attr "type" + "ild,fld,ldsym,ist,fst,ibr,callpal,fbr,jsr,iadd,ilog,shift,icmov,fcmov, + icmp,imul,fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,mb,ld_l,st_c, + multi,none" + (const_string "iadd")) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "type" "multi")]) + +;; Define the operand size an insn operates on. Used primarily by mul +;; and div operations that have size dependent timings. + +(define_attr "opsize" "si,di,udi" + (const_string "di")) + +;; The TRAP attribute marks instructions that may generate traps +;; (which are imprecise and may need a trapb if software completion +;; is desired). 
+ +(define_attr "trap" "no,yes" + (const_string "no")) + +;; The ROUND_SUFFIX attribute marks which instructions require a +;; rounding-mode suffix. The value NONE indicates no suffix, +;; the value NORMAL indicates a suffix controlled by alpha_fprm. + +(define_attr "round_suffix" "none,normal,c" + (const_string "none")) + +;; The TRAP_SUFFIX attribute marks instructions requiring a trap-mode suffix: +;; NONE no suffix +;; SU accepts only /su (cmpt et al) +;; SUI accepts only /sui (cvtqt and cvtqs) +;; V_SV accepts /v and /sv (cvtql only) +;; V_SV_SVI accepts /v, /sv and /svi (cvttq only) +;; U_SU_SUI accepts /u, /su and /sui (most fp instructions) +;; +;; The actual suffix emitted is controlled by alpha_fptm. + +(define_attr "trap_suffix" "none,su,sui,v_sv,v_sv_svi,u_su_sui" + (const_string "none")) + +;; The length of an instruction sequence in bytes. + +(define_attr "length" "" + (const_int 4)) + +;; The USEGP attribute marks instructions that have relocations that use +;; the GP. + +(define_attr "usegp" "no,yes" + (cond [(eq_attr "type" "ldsym,jsr") + (const_string "yes") + (eq_attr "type" "ild,fld,ist,fst") + (symbol_ref "((enum attr_usegp) alpha_find_lo_sum_using_gp (insn))") + ] + (const_string "no"))) + +;; The CANNOT_COPY attribute marks instructions with relocations that +;; cannot easily be duplicated. This includes insns with gpdisp relocs +;; since they have to stay in 1-1 correspondence with one another. This +;; also includes jsr insns, since they must stay in correspondence with +;; the immediately following gpdisp instructions. + +(define_attr "cannot_copy" "false,true" + (const_string "false")) + +;; Used to control the "enabled" attribute on a per-instruction basis. +;; For convenience, conflate ABI issues re loading of addresses with +;; an "isa". +(define_attr "isa" "base,bwx,max,fix,cix,vms,ner,er" + (const_string "base")) + +(define_attr "enabled" "" + (cond [(eq_attr "isa" "bwx") (symbol_ref "TARGET_BWX") + (eq_attr "isa" "max") (symbol_ref "TARGET_MAX") + (eq_attr "isa" "fix") (symbol_ref "TARGET_FIX") + (eq_attr "isa" "cix") (symbol_ref "TARGET_CIX") + (eq_attr "isa" "vms") (symbol_ref "TARGET_ABI_OPEN_VMS") + (eq_attr "isa" "ner") (symbol_ref "!TARGET_EXPLICIT_RELOCS") + (eq_attr "isa" "er") (symbol_ref "TARGET_EXPLICIT_RELOCS") + ] + (const_int 1))) + +;; Include scheduling descriptions. + +(include "ev4.md") +(include "ev5.md") +(include "ev6.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; First define the arithmetic insns. Note that the 32-bit forms also +;; sign-extend. + +;; Handle 32-64 bit extension from memory to a floating point register +;; specially, since this occurs frequently in int->double conversions. +;; +;; Note that while we must retain the =f case in the insn for reload's +;; benefit, it should be eliminated after reload, so we should never emit +;; code for that case. But we don't reject the possibility. 
+ +(define_expand "extendsidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]) + +(define_insn "*cvtlq" + [(set (match_operand:DI 0 "register_operand" "=f") + (unspec:DI [(match_operand:SF 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTLQ))] + "" + "cvtlq %1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*extendsidi2_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,!*f") + (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "r,m,m")))] + "" + "@ + addl $31,%1,%0 + ldl %0,%1 + lds %0,%1\;cvtlq %0,%0" + [(set_attr "type" "iadd,ild,fld") + (set_attr "length" "*,*,8")]) + +(define_split + [(set (match_operand:DI 0 "hard_fp_register_operand") + (sign_extend:DI (match_operand:SI 1 "memory_operand")))] + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (unspec:DI [(match_dup 2)] UNSPEC_CVTLQ))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +;; Optimize sign-extension of SImode loads. This shows up in the wake of +;; reload when converting fp->int. + +(define_peephole2 + [(set (match_operand:SI 0 "hard_int_register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 2 "hard_int_register_operand") + (sign_extend:DI (match_dup 0)))] + "true_regnum (operands[0]) == true_regnum (operands[2]) + || peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (sign_extend:DI (match_dup 1)))]) + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ,rJ") + (match_operand:SI 2 "add_operand" "rI,O,K,L")))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0 + lda %0,%2(%r1) + ldah %0,%h2(%r1)") + +(define_split + [(set (match_operand:SI 0 "register_operand") + (plus:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "const_int_operand")))] + "! add_operand (operands[2], SImode)" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + + operands[3] = GEN_INT (rest); + operands[4] = GEN_INT (low); +}) + +(define_insn "*addsi_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "sext_add_operand" "rI,O"))))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0") + +(define_insn "*addsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (subreg:SI (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "sext_add_operand" "rI,O")) + 0)))] + "" + "@ + addl %r1,%2,%0 + subl %r1,%n2,%0") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_not_elim_operand") + (match_operand:SI 2 "const_int_operand")))) + (clobber (match_operand:SI 3 "reg_not_elim_operand"))] + "! 
sext_add_operand (operands[2], SImode) && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) % 4 == 0" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (sign_extend:DI (plus:SI (mult:SI (match_dup 3) + (match_dup 5)) + (match_dup 1))))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]) / 4; + int mult = 4; + + if (val % 2 == 0) + val /= 2, mult = 8; + + operands[4] = GEN_INT (val); + operands[5] = GEN_INT (mult); +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2) + (match_operand 3)]) + (match_operand:SI 4 "add_operand")))) + (clobber (match_operand:DI 5 "register_operand"))] + "" + [(set (match_dup 5) (match_dup 6)) + (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 7) (match_dup 4))))] +{ + operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[7] = gen_lowpart (SImode, operands[5]); +}) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "add_operand")))]) + +(define_insn "*adddi_er_lo16_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp16_symbolic_operand")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!dtprel") + +(define_insn "*adddi_er_hi32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "dtp32_symbolic_operand"))))] + "HAVE_AS_TLS" + "ldah %0,%2(%1)\t\t!dtprelhi") + +(define_insn "*adddi_er_lo32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp32_symbolic_operand")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!dtprello") + +(define_insn "*adddi_er_lo16_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp16_symbolic_operand")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!tprel") + +(define_insn "*adddi_er_hi32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "tp32_symbolic_operand"))))] + "HAVE_AS_TLS" + "ldah %0,%2(%1)\t\t!tprelhi") + +(define_insn "*adddi_er_lo32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp32_symbolic_operand")))] + "HAVE_AS_TLS" + "lda %0,%2(%1)\t\t!tprello") + +(define_insn "*adddi_er_high_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "local_symbolic_operand"))))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + "ldah %0,%2(%1)\t\t!gprelhigh" + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (high:DI (match_operand:DI 1 "local_symbolic_operand")))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1))))] + "operands[2] = pic_offset_table_rtx;") + +;; We used to expend quite a lot of effort choosing addq/subq/lda. +;; With complications like +;; +;; The NT stack unwind code can't handle a subq to adjust the stack +;; (that's a bug, but not one we can do anything about). As of NT4.0 SP3, +;; the exception handling code will loop if a subq is used and an +;; exception occurs. 
+;; +;; The 19980616 change to emit prologues as RTL also confused some +;; versions of GDB, which also interprets prologues. This has been +;; fixed as of GDB 4.18, but it does not harm to unconditionally +;; use lda here. +;; +;; and the fact that the three insns schedule exactly the same, it's +;; just not worth the effort. + +(define_insn "*adddi_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r") + (match_operand:DI 2 "add_operand" "r,K,L")))] + "" + "@ + addq %1,%2,%0 + lda %0,%2(%1) + ldah %0,%h2(%1)") + +;; ??? Allow large constants when basing off the frame pointer or some +;; virtual register that may eliminate to the frame pointer. This is +;; done because register elimination offsets will change the hi/lo split, +;; and if we split before reload, we will require additional instructions. + +(define_insn "*adddi_fp_hack" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "reg_no_subreg_operand" "r,r,r") + (match_operand:DI 2 "const_int_operand" "K,L,n")))] + "NONSTRICT_REG_OK_FP_BASE_P (operands[1]) + && INTVAL (operands[2]) >= 0 + /* This is the largest constant an lda+ldah pair can add, minus + an upper bound on the displacement between SP and AP during + register elimination. See INITIAL_ELIMINATION_OFFSET. */ + && INTVAL (operands[2]) + < (0x7fff8000 + - FIRST_PSEUDO_REGISTER * UNITS_PER_WORD + - ALPHA_ROUND(crtl->outgoing_args_size) + - (ALPHA_ROUND (get_frame_size () + + max_reg_num () * UNITS_PER_WORD + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size))" + "@ + lda %0,%2(%1) + ldah %0,%h2(%1) + #") + +;; Don't do this if we are adjusting SP since we don't want to do it +;; in two steps. Don't split FP sources for the reason listed above. +(define_split + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand")))] + "! 
add_operand (operands[2], DImode) + && operands[0] != stack_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[1] != arg_pointer_rtx" + [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + rtx rest_rtx = GEN_INT (rest); + + operands[4] = GEN_INT (low); + if (satisfies_constraint_L (rest_rtx)) + operands[3] = rest_rtx; + else if (can_create_pseudo_p ()) + { + operands[3] = gen_reg_rtx (DImode); + emit_move_insn (operands[3], operands[2]); + emit_insn (gen_adddi3 (operands[0], operands[1], operands[3])); + DONE; + } + else + FAIL; +}) + +(define_insn "*sadd" + [(set (match_operand:I48MODE 0 "register_operand" "=r,r") + (plus:I48MODE + (mult:I48MODE (match_operand:I48MODE 1 "reg_not_elim_operand" "r,r") + (match_operand:I48MODE 2 "const48_operand" "I,I")) + (match_operand:I48MODE 3 "sext_add_operand" "rI,O")))] + "" + "@ + s%2add %1,%3,%0 + s%2sub %1,%n3,%0") + +(define_insn "*saddl_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const48_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O"))))] + "" + "@ + s%2addl %1,%3,%0 + s%2subl %1,%n3,%0") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (mult:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2) + (match_operand 3)]) + (match_operand:SI 4 "const48_operand")) + (match_operand:SI 5 "sext_add_operand")))) + (clobber (match_operand:DI 6 "reg_not_elim_operand"))] + "" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 8) (match_dup 4)) + (match_dup 5))))] +{ + operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[8] = gen_lowpart (SImode, operands[6]); +}) + +(define_insn "addv3" + [(set (match_operand:I48MODE 0 "register_operand" "=r,r") + (plus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:I48MODE 2 "sext_add_operand" "rI,O"))) + (trap_if (ne (plus: (sign_extend: (match_dup 1)) + (sign_extend: (match_dup 2))) + (sign_extend: (plus:I48MODE (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "@ + addv %r1,%2,%0 + subv %r1,%n2,%0") + +(define_insn "neg2" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (neg:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")))] + "" + "sub $31,%1,%0") + +(define_insn "*negsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (neg:SI + (match_operand:SI 1 "reg_or_8bit_operand" "rI"))))] + "" + "subl $31,%1,%0") + +(define_insn "negv2" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (neg:I48MODE (match_operand:I48MODE 1 "register_operand" "r"))) + (trap_if (ne (neg: (sign_extend: (match_dup 1))) + (sign_extend: (neg:I48MODE (match_dup 1)))) + (const_int 0))] + "" + "subv $31,%1,%0") + +(define_insn "sub3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (minus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] + "" + "sub %r1,%2,%0") + +(define_insn "*subsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_8bit_operand" 
"rI"))))] + "" + "subl %r1,%2,%0") + +(define_insn "*subsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")) + 0)))] + "" + "subl %r1,%2,%0") + +(define_insn "*ssub" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (minus:I48MODE + (mult:I48MODE (match_operand:I48MODE 1 "reg_not_elim_operand" "r") + (match_operand:I48MODE 2 "const48_operand" "I")) + (match_operand:I48MODE 3 "reg_or_8bit_operand" "rI")))] + "" + "s%2sub %1,%3,%0") + +(define_insn "*ssubl_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const48_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] + "" + "s%2subl %1,%3,%0") + +(define_insn "subv3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (minus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI"))) + (trap_if (ne (minus: (sign_extend: (match_dup 1)) + (sign_extend: (match_dup 2))) + (sign_extend: (minus:I48MODE (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "subv %r1,%2,%0") + +(define_insn "mul3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (mult:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] + "" + "mul %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "")]) + +(define_insn "*mulsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] + "" + "mull %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "si")]) + +(define_insn "mulv3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (mult:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI"))) + (trap_if (ne (mult: (sign_extend: (match_dup 1)) + (sign_extend: (match_dup 2))) + (sign_extend: (mult:I48MODE (match_dup 1) + (match_dup 2)))) + (const_int 0))] + "" + "mulv %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "")]) + +(define_expand "umuldi3_highpart" + [(set (match_operand:DI 0 "register_operand") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand")) + (match_operand:DI 2 "reg_or_8bit_operand")) + (const_int 64))))] + "" +{ + if (REG_P (operands[2])) + operands[2] = gen_rtx_ZERO_EXTEND (TImode, operands[2]); +}) + +(define_insn "*umuldi3_highpart_reg" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand" "r")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" "r"))) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_insn "*umuldi3_highpart_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) + (match_operand:TI 2 "cint8_operand" "I")) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_expand "umulditi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI + (zero_extend:TI (match_operand:DI 1 "reg_no_subreg_operand")) + (zero_extend:TI (match_operand:DI 
2 "reg_no_subreg_operand"))))] + "" +{ + rtx l = gen_reg_rtx (DImode), h = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (l, operands[1], operands[2])); + emit_insn (gen_umuldi3_highpart (h, operands[1], operands[2])); + emit_move_insn (gen_lowpart (DImode, operands[0]), l); + emit_move_insn (gen_highpart (DImode, operands[0]), h); + DONE; +}) + +;; The divide and remainder operations take their inputs from r24 and +;; r25, put their output in r27, and clobber r23 and r28 on all systems. +;; +;; ??? Force sign-extension here because some versions of OSF/1 and +;; Interix/NT don't do the right thing if the inputs are not properly +;; sign-extended. But Linux, for instance, does not have this +;; problem. Is it worth the complication here to eliminate the sign +;; extension? + +(define_code_iterator any_divmod [div mod udiv umod]) + +(define_expand "si3" + [(set (match_dup 3) + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand"))) + (set (match_dup 4) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand"))) + (parallel [(set (match_dup 5) + (sign_extend:DI + (any_divmod:SI (match_dup 3) (match_dup 4)))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))]) + (set (match_operand:SI 0 "nonimmediate_operand") + (subreg:SI (match_dup 5) 0))] + "TARGET_ABI_OSF" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}) + +(define_expand "di3" + [(parallel [(set (match_operand:DI 0 "register_operand") + (any_divmod:DI + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "register_operand"))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] + "TARGET_ABI_OSF") + +;; Lengths of 8 for ldq $t12,__divq($gp); jsr $t9,($t12),__divq as +;; expanded by the assembler. + +(define_insn_and_split "*divmodsi_internal_er" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (sign_extend:DI (match_dup 3))) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divl"; + break; + case UDIV: + str = "__divlu"; + break; + case MOD: + str = "__reml"; + break; + case UMOD: + str = "__remlu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); +} + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn "*divmodsi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $23,($27),__%E3%j5" + [(set_attr "type" "jsr") + (set_attr "length" "4")]) + +(define_insn "*divmodsi_internal" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 
"register_operand" "b")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_ABI_OSF" + "%E3 %1,%2,%0" + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn_and_split "*divmoddi_internal_er" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (match_dup 3)) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divq"; + break; + case UDIV: + str = "__divqu"; + break; + case MOD: + str = "__remq"; + break; + case UMOD: + str = "__remqu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); +} + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +(define_insn "*divmoddi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $23,($27),__%E3%j5" + [(set_attr "type" "jsr") + (set_attr "length" "4")]) + +(define_insn "*divmoddi_internal" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_ABI_OSF" + "%E3 %1,%2,%0" + [(set_attr "type" "jsr") + (set_attr "length" "8")]) + +;; Next are the basic logical operations. We only expose the DImode operations +;; to the rtl expanders, but SImode versions exist for combine as well as for +;; the atomic operation splitters. + +(define_insn "*andsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:SI 2 "and_operand" "rI,N,MH")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +(define_insn "anddi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (and:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:DI 2 "and_operand" "rI,N,MH")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +;; There are times when we can split an AND into two AND insns. This occurs +;; when we can first clear any bytes and then clear anything else. For +;; example "I & 0xffff07" is "(I & 0xffffff) & 0xffffffffffffff07". +;; Only do this when running on 64-bit host since the computations are +;; too messy otherwise. + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand")))] + "HOST_BITS_PER_WIDE_INT == 64 && ! 
and_operand (operands[2], DImode)" + [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))] +{ + unsigned HOST_WIDE_INT mask1 = INTVAL (operands[2]); + unsigned HOST_WIDE_INT mask2 = mask1; + int i; + + /* For each byte that isn't all zeros, make it all ones. */ + for (i = 0; i < 64; i += 8) + if ((mask1 & ((HOST_WIDE_INT) 0xff << i)) != 0) + mask1 |= (HOST_WIDE_INT) 0xff << i; + + /* Now turn on any bits we've just turned off. */ + mask2 |= ~ mask1; + + operands[3] = GEN_INT (mask1); + operands[4] = GEN_INT (mask2); +}) + +(define_insn "zero_extendqi2" + [(set (match_operand:I248MODE 0 "register_operand" "=r,r") + (zero_extend:I248MODE + (match_operand:QI 1 "reg_or_bwx_memory_operand" "r,m")))] + "" + "@ + and %1,0xff,%0 + ldbu %0,%1" + [(set_attr "type" "ilog,ild") + (set_attr "isa" "*,bwx")]) + +(define_insn "zero_extendhi2" + [(set (match_operand:I48MODE 0 "register_operand" "=r,r") + (zero_extend:I48MODE + (match_operand:HI 1 "reg_or_bwx_memory_operand" "r,m")))] + "" + "@ + zapnot %1,3,%0 + ldwu %0,%1" + [(set_attr "type" "shift,ild") + (set_attr "isa" "*,bwx")]) + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "" + "zapnot %1,15,%0" + [(set_attr "type" "shift")]) + +(define_insn "andnot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (and:I48MODE + (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) + (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] + "" + "bic %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "iordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ior:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*one_cmplsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (ior:I48MODE + (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) + (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] + "" + "ornot %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (xor:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (xor:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (not:I48MODE (xor:I48MODE + (match_operand:I48MODE 1 "register_operand" "%rJ") + 
(match_operand:I48MODE 2 "register_operand" "rI"))))] + "" + "eqv %r1,%2,%0" + [(set_attr "type" "ilog")]) + +;; Handle FFS and related insns iff we support CIX. + +(define_expand "ffsdi2" + [(set (match_dup 2) + (ctz:DI (match_operand:DI 1 "register_operand"))) + (set (match_dup 3) + (plus:DI (match_dup 2) (const_int 1))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 1) (const_int 0)) + (const_int 0) (match_dup 3)))] + "TARGET_CIX" +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "clzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "ctlz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "cttz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "popcountdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "ctpop %1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand") + (bswap:SI (match_operand:SI 1 "register_operand")))] + "!optimize_size" +{ + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + emit_insn (gen_inslh (t0, gen_lowpart (DImode, operands[1]), GEN_INT (7))); + emit_insn (gen_inswl_const (t1, gen_lowpart (HImode, operands[1]), + GEN_INT (24))); + emit_insn (gen_iordi3 (t1, t0, t1)); + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x5))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xa))); + emit_insn (gen_addsi3 (operands[0], gen_lowpart (SImode, t0), + gen_lowpart (SImode, t1))); + DONE; +}) + +(define_expand "bswapdi2" + [(set (match_operand:DI 0 "register_operand") + (bswap:DI (match_operand:DI 1 "register_operand")))] + "!optimize_size" +{ + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + /* This method of shifting and masking is not specific to Alpha, but + is only profitable on Alpha because of our handy byte zap insn. */ + + emit_insn (gen_lshrdi3 (t0, operands[1], GEN_INT (32))); + emit_insn (gen_ashldi3 (t1, operands[1], GEN_INT (32))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xcc))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x33))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (8))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (8))); + emit_insn (gen_anddi3 (t0, t0, alpha_expand_zap_mask (0xaa))); + emit_insn (gen_anddi3 (t1, t1, alpha_expand_zap_mask (0x55))); + emit_insn (gen_iordi3 (operands[0], t0, t1)); + DONE; +}) + +;; Next come the shifts and the various extract and insert operations. 
+ +(define_insn "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "P,rS")))] + "" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addq %r1,%r1,%0"; + else + return "s%P2addq %r1,0,%0"; + case 1: + return "sll %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift")]) + +(define_insn "*ashldi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "const_int_operand" "P")) + 0)))] + "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 3" +{ + if (operands[2] == const1_rtx) + return "addl %r1,%r1,%0"; + else + return "s%P2addl %r1,0,%0"; +} + [(set_attr "type" "iadd")]) + +(define_insn "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" + "srl %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" + "sra %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendqi2" + [(set (match_operand:I24MODE 0 "register_operand" "=r") + (sign_extend:I24MODE + (match_operand:QI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendqidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:QI 1 "some_operand")))] + "" +{ + if (TARGET_BWX) + operands[1] = force_reg (QImode, operands[1]); + else + { + rtx x, t1, t2, i56; + + if (unaligned_memory_operand (operands[1], QImode)) + { + x = gen_unaligned_extendqidi (operands[0], XEXP (operands[1], 0)); + alpha_set_memflags (x, operands[1]); + emit_insn (x); + DONE; + } + + t1 = gen_reg_rtx (DImode); + t2 = gen_reg_rtx (DImode); + i56 = GEN_INT (56); + + x = gen_lowpart (DImode, force_reg (QImode, operands[1])); + emit_move_insn (t1, x); + emit_insn (gen_ashldi3 (t2, t1, i56)); + emit_insn (gen_ashrdi3 (operands[0], t2, i56)); + DONE; + } +}) + +(define_insn "*extendqidi2_bwx" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextw %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendhidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:HI 1 "some_operand")))] + "" +{ + if (TARGET_BWX) + operands[1] = force_reg (HImode, operands[1]); + else + { + rtx x, t1, t2, i48; + + if (unaligned_memory_operand (operands[1], HImode)) + { + x = gen_unaligned_extendhidi (operands[0], XEXP (operands[1], 0)); + alpha_set_memflags (x, operands[1]); + emit_insn (x); + DONE; + } + + t1 = gen_reg_rtx (DImode); + t2 = gen_reg_rtx (DImode); + i48 = GEN_INT (48); + + x = gen_lowpart (DImode, force_reg (HImode, operands[1])); + emit_move_insn (t1, x); + emit_insn (gen_ashldi3 (t2, t1, i48)); + emit_insn (gen_ashrdi3 (operands[0], t2, i48)); + DONE; + } +}) + +(define_insn "*extendhidi2_bwx" + [(set (match_operand:DI 0 "register_operand" "=r") + 
(sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_BWX" + "sextw %1,%0" + [(set_attr "type" "shift")]) + +;; Here's how we sign extend an unaligned byte and halfword. Doing this +;; as a pattern saves one instruction. The code is similar to that for +;; the unaligned loads (see below). +;; +;; Operand 1 is the address, operand 0 is the result. + +(define_expand "unaligned_extendqidi" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:QI 0 "register_operand") + (ashiftrt:DI (match_dup 4) (const_int 56)))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[2] = get_unaligned_offset (operands[1], 1); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_expand "unaligned_extendhidi" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:HI 0 "register_operand") + (ashiftrt:DI (match_dup 4) (const_int 48)))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[2] = get_unaligned_offset (operands[1], 2); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_insn "*extxl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "mul8_operand" "I")))] + "" + "ext%M2l %r1,%s3,%0" + [(set_attr "type" "shift")]) + +(define_insn "extxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (ashift:DI (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "ext%M2l %r1,%3,%0" + [(set_attr "type" "shift")]) + +;; Combine has some strange notion of preserving existing undefined behavior +;; in shifts larger than a word size. So capture these patterns that it +;; should have turned into zero_extracts. 
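[Editor's sketch, not part of the patch.] Before those combine-captured forms, here is a rough C model, under my reading of the expanders above, of how unaligned_extendqidi sign-extends a byte without byte memory access. Little-endian layout is assumed, the helper name is hypothetical, and the sketch relies on GCC's arithmetic right shift of signed values and ignores strict-aliasing concerns:

  #include <stdint.h>

  /* Fetch the enclosing aligned quadword (like ldq_u), shift the wanted
     byte to the top, then shift it back down arithmetically so it
     arrives sign-extended.  A byte never straddles a quadword, so no
     high-part insn is needed.  */
  static int64_t
  sext_byte_unaligned (const unsigned char *p)
  {
    uintptr_t a = (uintptr_t) p;
    uint64_t q = *(const uint64_t *) (a & ~(uintptr_t) 7);  /* ldq_u */
    unsigned sh = 8 * (unsigned) (a & 7);   /* bit offset of the byte */
    return (int64_t) (q << (56 - sh)) >> 56;
  }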
+ +(define_insn "*extxl_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))) + (match_operand:DI 3 "mode_mask_operand" "n")))] + "" + "ext%U3l %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "*extql_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "extql %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extqh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "extqh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extwh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 65535)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "extwh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extlh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 2147483647)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "extlh %r1,%2,%0" + [(set_attr "type" "shift")]) + +;; This converts an extXl into an extXh with an appropriate adjustment +;; to the address calculation. + +;;(define_split +;; [(set (match_operand:DI 0 "register_operand") +;; (ashift:DI (zero_extract:DI (match_operand:DI 1 "register_operand") +;; (match_operand:DI 2 "mode_width_operand") +;; (ashift:DI (match_operand:DI 3) +;; (const_int 3))) +;; (match_operand:DI 4 "const_int_operand"))) +;; (clobber (match_operand:DI 5 "register_operand"))] +;; "INTVAL (operands[4]) == 64 - INTVAL (operands[2])" +;; [(set (match_dup 5) (match_dup 6)) +;; (set (match_dup 0) +;; (ashift:DI (zero_extract:DI (match_dup 1) (match_dup 2) +;; (ashift:DI (plus:DI (match_dup 5) +;; (match_dup 7)) +;; (const_int 3))) +;; (match_dup 4)))] +;; " +;;{ +;; operands[6] = plus_constant (DImode, operands[3], +;; INTVAL (operands[2]) / BITS_PER_UNIT); +;; operands[7] = GEN_INT (- INTVAL (operands[2]) / BITS_PER_UNIT); +;;}") + +(define_insn "insl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:I124MODE 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "insl %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insl" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:I124MODE 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "insl %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insql" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "insql %1,%2,%0" + [(set_attr "type" "shift")]) + +;; Combine has this sometimes habit of moving the and outside of the +;; shift, making life more interesting. 
+ +(define_insn "*insxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mul8_operand" "I")) + (match_operand:DI 3 "immediate_operand" "i")))] + "HOST_BITS_PER_WIDE_INT == 64 + && CONST_INT_P (operands[3]) + && (((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])))" +{ +#if HOST_BITS_PER_WIDE_INT == 64 + if ((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "insbl %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inswl %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "insll %1,%s2,%0"; +#endif + gcc_unreachable (); +} + [(set_attr "type" "shift")]) + +;; We do not include the insXh insns because they are complex to express +;; and it does not appear that we would ever want to generate them. +;; +;; Since we need them for block moves, though, cop out and use unspec. + +(define_insn "insxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_INSXH))] + "" + "ins%M2h %1,%3,%0" + [(set_attr "type" "shift")]) + +(define_insn "mskxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (ashift:DI + (match_operand:DI 2 "mode_mask_operand" "n") + (ashift:DI + (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3)))) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" + "msk%U2l %r1,%3,%0" + [(set_attr "type" "shift")]) + +;; We do not include the mskXh insns because it does not appear we would +;; ever generate one. +;; +;; Again, we do for block moves and we use unspec again. + +(define_insn "mskxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_MSKXH))] + "" + "msk%M2h %1,%3,%0" + [(set_attr "type" "shift")]) + +;; Prefer AND + NE over LSHIFTRT + AND. + +(define_insn_and_split "*ze_and_ne" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (match_operand 2 "const_int_operand" "I")))] + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + "#" + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) + (ne:DI (match_dup 0) (const_int 0)))] + "operands[3] = GEN_INT (1 << INTVAL (operands[2]));") + +;; Floating-point operations. All the double-precision insns can extend +;; from single, so indicate that. The exception are the ones that simply +;; play with the sign bits; it's not clear what to do there. 
+ +(define_mode_iterator FMODE [SF DF]) + +(define_mode_attr opmode [(SF "si") (DF "di")]) + +(define_insn "abs2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cpys $f31,%R1,%0" + [(set_attr "type" "fcpys")]) + +(define_insn "*nabs2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE + (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG"))))] + "TARGET_FP" + "cpysn $f31,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "abstf2" + [(parallel [(set (match_operand:TF 0 "register_operand") + (abs:TF (match_operand:TF 1 "reg_or_0_operand"))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" +{ +#if HOST_BITS_PER_WIDE_INT >= 64 + operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63)); +#else + operands[2] = force_reg (DImode, immed_double_const (0, 0x80000000, DImode)); +#endif +}) + +(define_insn_and_split "*abstf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (abs:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "alpha_split_tfmode_frobsign (operands, gen_andnotdi3); DONE;") + +(define_insn "neg2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cpysn %R1,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "negtf2" + [(parallel [(set (match_operand:TF 0 "register_operand") + (neg:TF (match_operand:TF 1 "reg_or_0_operand"))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" +{ +#if HOST_BITS_PER_WIDE_INT >= 64 + operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63)); +#else + operands[2] = force_reg (DImode, immed_double_const (0, 0x80000000, DImode)); +#endif +}) + +(define_insn_and_split "*negtf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (neg:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "alpha_split_tfmode_frobsign (operands, gen_xordi3); DONE;") + +(define_insn "copysign3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN))] + "TARGET_FP" + "cpys %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*ncopysign3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE + (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN)))] + "TARGET_FP" + "cpysn %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*add3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "add%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "add3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "add%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr 
"trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "add%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "add%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "addtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (PLUS, operands); DONE;") + +(define_insn "*sub3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "sub%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "sub3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "sub%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "sub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "subtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (MINUS, operands); DONE;") + +(define_insn "*mul3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG") + (match_operand:FMODE 
2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "mul%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "mul3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "mul%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "mul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "mul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "multf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (MULT, operands); DONE;") + +(define_insn "*div3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "div%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "div3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "div%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && alpha_fptm < 
ALPHA_FPTM_SU" + "div%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "divtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_arith (DIV, operands); DONE;") + +(define_insn "*sqrt2_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && TARGET_FIX && alpha_fptm >= ALPHA_FPTM_SU" + "sqrt%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "sqrt2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && TARGET_FIX" + "sqrt%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +;; Define conversion operators between DFmode and SImode, using the cvtql +;; instruction. To allow combine et al to do useful things, we keep the +;; operation as a unit until after reload, at which point we split the +;; instructions. +;; +;; Note that we (attempt to) only consider this optimization when the +;; ultimate destination is memory. If we will be doing further integer +;; processing, it is cheaper to do the truncation in the int regs. + +(define_insn "*cvtql" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTQL))] + "TARGET_FP" + "cvtql%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "v_sv")]) + +(define_insn_and_split "*fix_truncdfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn_and_split "*fix_truncdfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 3 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=f"))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 3 [(match_dup 1)])) + (set (match_dup 4) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 4))] +{ + operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncdfdi_ieee" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" 
"v_sv_svi")]) + +(define_insn "*fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]))] + "TARGET_FP" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_expand "fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (fix:DI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_FP") + +(define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (unsigned_fix:DI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_FP") + +;; Likewise between SFmode and SImode. + +(define_insn_and_split "*fix_truncsfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] + "operands[5] = adjust_address (operands[0], SFmode, 0);" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn_and_split "*fix_truncsfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 3 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=f"))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 3 [(float_extend:DF (match_dup 1))])) + (set (match_dup 4) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 4))] +{ + operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncsfdi_ieee" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_insn "*fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG"))]))] + "TARGET_FP" + "cvt%-q%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi")]) + +(define_expand "fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (fix:DI (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] + "TARGET_FP") + +(define_expand "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (unsigned_fix:DI + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] + "TARGET_FP") + +(define_expand "fix_trunctfdi2" + [(use (match_operand:DI 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FIX, operands); DONE;") + +(define_expand "fixuns_trunctfdi2" + [(use (match_operand:DI 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + 
"TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (UNSIGNED_FIX, operands); DONE;") + +(define_insn "*floatdisf_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtq%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP" + "cvtq%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn_and_split "*floatsisf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] + "operands[1] = adjust_address (operands[1], SFmode, 0);") + +(define_insn_and_split "*floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float:SF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_FP" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 0)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); +}) + +(define_insn "*floatdidf_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtq%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f")))] + "TARGET_FP" + "cvtq%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui")]) + +(define_insn_and_split "*floatsidf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] + "operands[1] = adjust_address (operands[1], SFmode, 0);") + +(define_insn_and_split "*floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (float:DF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_FP" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[3] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +(define_expand "floatditf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DI 1 "general_operand"))] + 
"TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FLOAT, operands); DONE;") + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand")) + (use (match_operand:DI 1 "register_operand"))] + "TARGET_FP" + "alpha_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand")) + (use (match_operand:DI 1 "register_operand"))] + "TARGET_FP" + "alpha_emit_floatuns (operands); DONE;") + +(define_expand "floatunsditf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DI 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (UNSIGNED_FLOAT, operands); DONE;") + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] + "TARGET_FP" +{ + if (alpha_fptm >= ALPHA_FPTM_SU) + operands[1] = force_reg (SFmode, operands[1]); +}) + +;; The Unicos/Mk assembler doesn't support cvtst, but we've already +;; asserted that alpha_fptm == ALPHA_FPTM_N. + +(define_insn "*extendsfdf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvtsts %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*extendsfdf2_internal" + [(set (match_operand:DF 0 "register_operand" "=f,f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + cpys %1,%1,%0 + ld%, %0,%1 + st%- %1,%0" + [(set_attr "type" "fcpys,fld,fst")]) + +;; Use register_operand for operand 1 to prevent compress_float_constant +;; from doing something silly. When optimizing we'll put things back +;; together anyway. 
+(define_expand "extendsftf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:SF 1 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (tmp, operands[1])); + emit_insn (gen_extenddftf2 (operands[0], tmp)); + DONE; +}) + +(define_expand "extenddftf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DF 1 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FLOAT_EXTEND, operands); DONE;") + +(define_insn "*truncdfsf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "cvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_expand "trunctfdf2" + [(use (match_operand:DF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_xfloating_cvt (FLOAT_TRUNCATE, operands); DONE;") + +(define_expand "trunctfsf2" + [(use (match_operand:SF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_FP && TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmpf, sticky, arg, lo, hi; + + tmpf = gen_reg_rtx (DFmode); + sticky = gen_reg_rtx (DImode); + arg = copy_to_mode_reg (TFmode, operands[1]); + lo = gen_lowpart (DImode, arg); + hi = gen_highpart (DImode, arg); + + /* Convert the low word of the TFmode value into a sticky rounding bit, + then or it into the low bit of the high word. This leaves the sticky + bit at bit 48 of the fraction, which is representable in DFmode, + which prevents rounding error in the final conversion to SFmode. */ + + emit_insn (gen_rtx_SET (VOIDmode, sticky, + gen_rtx_NE (DImode, lo, const0_rtx))); + emit_insn (gen_iordi3 (hi, hi, sticky)); + emit_insn (gen_trunctfdf2 (tmpf, arg)); + emit_insn (gen_truncdfsf2 (operands[0], tmpf)); + DONE; +}) + +;; Next are all the integer comparisons, and conditional moves and branches +;; and some of the related define_expand's and define_split's. + +(define_insn "*setcc_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "alpha_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%C1 %2,%3,%0" + [(set_attr "type" "icmp")]) + +;; Yes, we can technically support reg_or_8bit_operand in operand 2, +;; but that's non-canonical rtl and allowing that causes inefficiencies +;; from cse on. 
+(define_insn "*setcc_swapped_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "alpha_swapped_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_0_operand" "rJ")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%c1 %r3,%2,%0" + [(set_attr "type" "icmp")]) + +;; Use match_operator rather than ne directly so that we can match +;; multiple integer modes. +(define_insn "*setne_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_CODE (operands[1]) == NE + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmpult $31,%2,%0" + [(set_attr "type" "icmp")]) + +;; The mode folding trick can't be used with const_int operands, since +;; reload needs to know the proper mode. +;; +;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand +;; in order to create more pairs of constants. As long as we're allowing +;; two constants at the same time, and will have to reload one of them... + +(define_insn "*movcc_internal" + [(set (match_operand:IMODE 0 "register_operand" "=r,r,r,r") + (if_then_else:IMODE + (match_operator 2 "signed_comparison_operator" + [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") + (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) + (match_operand:IMODE 1 "add_operand" "rI,0,rI,0") + (match_operand:IMODE 5 "add_operand" "0,rI,0,rI")))] + "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" + "@ + cmov%C2 %r3,%1,%0 + cmov%D2 %r3,%5,%0 + cmov%c2 %r4,%1,%0 + cmov%d2 %r4,%5,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movcc_lbc" + [(set (match_operand:IMODE 0 "register_operand" "=r,r") + (if_then_else:IMODE + (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") + (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbc %r2,%1,%0 + cmovlbs %r2,%3,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movcc_lbs" + [(set (match_operand:IMODE 0 "register_operand" "=r,r") + (if_then_else:IMODE + (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") + (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + cmovlbs %r2,%1,%0 + cmovlbc %r2,%3,%0" + [(set_attr "type" "icmov")]) + +;; For ABS, we have two choices, depending on whether the input and output +;; registers are the same or not. 
+(define_expand "absdi2" + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_operand:DI 1 "register_operand")))] + "" +{ + if (rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_absdi2_same (operands[0], gen_reg_rtx (DImode))); + else + emit_insn (gen_absdi2_diff (operands[0], operands[1])); + DONE; +}) + +(define_expand "absdi2_same" + [(set (match_operand:DI 1 "register_operand") + (neg:DI (match_operand:DI 0 "register_operand"))) + (set (match_dup 0) + (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 1)))]) + +(define_expand "absdi2_diff" + [(set (match_operand:DI 0 "register_operand") + (neg:DI (match_operand:DI 1 "register_operand"))) + (set (match_dup 0) + (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_dup 0))) + (clobber (match_operand:DI 1 "register_operand"))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_operand:DI 1 "register_operand")))] + "! rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (neg:DI (abs:DI (match_dup 0)))) + (clobber (match_operand:DI 1 "register_operand"))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (le (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (neg:DI (abs:DI (match_operand:DI 1 "register_operand"))))] + "! 
rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (gt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_insn "3" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (any_maxmin:I12MODE + (match_operand:I12MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I12MODE 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + " %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "smaxdi3" + [(set (match_dup 3) + (le:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (smax:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (le:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*smax_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smax:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "cmovlt %0,0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "smindi3" + [(set (match_dup 3) + (lt:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (smin:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (lt:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*smin_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smin:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "cmovgt %0,0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "umaxdi3" + [(set (match_dup 3) + (leu:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (umax:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (leu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_expand "umindi3" + [(set (match_dup 3) + (ltu:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (umin:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 
"reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (ltu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*bcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (label_ref (match_operand 0)) + (pc)))] + "" + "b%C1 %r2,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*bcc_reverse" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]) + + (pc) + (label_ref (match_operand 0))))] + "" + "b%c1 %2,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbs_normal" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "" + "blbs %r1,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbc_normal" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "" + "blbc %r1,%0" + [(set_attr "type" "ibr")]) + +(define_split + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "comparison_operator" + [(zero_extract:DI (match_operand:DI 2 "register_operand") + (const_int 1) + (match_operand:DI 3 "const_int_operand")) + (const_int 0)]) + (label_ref (match_operand 0)) + (pc))) + (clobber (match_operand:DI 4 "register_operand"))])] + "INTVAL (operands[3]) != 0" + [(set (match_dup 4) + (lshiftrt:DI (match_dup 2) (match_dup 3))) + (set (pc) + (if_then_else (match_op_dup 1 + [(zero_extract:DI (match_dup 4) + (const_int 1) + (const_int 0)) + (const_int 0)]) + (label_ref (match_dup 0)) + (pc)))] + ) + +;; The following are the corresponding floating-point insns. Recall +;; we need to have variants that expand the arguments from SFmode +;; to DFmode. 
+ +(define_insn "*cmpdf_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm >= ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_internal" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "alpha_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "cmp%-%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*movcc_internal" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (if_then_else:FMODE + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:FMODE 1 "reg_or_0_operand" "fG,0") + (match_operand:FMODE 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext1" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:DF 1 "reg_or_0_operand" "fG,0") + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext3" + [(set (match_operand:SF 0 
"register_operand" "=f,f") + (if_then_else:SF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:SF 1 "reg_or_0_operand" "fG,0") + (match_operand:SF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext4" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" + "@ + fcmov%C3 %R4,%R1,%0 + fcmov%D3 %R4,%R5,%0" + [(set_attr "type" "fcmov")]) + +(define_expand "smaxdf3" + [(set (match_dup 3) + (le:DF (match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand"))) + (set (match_operand:DF 0 "register_operand") + (if_then_else:DF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smindf3" + [(set (match_dup 3) + (lt:DF (match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand"))) + (set (match_operand:DF 0 "register_operand") + (if_then_else:DF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smaxsf3" + [(set (match_dup 3) + (le:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) + (set (match_operand:SF 0 "register_operand") + (if_then_else:SF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "sminsf3" + [(set (match_dup 3) + (lt:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) + (set (match_operand:SF 0 "register_operand") + (if_then_else:SF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && alpha_fptm < ALPHA_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_insn "*fbcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +(define_insn "*fbcc_ext_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +;; These are the main define_expand's used to make conditional branches +;; and compares. 
+ +(define_expand "cbranchdf4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand")])) + (use (match_operand 3))] + "TARGET_FP" + "alpha_emit_conditional_branch (operands, DFmode); DONE;") + +(define_expand "cbranchtf4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:TF 1 "general_operand") + (match_operand:TF 2 "general_operand")])) + (use (match_operand 3))] + "TARGET_HAS_XFLOATING_LIBS" + "alpha_emit_conditional_branch (operands, TFmode); DONE;") + +(define_expand "cbranchdi4" + [(use (match_operator 0 "alpha_cbranch_operator" + [(match_operand:DI 1 "some_operand") + (match_operand:DI 2 "some_operand")])) + (use (match_operand 3))] + "" + "alpha_emit_conditional_branch (operands, DImode); DONE;") + +(define_expand "cstoredf4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:DF 2 "reg_or_0_operand") + (match_operand:DF 3 "reg_or_0_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_FP" +{ + if (alpha_emit_setcc (operands, DFmode)) + DONE; + else + FAIL; +}) + +(define_expand "cstoretf4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:TF 2 "general_operand") + (match_operand:TF 3 "general_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" +{ + if (alpha_emit_setcc (operands, TFmode)) + DONE; + else + FAIL; +}) + +(define_expand "cstoredi4" + [(use (match_operator:DI 1 "alpha_cbranch_operator" + [(match_operand:DI 2 "some_operand") + (match_operand:DI 3 "some_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "" +{ + if (alpha_emit_setcc (operands, DImode)) + DONE; + else + FAIL; +}) + +;; These are the main define_expand's used to make conditional moves. + +(define_expand "movcc" + [(set (match_operand:I48MODE 0 "register_operand") + (if_then_else:I48MODE + (match_operand 1 "comparison_operator") + (match_operand:I48MODE 2 "reg_or_8bit_operand") + (match_operand:I48MODE 3 "reg_or_8bit_operand")))] + "" +{ + operands[1] = alpha_emit_conditional_move (operands[1], mode); + if (operands[1] == 0) + FAIL; +}) + +(define_expand "movcc" + [(set (match_operand:FMODE 0 "register_operand") + (if_then_else:FMODE + (match_operand 1 "comparison_operator") + (match_operand:FMODE 2 "reg_or_8bit_operand") + (match_operand:FMODE 3 "reg_or_8bit_operand")))] + "" +{ + operands[1] = alpha_emit_conditional_move (operands[1], mode); + if (operands[1] == 0) + FAIL; +}) + +;; These define_split definitions are used in cases when comparisons have +;; not be stated in the correct way and we need to reverse the second +;; comparison. For example, x >= 7 has to be done as x < 6 with the +;; comparison that tests the result being reversed. We have one define_split +;; for each use of a comparison. They do not match valid insns and need +;; not generate valid insns. +;; +;; We can also handle equality comparisons (and inequality comparisons in +;; cases where the resulting add cannot overflow) by doing an add followed by +;; a comparison with zero. This is faster since the addition takes one +;; less cycle than a compare when feeding into a conditional move. +;; For this case, we also have an SImode pattern since we can merge the add +;; and sign extend and the order doesn't matter. +;; +;; We do not do this for floating-point, since it isn't clear how the "wrong" +;; operation could have been generated. 
+ +(define_split + [(set (match_operand:DI 0 "register_operand") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand") + (match_operand:DI 3 "reg_or_cint_operand")]) + (match_operand:DI 4 "reg_or_cint_operand") + (match_operand:DI 5 "reg_or_cint_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] + "operands[3] != const0_rtx" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + + /* If we are comparing for equality with a constant and that constant + appears in the arm when the register equals the constant, use the + register since that is more likely to match (and to produce better code + if both would). */ + + if (code == EQ && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[4], operands[3])) + operands[4] = operands[2]; + + else if (code == NE && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[5], operands[3])) + operands[5] = operands[2]; + + if (code == NE || code == EQ + || (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1)) + { + if (CONST_INT_P (operands[3])) + operands[7] = gen_rtx_PLUS (DImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + operands[7] = gen_rtx_MINUS (DImode, operands[2], operands[3]); + + operands[8] = gen_rtx_fmt_ee (code, VOIDmode, operands[6], const0_rtx); + } + + else if (code == EQ || code == LE || code == LT + || code == LEU || code == LTU) + { + operands[7] = gen_rtx_fmt_ee (code, DImode, operands[2], operands[3]); + operands[8] = gen_rtx_NE (VOIDmode, operands[6], const0_rtx); + } + else + { + operands[7] = gen_rtx_fmt_ee (reverse_condition (code), DImode, + operands[2], operands[3]); + operands[8] = gen_rtx_EQ (VOIDmode, operands[6], const0_rtx); + } +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:SI 2 "reg_or_0_operand") + (match_operand:SI 3 "reg_or_cint_operand")]) + (match_operand:DI 4 "reg_or_8bit_operand") + (match_operand:DI 5 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] + "operands[3] != const0_rtx + && (GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + rtx tem; + + if ((code != NE && code != EQ + && ! (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1))) + FAIL; + + if (CONST_INT_P (operands[3])) + tem = gen_rtx_PLUS (SImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + tem = gen_rtx_MINUS (SImode, operands[2], operands[3]); + + operands[7] = gen_rtx_SIGN_EXTEND (DImode, tem); + operands[8] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode, + operands[6], const0_rtx); +}) + +;; Prefer to use cmp and arithmetic when possible instead of a cmove. 
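One concrete case of preferring compare-plus-arithmetic: the *cmp_sadd_di pattern further down turns a conditional add of 4 or 8 into a compare producing 0/1 followed by a scaled add, with no conditional move at all. An illustrative sketch (made-up function name; whether combine forms exactly this RTL depends on the surrounding code):

/* (x == 0 ? 8 : 0) + y can be combined into
     (plus (if_then_else (eq x 0) (const_int 8) (const_int 0)) y)
   which *cmp_sadd_di splits into  cmpeq x,0,t ; s8addq t,y,result.  */
long
bump_if_zero (long x, long y)
{
  return y + (x == 0 ? 8 : 0);
}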
+ +(define_split + [(set (match_operand 0 "register_operand") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand") + (const_int 0)]) + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")))] + "" + [(const_int 0)] +{ + if (alpha_split_conditional_move (GET_CODE (operands[1]), operands[0], + operands[2], operands[3], operands[4])) + DONE; + else + FAIL; +}) + +;; ??? Why combine is allowed to create such non-canonical rtl, I don't know. +;; Oh well, we match it in movcc, so it must be partially our fault. +(define_split + [(set (match_operand 0 "register_operand") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(const_int 0) + (match_operand:DI 2 "reg_or_0_operand")]) + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")))] + "" + [(const_int 0)] +{ + if (alpha_split_conditional_move (swap_condition (GET_CODE (operands[1])), + operands[0], operands[2], operands[3], + operands[4])) + DONE; + else + FAIL; +}) + +(define_insn_and_split "*cmp_sadd_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (if_then_else:DI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_sadd_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_sadd_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (plus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (if_then_else:DI + 
(match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_ssub_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (if_then_else:SI + (match_operator 1 "alpha_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +;; Here are the CALL and unconditional branch insns. Calls on NT and OSF +;; work differently, so we have different patterns for each. + +(define_expand "call" + [(use (match_operand:DI 0)) + (use (match_operand 1)) + (use (match_operand 2)) + (use (match_operand 3))] + "" +{ + if (TARGET_ABI_OPEN_VMS) + emit_call_insn (gen_call_vms (operands[0], operands[2])); + else + emit_call_insn (gen_call_osf (operands[0], operands[1])); + DONE; +}) + +(define_expand "sibcall" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "TARGET_ABI_OSF" +{ + gcc_assert (MEM_P (operands[0])); + operands[0] = XEXP (operands[0], 0); +}) + +(define_expand "call_osf" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + if (! 
call_operand (operands[0], Pmode)) + operands[0] = copy_to_mode_reg (Pmode, operands[0]); +}) + +;; +;; call openvms/alpha +;; op 0: symbol ref for called function +;; op 1: next_arg_reg (argument information value for R25) +;; +(define_expand "call_vms" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (use (match_dup 2)) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + + /* Always load AI with argument information, then handle symbolic and + indirect call differently. Load RA and set operands[2] to PV in + both cases. */ + + emit_move_insn (gen_rtx_REG (DImode, 25), operands[1]); + if (GET_CODE (operands[0]) == SYMBOL_REF) + { + operands[2] = const0_rtx; + } + else + { + emit_move_insn (gen_rtx_REG (Pmode, 26), + gen_rtx_MEM (Pmode, plus_constant (Pmode, + operands[0], 8))); + operands[2] = operands[0]; + } +}) + +(define_expand "call_value" + [(use (match_operand 0)) + (use (match_operand:DI 1)) + (use (match_operand 2)) + (use (match_operand 3)) + (use (match_operand 4))] + "" +{ + if (TARGET_ABI_OPEN_VMS) + emit_call_insn (gen_call_value_vms (operands[0], operands[1], + operands[3])); + else + emit_call_insn (gen_call_value_osf (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand 1)) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "TARGET_ABI_OSF" +{ + gcc_assert (MEM_P (operands[1])); + operands[1] = XEXP (operands[1], 0); +}) + +(define_expand "call_value_osf" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand 1)) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + if (! call_operand (operands[1], Pmode)) + operands[1] = copy_to_mode_reg (Pmode, operands[1]); +}) + +(define_expand "call_value_vms" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1)) + (match_operand 2))) + (use (match_dup 3)) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + + /* Always load AI with argument information, then handle symbolic and + indirect call differently. Load RA and set operands[3] to PV in + both cases. 
*/ + + emit_move_insn (gen_rtx_REG (DImode, 25), operands[2]); + if (GET_CODE (operands[1]) == SYMBOL_REF) + { + operands[3] = const0_rtx; + } + else + { + emit_move_insn (gen_rtx_REG (Pmode, 26), + gen_rtx_MEM (Pmode, plus_constant (Pmode, + operands[1], 8))); + operands[3] = operands[1]; + } +}) + +(define_insn "*call_osf_1_er_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jsr $26,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,(%0),0\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jsr $26,($27),%0\t\t!lituse_jsr!%#\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar(); while (1); }. +(define_peephole2 + [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[0], Pmode) + && (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(call (mem:DI (match_dup 2)) + (match_dup 1)) + (use (reg:DI 29)) + (use (match_dup 0)) + (use (match_dup 3)) + (clobber (reg:DI 26))])] +{ + if (CONSTANT_P (operands[0])) + { + operands[2] = gen_rtx_REG (Pmode, 27); + operands[3] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, + operands[0], operands[3])); + } + else + { + operands[2] = operands[0]; + operands[0] = const0_rtx; + operands[3] = const0_rtx; + } +}) + +(define_peephole2 + [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[0], Pmode) + && ! 
(peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(call (mem:DI (match_dup 2)) + (match_dup 1)) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP1)) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP2))] +{ + if (CONSTANT_P (operands[0])) + { + operands[2] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, + operands[0], operands[4])); + } + else + { + operands[2] = operands[0]; + operands[0] = const0_rtx; + operands[4] = const0_rtx; + } + operands[3] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +}) + +(define_insn "*call_osf_2_er_nogp" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (use (reg:DI 29)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%0),%2%J3" + [(set_attr "type" "jsr")]) + +(define_insn "*call_osf_2_er" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%0),%2%J3\;ldah $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "jsr") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_1_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,$%0..ng + jsr $26,%0" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,($27),0\;ldgp $29,0($26) + bsr $26,$%0..ng + jsr $26,%0\;ldgp $29,0($26)" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,%0\t\t!samegp + ldq $27,%0($29)\t\t!literal!%#\;jmp $31,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +;; Note that the DEC assembler expands "jmp foo" with $at, which +;; doesn't do what we want. +(define_insn "*sibcall_osf_1" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,$%0..ng + lda $27,%0\;jmp $31,($27),%0" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +; GAS relies on the order and position of instructions output below in order +; to generate relocs for VMS link to potentially optimize the call. +; Please do not molest. 
+(define_insn "*call_vms_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "r,s")) + (match_operand 1)) + (use (match_operand:DI 2 "nonmemory_operand" "r,n")) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + switch (which_alternative) + { + case 0: + return "mov %2,$27\;jsr $26,0\;ldq $27,0($29)"; + case 1: + operands [2] = alpha_use_linkage (operands [0], true, false); + operands [3] = alpha_use_linkage (operands [0], false, false); + return "ldq $26,%3\;ldq $27,%2\;jsr $26,%0\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "jsr") + (set_attr "length" "12,16")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0) + (const_int 0)) + (match_operand 1) + (match_operand 2)])] + "" +{ + int i; + + emit_call_insn (GEN_CALL (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0") + (set_attr "type" "none")]) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0)))] + "" + "br $31,%l0" + [(set_attr "type" "ibr")]) + +(define_expand "return" + [(return)] + "direct_return ()") + +(define_insn "*return_internal" + [(return)] + "reload_completed" + "ret $31,($26),1" + [(set_attr "type" "ibr")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "r"))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "tablejump" + [(parallel [(set (pc) + (match_operand 0 "register_operand")) + (use (label_ref:DI (match_operand 1)))])] + "" +{ + if (TARGET_ABI_OSF) + { + rtx dest = gen_reg_rtx (DImode); + emit_insn (gen_extendsidi2 (dest, operands[0])); + emit_insn (gen_adddi3 (dest, pic_offset_table_rtx, dest)); + operands[0] = dest; + } +}) + +(define_insn "*tablejump_internal" + [(set (pc) + (match_operand:DI 0 "register_operand" "r")) + (use (label_ref (match_operand 1)))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +;; Cache flush. Used by alpha_trampoline_init. 0x86 is PAL_imb, but we don't +;; want to have to include pal.h in our .s file. +(define_insn "imb" + [(unspec_volatile [(const_int 0)] UNSPECV_IMB)] + "" + "call_pal 0x86" + [(set_attr "type" "callpal")]) + +(define_expand "clear_cache" + [(match_operand:DI 0) ; region start + (match_operand:DI 1)] ; region end + "" +{ + emit_insn (gen_imb ()); + DONE; +}) + +;; BUGCHK is documented common to OSF/1 and VMS PALcode. +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "call_pal 0x81" + [(set_attr "type" "callpal")]) + +;; For userland, we load the thread pointer from the TCB. +;; For the kernel, we load the per-cpu private value. 
+ +(define_insn "get_thread_pointerdi" + [(set (match_operand:DI 0 "register_operand" "=v") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "TARGET_ABI_OSF" +{ + if (TARGET_TLS_KERNEL) + return "call_pal 0x32"; + else + return "call_pal 0x9e"; +} + [(set_attr "type" "callpal")]) + +;; For completeness, and possibly a __builtin function, here's how to +;; set the thread pointer. Since we don't describe enough of this +;; quantity for CSE, we have to use a volatile unspec, and then there's +;; not much point in creating an R16_REG register class. + +(define_expand "set_thread_pointerdi" + [(set (reg:DI 16) (match_operand:DI 0 "input_operand")) + (unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "TARGET_ABI_OSF") + +(define_insn "*set_tp" + [(unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "TARGET_ABI_OSF" +{ + if (TARGET_TLS_KERNEL) + return "call_pal 0x31"; + else + return "call_pal 0x9f"; +} + [(set_attr "type" "callpal")]) + +;; Special builtins for establishing and reverting VMS condition handlers. + +(define_expand "builtin_establish_vms_condition_handler" + [(set (reg:DI 0) (match_operand:DI 0 "register_operand")) + (use (match_operand:DI 1 "address_operand"))] + "TARGET_ABI_OPEN_VMS" +{ + alpha_expand_builtin_establish_vms_condition_handler (operands[0], + operands[1]); +}) + +(define_expand "builtin_revert_vms_condition_handler" + [(set (reg:DI 0) (match_operand:DI 0 "register_operand"))] + "TARGET_ABI_OPEN_VMS" + "alpha_expand_builtin_revert_vms_condition_handler (operands[0]);") + +;; Finally, we have the basic data motion insns. The byte and word insns +;; are done via define_expand. Start with the floating-point insns, since +;; they are simpler. + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand") + (match_operand:SF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], SFmode)) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)" + "@ + cpys %R1,%R1,%0 + ld%, %0,%1 + bis $31,%r1,%0 + ldl %0,%1 + st%, %R1,%0 + stl %r1,%0 + itofs %1,%0 + ftois %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") + (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand") + (match_operand:DF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[1]); +}) + +(define_insn "*movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode)" + "@ + cpys %R1,%R1,%0 + ld%- %0,%1 + bis $31,%r1,%0 + ldq %0,%1 + st%- %R1,%0 + stq %r1,%0 + itoft %1,%0 + ftoit %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") + (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) + +;; Subregs suck for register allocation. Pretend we can move TFmode +;; data between general registers until after reload. +;; ??? Is this still true now that we have the lower-subreg pass? + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand") + (match_operand:TF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! 
reg_or_0_operand (operands[1], TFmode)) + operands[1] = force_reg (TFmode, operands[1]); +}) + +(define_insn_and_split "*movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o") + (match_operand:TF 1 "input_operand" "roG,rG"))] + "register_operand (operands[0], TFmode) + || reg_or_0_operand (operands[1], TFmode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + "alpha_split_tmode_pair (operands, TFmode, true);") + +;; We do two major things here: handle mem->mem and construct long +;; constants. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "general_operand"))] + "" +{ + if (alpha_expand_mov (SImode, operands)) + DONE; +}) + +(define_insn "*movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,m,r") + (match_operand:SI 1 "input_operand" "rJ,K,L,n,m,rJ,s"))] + "register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode)" + "@ + bis $31,%r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + # + ldl %0,%1 + stl %r1,%0 + lda %0,%1" + [(set_attr "type" "ilog,iadd,iadd,multi,ild,ist,ldsym") + (set_attr "isa" "*,*,*,*,*,*,vms")]) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "non_add_const_operand"))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (SImode, operands)) + DONE; + else + FAIL; +}) + +(define_insn "*movdi_er_low_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "local_symbolic_operand")))] + "TARGET_EXPLICIT_RELOCS" +{ + if (true_regnum (operands[1]) == 29) + return "lda %0,%2(%1)\t\t!gprel"; + else + return "lda %0,%2(%1)\t\t!gprellow"; +} + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "small_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (lo_sum:DI (match_dup 2) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "local_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1)))) + (set (match_dup 0) + (lo_sum:DI (match_dup 0) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(match_operand 0 "some_small_symbolic_operand")] + "" + [(match_dup 0)] + "operands[0] = split_small_symbolic_operand (operands[0]);") + +;; Accepts any symbolic, not just global, since function calls that +;; don't go via bsr still use !literal in hopes of linker relaxation. 
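+;; For example (approximate assembly; the sequence number is arbitrary), an
+;; indirect call through the GOT pairs the two relocations
+;;
+;;	ldq $27,foo($29)	!literal!3
+;;	jsr $26,($27),foo	!lituse_jsr!3
+;;
+;; so that the linker may later relax the pair into a direct bsr to foo.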
+(define_insn "movdi_er_high_g" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_LITERAL))] + "TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldq %0,%2(%1)\t\t!literal"; + else + return "ldq %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "global_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1) + (const_int 0)] UNSPEC_LITERAL))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "movdi_er_tlsgd" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_TLSGD))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "lda %0,%2(%1)\t\t!tlsgd"; + else + return "lda %0,%2(%1)\t\t!tlsgd!%3"; +}) + +(define_insn "movdi_er_tlsldm" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_TLSLDM))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[2]) == 0) + return "lda %0,%&(%1)\t\t!tlsldm"; + else + return "lda %0,%&(%1)\t\t!tlsldm!%2"; +}) + +(define_insn "*movdi_er_gotdtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand")] + UNSPEC_DTPREL))] + "HAVE_AS_TLS" + "ldq %0,%2(%1)\t\t!gotdtprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gotdtp_symbolic_operand"))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_DTPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) + +(define_insn "*movdi_er_gottp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand")] + UNSPEC_TPREL))] + "HAVE_AS_TLS" + "ldq %0,%2(%1)\t\t!gottprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gottp_symbolic_operand"))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_TPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) + +(define_insn "*movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r,r,r,r,r,r,r, m, *f,*f, Q, r,*f") + (match_operand:DI 1 "input_operand" + "rJ,K,L,T,s,n,s,m,rJ,*fJ, Q,*f,*f, r"))] + "register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode)" + "@ + mov %r1,%0 + lda %0,%1($31) + ldah %0,%h1($31) + # + # + # + lda %0,%1 + ldq%A1 %0,%1 + stq%A0 %r1,%0 + fmov %R1,%0 + ldt %0,%1 + stt %R1,%0 + ftoit %1,%0 + itoft %1,%0" + [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ldsym,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "isa" "*,*,*,er,er,*,ner,*,*,*,*,*,fix,fix") + (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*,*,*,*")]) + +;; VMS needs to set up "vms_base_regno" for unwinding. 
This move +;; often appears dead to the life analysis code, at which point we +;; die for emitting dead prologue instructions. Force this live. + +(define_insn "force_movdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")] + UNSPECV_FORCE_MOV))] + "" + "mov %1,%0" + [(set_attr "type" "ilog")]) + +;; We do three major things here: handle mem->mem, put 64-bit constants in +;; memory, and construct long 32-bit constants. + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand") + (match_operand:DI 1 "general_operand"))] + "" +{ + if (alpha_expand_mov (DImode, operands)) + DONE; +}) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "non_add_const_operand"))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (DImode, operands)) + DONE; + else + FAIL; +}) + +;; We need to prevent reload from splitting TImode moves, because it +;; might decide to overwrite a pointer with the value it points to. +;; In that case we have to do the loads in the appropriate order so +;; that the pointer is not destroyed too early. + +(define_insn_and_split "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (match_operand:TI 1 "input_operand" "roJ,rJ"))] + "(register_operand (operands[0], TImode) + /* Prevent rematerialization of constants. */ + && ! CONSTANT_P (operands[1])) + || reg_or_0_operand (operands[1], TImode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + "alpha_split_tmode_pair (operands, TImode, true);") + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], TImode)) + operands[1] = force_reg (TImode, operands[1]); + + if (operands[1] == const0_rtx) + ; + /* We must put 64-bit constants in memory. We could keep the + 32-bit constants in TImode and rely on the splitter, but + this doesn't seem to be worth the pain. */ + else if (CONST_INT_P (operands[1]) + || GET_CODE (operands[1]) == CONST_DOUBLE) + { + rtx in[2], out[2], target; + + gcc_assert (can_create_pseudo_p ()); + + split_double (operands[1], &in[0], &in[1]); + + if (in[0] == const0_rtx) + out[0] = const0_rtx; + else + { + out[0] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[0], in[0])); + } + + if (in[1] == const0_rtx) + out[1] = const0_rtx; + else + { + out[1] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[1], in[1])); + } + + if (!REG_P (operands[0])) + target = gen_reg_rtx (TImode); + else + target = operands[0]; + + emit_insn (gen_movdi (operand_subword (target, 0, 0, TImode), out[0])); + emit_insn (gen_movdi (operand_subword (target, 1, 0, TImode), out[1])); + + if (target != operands[0]) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], target)); + + DONE; + } +}) + +;; These are the partial-word cases. +;; +;; First we have the code to load an aligned word. Operand 0 is the register +;; in which to place the result. It's mode is QImode or HImode. Operand 1 +;; is an SImode MEM at the low-order byte of the proper word. Operand 2 is the +;; number of bits within the word that the value is. Operand 3 is an SImode +;; scratch register. If operand 0 is a hard register, operand 3 may be the +;; same register. It is allowed to conflict with operand 1 as well. 
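+;; Roughly, the two expanders below turn an aligned QImode/HImode load into
+;;
+;;	ldl $scratch,mem
+;;	extbl $scratch,N,$dest	# or extwl; N is the byte offset in the word
+;;
+;; with the extract written as a zero_extract of constant width and position.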
+ +(define_expand "aligned_loadqi" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 8) + (match_operand:DI 2 "const_int_operand")))]) + +(define_expand "aligned_loadhi" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 16) + (match_operand:DI 2 "const_int_operand")))]) + +;; Similar for unaligned loads, where we use the sequence from the +;; Alpha Architecture manual. We have to distinguish between little-endian +;; and big-endian systems as the sequences are different. +;; +;; Operand 1 is the address. Operands 2 and 3 are temporaries, where +;; operand 3 can overlap the input and output registers. + +(define_expand "unaligned_loadqi" + [(set (match_operand:DI 2 "register_operand") + (mem:DI (and:DI (match_operand:DI 1 "address_operand") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_dup 2) + (const_int 8) + (ashift:DI (match_dup 3) (const_int 3))))]) + +(define_expand "unaligned_loadhi" + [(set (match_operand:DI 2 "register_operand") + (mem:DI (and:DI (match_operand:DI 1 "address_operand") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_dup 2) + (const_int 16) + (ashift:DI (match_dup 3) (const_int 3))))]) + +;; Storing an aligned byte or word requires two temporaries. Operand 0 is the +;; aligned SImode MEM. Operand 1 is the register containing the +;; byte or word to store. Operand 2 is the number of bits within the word that +;; the value should be placed. Operands 3 and 4 are SImode temporaries. + +(define_expand "aligned_store" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 0 "memory_operand")) + (set (subreg:DI (match_dup 3) 0) + (and:DI (subreg:DI (match_dup 3) 0) (match_dup 5))) + (set (subreg:DI (match_operand:SI 4 "register_operand") 0) + (ashift:DI (zero_extend:DI (match_operand 1 "register_operand")) + (match_operand:DI 2 "const_int_operand"))) + (set (subreg:DI (match_dup 4) 0) + (ior:DI (subreg:DI (match_dup 4) 0) (subreg:DI (match_dup 3) 0))) + (set (match_dup 0) (match_dup 4))] + "" +{ + operands[5] = GEN_INT (~ (GET_MODE_MASK (GET_MODE (operands[1])) + << INTVAL (operands[2]))); +}) + +;; For the unaligned byte and halfword cases, we use code similar to that +;; in the ;; Architecture book, but reordered to lower the number of registers +;; required. Operand 0 is the address. Operand 1 is the data to store. +;; Operands 2, 3, and 4 are DImode temporaries, where operands 2 and 4 may +;; be the same temporary, if desired. If the address is in a register, +;; operand 2 can be that register. 
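+;; For a byte store this corresponds approximately to the classic sequence
+;; from the architecture handbook (address in $a, data in $d, temporaries
+;; named after the operand numbers above):
+;;
+;;	ldq_u $t3,0($a)		# fetch the containing quadword
+;;	mskbl $t3,$a,$t3	# clear the target byte
+;;	insbl $d,$a,$t4		# shift the new byte into position
+;;	bis $t4,$t3,$t4		# merge
+;;	stq_u $t4,0($a)		# store the quadword back
+;;
+;; with inswl/mskwl doing the same job in the halfword case.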
+ +(define_expand "unaligned_store" + [(set (match_operand:DI 3 "register_operand") + (mem:DI (and:DI (match_operand:DI 0 "address_operand") + (const_int -8)))) + (set (match_operand:DI 2 "register_operand") + (match_dup 0)) + (set (match_dup 3) + (and:DI (not:DI (ashift:DI (match_dup 5) + (ashift:DI (match_dup 2) (const_int 3)))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand") + (ashift:DI (zero_extend:DI + (match_operand:I12MODE 1 "register_operand")) + (ashift:DI (match_dup 2) (const_int 3)))) + (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3))) + (set (mem:DI (and:DI (match_dup 0) (const_int -8))) + (match_dup 4))] + "" + "operands[5] = GEN_INT (GET_MODE_MASK (mode));") + +;; Here are the define_expand's for QI and HI moves that use the above +;; patterns. We have the normal sets, plus the ones that need scratch +;; registers for reload. + +(define_expand "mov" + [(set (match_operand:I12MODE 0 "nonimmediate_operand") + (match_operand:I12MODE 1 "general_operand"))] + "" +{ + if (TARGET_BWX + ? alpha_expand_mov (mode, operands) + : alpha_expand_mov_nobwx (mode, operands)) + DONE; +}) + +(define_insn "*movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:QI 1 "input_operand" "rJ,n,m,rJ"))] + "register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode)" + "@ + bis $31,%r1,%0 + lda %0,%L1($31) + ldbu %0,%1 + stb %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist") + (set_attr "isa" "*,*,bwx,bwx")]) + +(define_insn "*movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "input_operand" "rJ,n,m,rJ"))] + "register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode)" + "@ + bis $31,%r1,%0 + lda %0,%L1($31) + ldwu %0,%1 + stw %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist") + (set_attr "isa" "*,*,bwx,bwx")]) + +;; We need to hook into the extra support that we have for HImode +;; reloads when BWX insns are not available. +(define_expand "movcqi" + [(set (match_operand:CQI 0 "nonimmediate_operand") + (match_operand:CQI 1 "general_operand"))] + "!TARGET_BWX" +{ + if (GET_CODE (operands[0]) == CONCAT || GET_CODE (operands[1]) == CONCAT) + ; + else if (!any_memory_operand (operands[0], CQImode)) + { + if (!any_memory_operand (operands[1], CQImode)) + { + emit_move_insn (gen_lowpart (HImode, operands[0]), + gen_lowpart (HImode, operands[1])); + DONE; + } + if (aligned_memory_operand (operands[1], CQImode)) + { + bool done; + do_aligned1: + operands[1] = gen_lowpart (HImode, operands[1]); + do_aligned2: + operands[0] = gen_lowpart (HImode, operands[0]); + done = alpha_expand_mov_nobwx (HImode, operands); + gcc_assert (done); + DONE; + } + } + else if (aligned_memory_operand (operands[0], CQImode)) + { + if (MEM_P (operands[1])) + { + rtx x = gen_reg_rtx (HImode); + emit_move_insn (gen_lowpart (CQImode, x), operands[1]); + operands[1] = x; + goto do_aligned2; + } + goto do_aligned1; + } + + gcc_assert (!reload_in_progress); + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + +;; Here are the versions for reload. +;; +;; The aligned input case is recognized early in alpha_secondary_reload +;; in order to avoid allocating an unnecessary scratch register. +;; +;; Note that in the unaligned cases we know that the operand must not be +;; a pseudo-register because stack slots are always aligned references. 
+ +(define_expand "reload_in" + [(parallel [(match_operand:RELOAD12 0 "register_operand" "=r") + (match_operand:RELOAD12 1 "any_memory_operand" "m") + (match_operand:TI 2 "register_operand" "=&r")])] + "!TARGET_BWX" +{ + rtx scratch, seq, addr; + unsigned regno = REGNO (operands[2]); + + /* It is possible that one of the registers we got for operands[2] + might coincide with that of operands[0] (which is why we made + it TImode). Pick the other one to use as our scratch. */ + if (regno == REGNO (operands[0])) + regno++; + scratch = gen_rtx_REG (DImode, regno); + + addr = get_unaligned_address (operands[1]); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + seq = gen_unaligned_load (operands[0], addr, + scratch, operands[0]); + alpha_set_memflags (seq, operands[1]); + + emit_insn (seq); + DONE; +}) + +(define_expand "reload_out" + [(parallel [(match_operand:RELOAD12 0 "any_memory_operand" "=m") + (match_operand:RELOAD12 1 "register_operand" "r") + (match_operand:TI 2 "register_operand" "=&r")])] + "!TARGET_BWX" +{ + unsigned regno = REGNO (operands[2]); + + if (mode == CQImode) + { + operands[0] = gen_lowpart (HImode, operands[0]); + operands[1] = gen_lowpart (HImode, operands[1]); + } + + if (aligned_memory_operand (operands[0], mode)) + { + emit_insn (gen_reload_out_aligned + (operands[0], operands[1], + gen_rtx_REG (SImode, regno), + gen_rtx_REG (SImode, regno + 1))); + } + else + { + rtx addr = get_unaligned_address (operands[0]); + rtx scratch1 = gen_rtx_REG (DImode, regno); + rtx scratch2 = gen_rtx_REG (DImode, regno + 1); + rtx scratch3 = scratch1; + rtx seq; + + if (REG_P (addr)) + scratch1 = addr; + + seq = gen_unaligned_store (addr, operands[1], scratch1, + scratch2, scratch3); + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + } + DONE; +}) + +;; Helpers for the above. The way reload is structured, we can't +;; always get a proper address for a stack slot during reload_foo +;; expansion, so we must delay our address manipulations until after. 
+ +(define_insn_and_split "reload_in_aligned" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (match_operand:I12MODE 1 "memory_operand" "m"))] + "!TARGET_BWX && (reload_in_progress || reload_completed)" + "#" + "!TARGET_BWX && reload_completed" + [(const_int 0)] +{ + rtx aligned_mem, bitnum; + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + emit_insn (gen_aligned_load + (gen_lowpart (DImode, operands[0]), aligned_mem, bitnum, + gen_rtx_REG (SImode, REGNO (operands[0])))); + DONE; +}) + +(define_insn_and_split "reload_out_aligned" + [(set (match_operand:I12MODE 0 "memory_operand" "=m") + (match_operand:I12MODE 1 "register_operand" "r")) + (clobber (match_operand:SI 2 "register_operand" "=r")) + (clobber (match_operand:SI 3 "register_operand" "=r"))] + "!TARGET_BWX && (reload_in_progress || reload_completed)" + "#" + "!TARGET_BWX && reload_completed" + [(const_int 0)] +{ + rtx aligned_mem, bitnum; + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, + operands[2], operands[3])); + DONE; +}) + +;; Vector operations + +(define_mode_iterator VEC [V8QI V4HI V2SI]) +(define_mode_iterator VEC12 [V8QI V4HI]) + +(define_expand "mov" + [(set (match_operand:VEC 0 "nonimmediate_operand") + (match_operand:VEC 1 "general_operand"))] + "" +{ + if (alpha_expand_mov (mode, operands)) + DONE; +}) + +(define_split + [(set (match_operand:VEC 0 "register_operand") + (match_operand:VEC 1 "non_zero_const_operand"))] + "" + [(const_int 0)] +{ + if (alpha_split_const_mov (mode, operands)) + DONE; + else + FAIL; +}) + + +(define_expand "movmisalign" + [(set (match_operand:VEC 0 "nonimmediate_operand") + (match_operand:VEC 1 "general_operand"))] + "" +{ + alpha_expand_movmisalign (mode, operands); + DONE; +}) + +(define_insn "*mov_fix" + [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m,r,*f") + (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f,*f,r"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + "@ + bis $31,%r1,%0 + # + ldq %0,%1 + stq %r1,%0 + cpys %R1,%R1,%0 + ldt %0,%1 + stt %R1,%0 + ftoit %1,%0 + itoft %1,%0" + [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "isa" "*,*,*,*,*,*,*,fix,fix")]) + +(define_insn "3" + [(set (match_operand:VEC12 0 "register_operand" "=r") + (any_maxmin:VEC12 + (match_operand:VEC12 1 "reg_or_0_operand" "rW") + (match_operand:VEC12 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + " %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "one_cmpl2" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (match_operand:VEC 1 "register_operand" "r")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "and3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "and %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*andnot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (not:VEC (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bic %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "ior3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bis %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC 
(not:DI (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "ornot %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xor3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "xor %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r"))))] + "" + "eqv %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_expand "vec_shl_" + [(set (match_operand:VEC 0 "register_operand") + (ashift:DI (match_operand:VEC 1 "register_operand") + (match_operand:DI 2 "reg_or_6bit_operand")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +(define_expand "vec_shr_" + [(set (match_operand:VEC 0 "register_operand") + (lshiftrt:DI (match_operand:VEC 1 "register_operand") + (match_operand:DI 2 "reg_or_6bit_operand")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +;; Bit field extract patterns which use ext[wlq][lh] + +(define_expand "extvmisaligndi" + [(set (match_operand:DI 0 "register_operand") + (sign_extract:DI (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + alpha_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + INTVAL (operands[3]) / 8, 1); + DONE; +}) + +(define_expand "extzvdi" + [(set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 8, 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 8 + && INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; +}) + +(define_expand "extzvmisaligndi" + [(set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. + We fail 8-bit fields, falling back on a simple byte load. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + alpha_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + INTVAL (operands[3]) / 8, 0); + DONE; +}) + +(define_expand "insvmisaligndi" + [(set (zero_extract:DI (match_operand:BLK 0 "memory_operand") + (match_operand:DI 1 "const_int_operand") + (match_operand:DI 2 "const_int_operand")) + (match_operand:DI 3 "register_operand"))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. 
*/ + if (INTVAL (operands[2]) % 8 != 0 + || (INTVAL (operands[1]) != 16 + && INTVAL (operands[1]) != 32 + && INTVAL (operands[1]) != 64)) + FAIL; + + alpha_expand_unaligned_store (operands[0], operands[3], + INTVAL (operands[1]) / 8, + INTVAL (operands[2]) / 8); + DONE; +}) + +;; Block move/clear, see alpha.c for more details. +;; Argument 0 is the destination +;; Argument 1 is the source +;; Argument 2 is the length +;; Argument 3 is the alignment + +(define_expand "movmemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand")) + (use (match_operand:DI 2 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand"))])] + "" +{ + if (alpha_expand_block_move (operands)) + DONE; + else + FAIL; +}) + +(define_expand "movmemdi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand")) + (use (match_operand:DI 2 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand")) + (use (match_dup 4)) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 18)) + (clobber (reg:DI 19)) + (clobber (reg:DI 20)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))])] + "TARGET_ABI_OPEN_VMS" + "operands[4] = gen_rtx_SYMBOL_REF (Pmode, \"OTS$MOVE\");") + +(define_insn "*movmemdi_1" + [(set (match_operand:BLK 0 "memory_operand" "=m,=m") + (match_operand:BLK 1 "memory_operand" "m,m")) + (use (match_operand:DI 2 "nonmemory_operand" "r,i")) + (use (match_operand:DI 3 "immediate_operand")) + (use (match_operand:DI 4 "call_operand" "i,i")) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 18)) + (clobber (reg:DI 19)) + (clobber (reg:DI 20)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + operands [5] = alpha_use_linkage (operands [4], false, true); + switch (which_alternative) + { + case 0: + return "lda $16,%0\;bis $31,%2,$17\;lda $18,%1\;ldq $26,%5\;lda $25,3($31)\;jsr $26,%4\;ldq $27,0($29)"; + case 1: + return "lda $16,%0\;lda $17,%2($31)\;lda $18,%1\;ldq $26,%5\;lda $25,3($31)\;jsr $26,%4\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "multi") + (set_attr "length" "28")]) + +(define_expand "setmemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand 2 "const_int_operand")) + (use (match_operand:DI 1 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand"))])] + "" +{ + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (alpha_expand_block_clear (operands)) + DONE; + else + FAIL; +}) + +(define_expand "setmemdi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand 2 "const_int_operand")) + (use (match_operand:DI 1 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand")) + (use (match_dup 4)) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))])] + "TARGET_ABI_OPEN_VMS" +{ + /* If value to set is not zero, use the library routine. 
*/ + if (operands[2] != const0_rtx) + FAIL; + + operands[4] = gen_rtx_SYMBOL_REF (Pmode, "OTS$ZERO"); +}) + +(define_insn "*clrmemdi_1" + [(set (match_operand:BLK 0 "memory_operand" "=m,=m") + (const_int 0)) + (use (match_operand:DI 1 "nonmemory_operand" "r,i")) + (use (match_operand:DI 2 "immediate_operand")) + (use (match_operand:DI 3 "call_operand" "i,i")) + (clobber (reg:DI 25)) + (clobber (reg:DI 16)) + (clobber (reg:DI 17)) + (clobber (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + operands [4] = alpha_use_linkage (operands [3], false, true); + switch (which_alternative) + { + case 0: + return "lda $16,%0\;bis $31,%1,$17\;ldq $26,%4\;lda $25,2($31)\;jsr $26,%3\;ldq $27,0($29)"; + case 1: + return "lda $16,%0\;lda $17,%1($31)\;ldq $26,%4\;lda $25,2($31)\;jsr $26,%3\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "multi") + (set_attr "length" "24")]) + + +;; Subroutine of stack space allocation. Perform a stack probe. +(define_expand "probe_stack" + [(set (match_dup 1) (match_operand:DI 0 "const_int_operand"))] + "" +{ + operands[1] = gen_rtx_MEM (DImode, plus_constant (Pmode, stack_pointer_rtx, + INTVAL (operands[0]))); + MEM_VOLATILE_P (operands[1]) = 1; + + operands[0] = const0_rtx; +}) + +;; This is how we allocate stack space. If we are allocating a +;; constant amount of space and we know it is less than 4096 +;; bytes, we need do nothing. +;; +;; If it is more than 4096 bytes, we need to probe the stack +;; periodically. +(define_expand "allocate_stack" + [(set (reg:DI 30) + (plus:DI (reg:DI 30) + (match_operand:DI 1 "reg_or_cint_operand"))) + (set (match_operand:DI 0 "register_operand" "=r") + (match_dup 2))] + "" +{ + if (CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < 32768) + { + if (INTVAL (operands[1]) >= 4096) + { + /* We do this the same way as in the prologue and generate explicit + probes. Then we update the stack by the constant. 
*/ + + int probed = 4096; + + emit_insn (gen_probe_stack (GEN_INT (- probed))); + while (probed + 8192 < INTVAL (operands[1])) + emit_insn (gen_probe_stack (GEN_INT (- (probed += 8192)))); + + if (probed + 4096 < INTVAL (operands[1])) + emit_insn (gen_probe_stack (GEN_INT (- INTVAL(operands[1])))); + } + + operands[1] = GEN_INT (- INTVAL (operands[1])); + operands[2] = virtual_stack_dynamic_rtx; + } + else + { + rtx out_label = 0; + rtx loop_label = gen_label_rtx (); + rtx want = gen_reg_rtx (Pmode); + rtx tmp = gen_reg_rtx (Pmode); + rtx memref, test; + + emit_insn (gen_subdi3 (want, stack_pointer_rtx, + force_reg (Pmode, operands[1]))); + + if (!CONST_INT_P (operands[1])) + { + rtx limit = GEN_INT (4096); + out_label = gen_label_rtx (); + test = gen_rtx_LTU (VOIDmode, operands[1], limit); + emit_jump_insn + (gen_cbranchdi4 (test, operands[1], limit, out_label)); + } + + emit_insn (gen_adddi3 (tmp, stack_pointer_rtx, GEN_INT (-4096))); + emit_label (loop_label); + memref = gen_rtx_MEM (DImode, tmp); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT(-8192))); + test = gen_rtx_GTU (VOIDmode, tmp, want); + emit_jump_insn (gen_cbranchdi4 (test, tmp, want, loop_label)); + + memref = gen_rtx_MEM (DImode, want); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + + if (out_label) + emit_label (out_label); + + emit_move_insn (stack_pointer_rtx, want); + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; + } +}) + +;; This is used by alpha_expand_prolog to do the same thing as above, +;; except we cannot at that time generate new basic blocks, so we hide +;; the loop in this one insn. + +(define_insn "prologue_stack_probe_loop" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "r")] + UNSPECV_PSPL)] + "" +{ + operands[2] = gen_label_rtx (); + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[2])); + + return "stq $31,-8192(%1)\;subq %0,1,%0\;lda %1,-8192(%1)\;bne %0,%l2"; +} + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +(define_expand "prologue" + [(const_int 0)] + "" +{ + alpha_expand_prologue (); + DONE; +}) + +;; These take care of emitting the ldgp insn in the prologue. This will be +;; an lda/ldah pair and we want to align them properly. So we have two +;; unspec_volatile insns, the first of which emits the ldgp assembler macro +;; and the second of which emits nothing. However, both are marked as type +;; IADD (the default) so the alignment code in alpha.c does the right thing +;; with them. + +(define_expand "prologue_ldgp" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] + "" +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 27); + operands[2] = (TARGET_EXPLICIT_RELOCS + ? 
GEN_INT (alpha_next_sequence_number++) + : const0_rtx); +}) + +(define_insn "*ldgp_er_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "ldah %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_LDGP2))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "lda %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "lda %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "" + "ldgp %0,0(%1)\n$%~..ng:" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "" + ) + +;; The _mcount profiling hook has special calling conventions, and +;; does not clobber all the registers that a normal call would. So +;; hide the fact this is a call at all. + +(define_insn "prologue_mcount" + [(unspec_volatile [(const_int 0)] UNSPECV_MCOUNT)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + /* Note that we cannot use a lituse_jsr reloc, since _mcount + cannot be called via the PLT. */ + return "ldq $28,_mcount($29)\t\t!literal\;jsr $28,($28),_mcount"; + else + return "lda $28,_mcount\;jsr $28,($28),_mcount"; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "init_fp" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "r")) + (clobber (mem:BLK (match_operand:DI 2 "register_operand" "=r")))] + "" + "bis $31,%1,%0") + +(define_expand "epilogue" + [(return)] + "" + "alpha_expand_epilogue ();") + +(define_expand "sibcall_epilogue" + [(return)] + "TARGET_ABI_OSF" +{ + alpha_expand_epilogue (); + DONE; +}) + +(define_expand "builtin_longjmp" + [(use (match_operand:DI 0 "register_operand" "r"))] + "TARGET_ABI_OSF" +{ + /* The elements of the buffer are, in order: */ + rtx fp = gen_rtx_MEM (Pmode, operands[0]); + rtx lab = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 8)); + rtx stack = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 16)); + rtx pv = gen_rtx_REG (Pmode, 27); + + /* This bit is the same as expand_builtin_longjmp. */ + emit_move_insn (hard_frame_pointer_rtx, fp); + emit_move_insn (pv, lab); + emit_stack_restore (SAVE_NONLOCAL, stack); + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + /* Load the label we are jumping through into $27 so that we know + where to look for it when we get back to setjmp's function for + restoring the gp. 
*/ + emit_jump_insn (gen_builtin_longjmp_internal (pv)); + emit_barrier (); + DONE; +}) + +;; This is effectively a copy of indirect_jump, but constrained such +;; that register renaming cannot foil our cunning plan with $27. +(define_insn "builtin_longjmp_internal" + [(set (pc) + (unspec_volatile [(match_operand:DI 0 "register_operand" "c")] + UNSPECV_LONGJMP))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "builtin_setjmp_receiver" + [(unspec_volatile [(label_ref (match_operand 0))] UNSPECV_SETJMPR)] + "TARGET_ABI_OSF") + +(define_insn_and_split "*builtin_setjmp_receiver_1" + [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR)] + "TARGET_ABI_OSF" +{ + if (TARGET_EXPLICIT_RELOCS) + return "#"; + else + return "br $27,$LSJ%=\n$LSJ%=:\;ldgp $29,0($27)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 1) + (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LDGP1)) + (set (match_dup 1) + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_LDGP2))] +{ + if (prev_nonnote_insn (curr_insn) != XEXP (operands[0], 0)) + emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, operands[0]), + UNSPECV_SETJMPR_ER)); + operands[1] = pic_offset_table_rtx; + operands[2] = gen_rtx_REG (Pmode, 27); + operands[3] = GEN_INT (alpha_next_sequence_number++); +} + [(set_attr "length" "12") + (set_attr "type" "multi")]) + +(define_insn "*builtin_setjmp_receiver_er_sl_1" + [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR_ER)] + "TARGET_ABI_OSF && TARGET_EXPLICIT_RELOCS" + "lda $27,$LSJ%=-%l0($27)\n$LSJ%=:") + +;; When flag_reorder_blocks_and_partition is in effect, compiler puts +;; exception landing pads in a cold section. To prevent inter-section offset +;; calculation, a jump to original landing pad is emitted in the place of the +;; original landing pad. Since landing pad is moved, RA-relative GP +;; calculation in the prologue of landing pad breaks. To solve this problem, +;; we use alternative GP load approach. 
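+;; Concretely, as the patterns that follow show: with hot/cold partitioning
+;; the expander reloads $29 from a stack slot that the prologue saved via
+;; alpha_gp_save_rtx (), instead of recomputing it from the return address
+;; with the usual ldah/lda !gpdisp pair.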
+ +(define_expand "exception_receiver" + [(unspec_volatile [(match_dup 0)] UNSPECV_EHR)] + "TARGET_ABI_OSF" +{ + if (flag_reorder_blocks_and_partition) + operands[0] = alpha_gp_save_rtx (); + else + operands[0] = const0_rtx; +}) + +(define_insn "*exception_receiver_2" + [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_EHR)] + "TARGET_ABI_OSF && flag_reorder_blocks_and_partition" + "ldq $29,%0" + [(set_attr "type" "ild")]) + +(define_insn_and_split "*exception_receiver_1" + [(unspec_volatile [(const_int 0)] UNSPECV_EHR)] + "TARGET_ABI_OSF" +{ + if (TARGET_EXPLICIT_RELOCS) + return "ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*"; + else + return "ldgp $29,0($26)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec:DI [(match_dup 0) (match_dup 2)] UNSPEC_LDGP2))] +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 26); + operands[2] = GEN_INT (alpha_next_sequence_number++); +} + [(set_attr "length" "8") + (set_attr "type" "multi")]) + +(define_expand "nonlocal_goto_receiver" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) + (set (reg:DI 27) (mem:DI (reg:DI 29))) + (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) + (use (reg:DI 27))] + "TARGET_ABI_OPEN_VMS") + +(define_insn "arg_home" + [(unspec [(const_int 0)] UNSPEC_ARG_HOME) + (use (reg:DI 1)) + (use (reg:DI 25)) + (use (reg:DI 16)) + (use (reg:DI 17)) + (use (reg:DI 18)) + (use (reg:DI 19)) + (use (reg:DI 20)) + (use (reg:DI 21)) + (use (reg:DI 48)) + (use (reg:DI 49)) + (use (reg:DI 50)) + (use (reg:DI 51)) + (use (reg:DI 52)) + (use (reg:DI 53)) + (clobber (mem:BLK (const_int 0))) + (clobber (reg:DI 24)) + (clobber (reg:DI 25)) + (clobber (reg:DI 0))] + "TARGET_ABI_OPEN_VMS" + "lda $0,OTS$HOME_ARGS\;ldq $0,8($0)\;jsr $0,OTS$HOME_ARGS" + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +;; Prefetch data. +;; +;; On EV4, these instructions are nops -- no load occurs. +;; +;; On EV5, these instructions act as a normal load, and thus can trap +;; if the address is invalid. The OS may (or may not) handle this in +;; the entMM fault handler and suppress the fault. If so, then this +;; has the effect of a read prefetch instruction. +;; +;; On EV6, these become official prefetch instructions. + +(define_insn "prefetch" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n"))] + "TARGET_FIXUP_EV5_PREFETCH || alpha_cpu == PROCESSOR_EV6" +{ + /* Interpret "no temporal locality" as this data should be evicted once + it is used. The "evict next" alternatives load the data into the cache + and leave the LRU eviction counter pointing to that block. */ + static const char * const alt[2][2] = { + { + "ldq $31,%a0", /* read, evict next */ + "ldl $31,%a0", /* read, evict last */ + }, + { + "ldt $f31,%a0", /* write, evict next */ + "lds $f31,%a0", /* write, evict last */ + } + }; + + bool write = INTVAL (operands[1]) != 0; + bool lru = INTVAL (operands[2]) != 0; + + return alt[write][lru]; +} + [(set_attr "type" "ild")]) + +;; Close the trap shadow of preceding instructions. This is generated +;; by alpha_reorg. + +(define_insn "trapb" + [(unspec_volatile [(const_int 0)] UNSPECV_TRAPB)] + "" + "trapb" + [(set_attr "type" "misc")]) + +;; No-op instructions used by machine-dependent reorg to preserve +;; alignment for instruction issue. 
+;; The Unicos/Mk assembler does not support these opcodes. + +(define_insn "nop" + [(const_int 0)] + "" + "bis $31,$31,$31" + [(set_attr "type" "ilog")]) + +(define_insn "fnop" + [(const_int 1)] + "TARGET_FP" + "cpys $f31,$f31,$f31" + [(set_attr "type" "fcpys")]) + +(define_insn "unop" + [(const_int 2)] + "" + "ldq_u $31,0($30)") + +(define_insn "realign" + [(unspec_volatile [(match_operand 0 "immediate_operand" "i")] + UNSPECV_REALIGN)] + "" + ".align %0 #realign") + +;; Instructions to be emitted from __builtins. + +(define_insn "builtin_cmpbge" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")] + UNSPEC_CMPBGE))] + "" + "cmpbge %r1,%2,%0" + ;; The EV6 data sheets list this as ILOG. OTOH, EV6 doesn't + ;; actually differentiate between ILOG and ICMP in the schedule. + [(set_attr "type" "icmp")]) + +(define_expand "extbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (8), operands[2])); + DONE; +}) + +(define_expand "extwl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "extll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "extql" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_insbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (QImode, operands[1]); + emit_insn (gen_insbl (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_inswl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (HImode, operands[1]); + emit_insn (gen_inswl (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_insll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (SImode, operands[1]); + emit_insn (gen_insll (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "inswh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "inslh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "insqh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh 
(operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "mskbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = GEN_INT (0xff); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskwl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = GEN_INT (0xffff); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = immed_double_const (0xffffffff, 0, DImode); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskql" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = constm1_rtx; + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskwh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "msklh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "mskqh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_zap" + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_cint_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = alpha_expand_zap_mask (INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zap_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (and:DI (unspec:DI + [(match_operand:QI 2 "reg_or_cint_operand" "n,n,r,r")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand" "n,r,J,r")))] + "" + "@ + # + # + bis $31,$31,%0 + zap %r1,%2,%0" + [(set_attr "type" "shift,shift,ilog,shift")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "const_int_operand")))] + "" + [(const_int 0)] +{ + rtx mask = alpha_expand_zap_mask (INTVAL (operands[2])); + if (HOST_BITS_PER_WIDE_INT >= 64 || CONST_INT_P (mask)) + operands[1] = gen_int_mode (INTVAL (operands[1]) & INTVAL (mask), DImode); + else + { + HOST_WIDE_INT c_lo = INTVAL (operands[1]); + HOST_WIDE_INT c_hi = (c_lo < 0 ? 
-1 : 0); + operands[1] = immed_double_const (c_lo & CONST_DOUBLE_LOW (mask), + c_hi & CONST_DOUBLE_HIGH (mask), + DImode); + } + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "register_operand")))] + "" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 2)))] +{ + operands[2] = alpha_expand_zap_mask (INTVAL (operands[2])); + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (operands[2] == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +(define_expand "builtin_zapnot" + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(not:QI (match_operand:DI 2 "reg_or_cint_operand"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = alpha_expand_zap_mask (~ INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zapnot_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(not:QI (match_operand:QI 2 "register_operand" "r"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" + "zapnot %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "builtin_amask" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_8bit_operand" "rI")] + UNSPEC_AMASK))] + "" + "amask %1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_implver" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_IMPLVER))] + "" + "implver %0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_rpcc" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RPCC))] + "" + "rpcc %0" + [(set_attr "type" "ilog")]) + +(define_expand "builtin_minub8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_uminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsb8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_sminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minuw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_uminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_sminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxub8" + [(match_operand:DI 0 
"register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_umaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsb8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_smaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxuw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_umaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + alpha_expand_builtin_vector_binop (gen_smaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_insn "builtin_perr" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rJ")] + UNSPEC_PERR))] + "TARGET_MAX" + "perr %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pklb" + [(set (match_operand:DI 0 "register_operand") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:DI 1 "register_operand")) + (match_dup 2)) + (match_dup 3)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V2SImode, operands[1]); + operands[2] = CONST0_RTX (V2QImode); + operands[3] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pklb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:V2SI 1 "register_operand" "r")) + (match_operand:V2QI 2 "const0_operand")) + (match_operand:V4QI 3 "const0_operand")))] + "TARGET_MAX" + "pklb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pkwb" + [(set (match_operand:DI 0 "register_operand") + (vec_concat:V8QI + (truncate:V4QI (match_operand:DI 1 "register_operand")) + (match_dup 2)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V4HImode, operands[1]); + operands[2] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pkwb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (truncate:V4QI (match_operand:V4HI 1 "register_operand" "r")) + (match_operand:V4QI 2 "const0_operand")))] + "TARGET_MAX" + "pkwb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbl" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:DI 1 "register_operand") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V2SImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbl" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" + "unpkbl %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbw" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:DI 1 "register_operand") + (parallel 
[(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V4HImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbw" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" + "unpkbw %r1,%0" + [(set_attr "type" "mvi")]) + +(include "sync.md") + +;; The call patterns are at the end of the file because their +;; wildcard operand0 interferes with nice recognition. + +(define_insn "*call_value_osf_1_er_noreturn" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jsr $26,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_value_osf_1_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,(%1),0\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jsr $26,($27),0\t\t!lituse_jsr!%#\;ldah $29,0($26)\t\t!gpdisp!%*\;lda $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar(); while (1); }. +(define_peephole2 + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (use (reg:DI 29)) + (use (match_dup 1)) + (use (match_dup 4)) + (clobber (reg:DI 26))])] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[4])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[4] = const0_rtx; + } +}) + +(define_peephole2 + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && ! 
(peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (match_dup 5)) + (clobber (reg:DI 26))]) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP2))] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[5] = GEN_INT (alpha_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[5])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[5] = const0_rtx; + } + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[6] = pic_offset_table_rtx; +}) + +(define_insn "*call_value_osf_2_er_nogp" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2))) + (use (reg:DI 29)) + (use (match_operand 3)) + (use (match_operand 4)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%1),%3%J4" + [(set_attr "type" "jsr")]) + +(define_insn "*call_value_osf_2_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2))) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 5 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 3)) + (use (match_operand 4)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "jsr $26,(%1),%3%J4\;ldah $29,0($26)\t\t!gpdisp!%5" + [(set_attr "type" "jsr") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_value_osf_1_noreturn" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! 
TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + jsr $26,($27),0 + bsr $26,$%1..ng + jsr $26,%1" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn_and_split "call_value_osf_tlsgd" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSGD_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "HAVE_AS_TLS" + "#" + "&& reload_completed" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] UNSPEC_TLSGD_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "call_value_osf_tlsldm" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] UNSPEC_TLSLDM_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "HAVE_AS_TLS" + "#" + "&& reload_completed" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] UNSPEC_TLSLDM_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (alpha_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +} + [(set_attr "type" "multi")]) + +(define_insn "*call_value_osf_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + jsr $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + jsr $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_value_osf_1_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,%1\t\t!samegp + ldq $27,%1($29)\t\t!literal!%#\;jmp $31,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +(define_insn "*sibcall_value_osf_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! TARGET_EXPLICIT_RELOCS && TARGET_ABI_OSF" + "@ + br $31,$%1..ng + lda $27,%1\;jmp $31,($27),%1" + [(set_attr "type" "jsr") + (set_attr "length" "*,8")]) + +; GAS relies on the order and position of instructions output below in order +; to generate relocs for VMS link to potentially optimize the call. +; Please do not molest. 
+(define_insn "*call_value_vms_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "r,s")) + (match_operand 2))) + (use (match_operand:DI 3 "nonmemory_operand" "r,n")) + (use (reg:DI 25)) + (use (reg:DI 26)) + (clobber (reg:DI 27))] + "TARGET_ABI_OPEN_VMS" +{ + switch (which_alternative) + { + case 0: + return "mov %3,$27\;jsr $26,0\;ldq $27,0($29)"; + case 1: + operands [3] = alpha_use_linkage (operands [1], true, false); + operands [4] = alpha_use_linkage (operands [1], false, false); + return "ldq $26,%4\;ldq $27,%3\;jsr $26,%1\;ldq $27,0($29)"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "jsr") + (set_attr "length" "12,16")]) diff --git a/gcc-4.9/gcc/config/alpha/alpha.opt b/gcc-4.9/gcc/config/alpha/alpha.opt new file mode 100644 index 000000000..dc937ac66 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/alpha.opt @@ -0,0 +1,130 @@ +; Options for the DEC Alpha port of the compiler +; +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +msoft-float +Target Report Mask(SOFT_FP) +Do not use hardware fp + +mfp-regs +Target Report Mask(FPREGS) +Use fp registers + +mgas +Target Ignore +Does nothing. Preserved for backward compatibility. 
+ +mieee-conformant +Target RejectNegative Mask(IEEE_CONFORMANT) +Request IEEE-conformant math library routines (OSF/1) + +mieee +Target Report RejectNegative Mask(IEEE) +Emit IEEE-conformant code, without inexact exceptions + +mieee-with-inexact +Target Report RejectNegative Mask(IEEE_WITH_INEXACT) + +mbuild-constants +Target Report Mask(BUILD_CONSTANTS) +Do not emit complex integer constants to read-only memory + +mfloat-vax +Target Report RejectNegative Mask(FLOAT_VAX) +Use VAX fp + +mfloat-ieee +Target Report RejectNegative InverseMask(FLOAT_VAX) +Do not use VAX fp + +mbwx +Target Report Mask(BWX) +Emit code for the byte/word ISA extension + +mmax +Target Report Mask(MAX) +Emit code for the motion video ISA extension + +mfix +Target Report Mask(FIX) +Emit code for the fp move and sqrt ISA extension + +mcix +Target Report Mask(CIX) +Emit code for the counting ISA extension + +mexplicit-relocs +Target Report Mask(EXPLICIT_RELOCS) +Emit code using explicit relocation directives + +msmall-data +Target Report RejectNegative Mask(SMALL_DATA) +Emit 16-bit relocations to the small data areas + +mlarge-data +Target Report RejectNegative InverseMask(SMALL_DATA) +Emit 32-bit relocations to the small data areas + +msmall-text +Target Report RejectNegative Mask(SMALL_TEXT) +Emit direct branches to local functions + +mlarge-text +Target Report RejectNegative InverseMask(SMALL_TEXT) +Emit indirect branches to local functions + +mtls-kernel +Target Report Mask(TLS_KERNEL) +Emit rdval instead of rduniq for thread pointer + +mlong-double-128 +Target Report RejectNegative Mask(LONG_DOUBLE_128) +Use 128-bit long double + +mlong-double-64 +Target Report RejectNegative InverseMask(LONG_DOUBLE_128) +Use 64-bit long double + +mcpu= +Target RejectNegative Joined Var(alpha_cpu_string) +Use features of and schedule given CPU + +mtune= +Target RejectNegative Joined Var(alpha_tune_string) +Schedule given CPU + +mfp-rounding-mode= +Target RejectNegative Joined Var(alpha_fprm_string) +Control the generated fp rounding mode + +mfp-trap-mode= +Target RejectNegative Joined Var(alpha_fptm_string) +Control the IEEE trap mode + +mtrap-precision= +Target RejectNegative Joined Var(alpha_tp_string) +Control the precision given to fp exceptions + +mmemory-latency= +Target RejectNegative Joined Var(alpha_mlat_string) +Tune expected memory latency + +mtls-size= +Target RejectNegative Joined UInteger Var(alpha_tls_size) Init(32) +Specify bit size of immediate TLS offsets diff --git a/gcc-4.9/gcc/config/alpha/constraints.md b/gcc-4.9/gcc/config/alpha/constraints.md new file mode 100644 index 000000000..e67c9a9a0 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/constraints.md @@ -0,0 +1,120 @@ +;; Constraint definitions for DEC Alpha. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;; Unused letters: +;;; ABCDEF V YZ +;;; de ghijkl pq tu wxyz + +;; Integer register constraints. 
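+;;
+;; These letters are what the constraint strings in alpha.md's templates
+;; name; for example the OSF call patterns earlier in this patch use the
+;; alternatives "c,R,s", meaning the call address is either in $27 ('c'
+;; below), a direct-call symbol ('R', one of the "extra" constraints near
+;; the end of this file), or any symbolic constant (the generic 's'
+;; constraint):
+;;
+;;   (match_operand:DI 1 "call_operand" "c,R,s")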
+ +(define_register_constraint "a" "R24_REG" + "General register 24, input to division routine") + +(define_register_constraint "b" "R25_REG" + "General register 24, input to division routine") + +(define_register_constraint "c" "R27_REG" + "General register 27, function call address") + +(define_register_constraint "f" "TARGET_FPREGS ? FLOAT_REGS : NO_REGS" + "Any floating-point register") + +(define_register_constraint "v" "R0_REG" + "General register 0, function value return address") + +(define_memory_constraint "w" + "A memory whose address is only a register" + (match_operand 0 "mem_noofs_operand")) + +;; Integer constant constraints. +(define_constraint "I" + "An unsigned 8 bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 255)"))) + +(define_constraint "J" + "The constant zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "K" + "Signed 16-bit integer constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 32767)"))) + +(define_constraint "L" + "A shifted signed 16-bit constant appropriate for LDAH" + (and (match_code "const_int") + (match_test "(ival & 0xffff) == 0 + && (ival >> 31 == -1 || ival >> 31 == 0)"))) + +(define_constraint "M" + "A valid operand of a ZAP insn" + (and (match_code "const_int") + (match_test "zap_mask (ival) != 0"))) + +(define_constraint "N" + "A complemented unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (~ival, 0, 255)"))) + +(define_constraint "O" + "A negated unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (-ival, 0, 255)"))) + +(define_constraint "P" + "The constant 1, 2 or 3" + (and (match_code "const_int") + (match_test "ival == 1 || ival == 2 || ival == 3"))) + +(define_constraint "H" + "A valid operand of a ZAP insn, when building with 32-bit HOST_WIDE_INT" + (and (match_code "const_double") + (match_test "mode == VOIDmode && zap_mask (hval) && zap_mask (lval)"))) + +;; Floating-point constant constraints. +(define_constraint "G" + "The floating point zero constant" + (and (match_code "const_double") + (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT + && op == CONST0_RTX (mode)"))) + +;; "Extra" constraints. +(define_constraint "Q" + "@internal A normal_memory_operand" + (match_operand 0 "normal_memory_operand")) + +(define_constraint "R" + "@internal A direct_call_operand" + (match_operand:DI 0 "direct_call_operand")) + +(define_constraint "S" + "An unsigned 6-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 63)"))) + +(define_constraint "T" + "@internal A high-part symbol" + (match_code "high")) + +(define_constraint "W" + "A vector zero constant" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (mode)"))) diff --git a/gcc-4.9/gcc/config/alpha/driver-alpha.c b/gcc-4.9/gcc/config/alpha/driver-alpha.c new file mode 100644 index 000000000..1981d0e6d --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/driver-alpha.c @@ -0,0 +1,99 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Arthur Loiret + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* Chip family type IDs, returned by implver instruction. */ +#define IMPLVER_EV4_FAMILY 0 /* LCA/EV4/EV45 */ +#define IMPLVER_EV5_FAMILY 1 /* EV5/EV56/PCA56 */ +#define IMPLVER_EV6_FAMILY 2 /* EV6 */ +#define IMPLVER_EV7_FAMILY 3 /* EV7 */ + +/* Bit defines for amask instruction. */ +#define AMASK_BWX 0x1 /* byte/word extension. */ +#define AMASK_FIX 0x2 /* sqrt and f <-> i conversions + extension. */ +#define AMASK_CIX 0x4 /* count extension. */ +#define AMASK_MVI 0x100 /* multimedia extension. */ +#define AMASK_PRECISE 0x200 /* Precise arithmetic traps. */ +#define AMASK_LOCKPFTCHOK 0x1000 /* Safe to prefetch lock cache + block. */ + +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "cpu" or "tune" as argument depending on if -mcpu=native + or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-mcpu=ev6" on an Alpha 21264 for + -mcpu=native. If the routine can't detect a known processor, + the -mcpu or -mtune option is discarded. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + static const struct cpu_types { + long implver; + long amask; + const char *const cpu; + } cpu_types[] = { + { IMPLVER_EV7_FAMILY, AMASK_BWX|AMASK_MVI|AMASK_FIX|AMASK_CIX, "ev67" }, + { IMPLVER_EV6_FAMILY, AMASK_BWX|AMASK_MVI|AMASK_FIX|AMASK_CIX, "ev67" }, + { IMPLVER_EV6_FAMILY, AMASK_BWX|AMASK_MVI|AMASK_FIX, "ev6" }, + { IMPLVER_EV5_FAMILY, AMASK_BWX|AMASK_MVI, "pca56" }, + { IMPLVER_EV5_FAMILY, AMASK_BWX, "ev56" }, + { IMPLVER_EV5_FAMILY, 0, "ev5" }, + { IMPLVER_EV4_FAMILY, 0, "ev4" }, + { 0, 0, NULL } + }; + long implver; + long amask; + const char *cpu; + int i; + + if (argc < 1) + return NULL; + + if (strcmp (argv[0], "cpu") && strcmp (argv[0], "tune")) + return NULL; + + implver = __builtin_alpha_implver (); + amask = __builtin_alpha_amask (~0L); + cpu = NULL; + + for (i = 0; cpu_types[i].cpu != NULL; i++) + if (implver == cpu_types[i].implver + && (~amask & cpu_types[i].amask) == cpu_types[i].amask) + { + cpu = cpu_types[i].cpu; + break; + } + + if (cpu == NULL) + return NULL; + + return concat ("-m", argv[0], "=", cpu, NULL); +} diff --git a/gcc-4.9/gcc/config/alpha/elf.h b/gcc-4.9/gcc/config/alpha/elf.h new file mode 100644 index 000000000..5a6803aba --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/elf.h @@ -0,0 +1,168 @@ +/* Definitions of target machine for GNU compiler, for DEC Alpha w/ELF. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef CC1_SPEC +#define CC1_SPEC "%{G*}" + +#undef ASM_SPEC +#define ASM_SPEC "%{G*} %{relax:-relax} %{!gstabs*:-no-mdebug}%{gstabs*:-mdebug}" + +/* Do not output a .file directive at the beginning of the input file. */ + +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", LOG); + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ +do { \ + if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \ + switch_to_section (sbss_section); \ + else \ + switch_to_section (bss_section); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_ALIGN ((FILE), exact_log2((ALIGN) / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + ASM_OUTPUT_SKIP((FILE), (SIZE) ? (SIZE) : 1); \ +} while (0) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. */ + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \ +} while (0) + +#undef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#undef SBSS_SECTION_ASM_OP +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\"aw\"" +#undef SDATA_SECTION_ASM_OP +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\"aw\"" + +/* This is how we tell the assembler that two symbols have the same value. */ + +#undef ASM_OUTPUT_DEF +#define ASM_OUTPUT_DEF(FILE, ALIAS, NAME) \ + do { \ + assemble_name(FILE, ALIAS); \ + fputs(" = ", FILE); \ + assemble_name(FILE, NAME); \ + fputc('\n', FILE); \ + } while (0) + +#undef ASM_OUTPUT_DEF_FROM_DECLS +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \ + do { \ + const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + const char *name = IDENTIFIER_POINTER (TARGET); \ + if (TREE_CODE (DECL) == FUNCTION_DECL) \ + { \ + fputc ('$', FILE); \ + assemble_name (FILE, alias); \ + fputs ("..ng = $", FILE); \ + assemble_name (FILE, name); \ + fputs ("..ng\n", FILE); \ + } \ + ASM_OUTPUT_DEF (FILE, alias, name); \ + } while (0) + +/* Provide a STARTFILE_SPEC appropriate for ELF. Here we add the + (even more) magical crtbegin.o file which provides part of the + support for getting C++ file-scope static object constructed + before entering `main'. 
*/ + +#undef STARTFILE_SPEC +#ifdef HAVE_LD_PIE +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#else +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#endif + +/* Provide a ENDFILE_SPEC appropriate for ELF. Here we tack on the + magical crtend.o file which provides part of the support for + getting C++ file-scope static object constructed before entering + `main', followed by a normal ELF "finalizer" file, `crtn.o'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + Since application size is already constrained to <2GB by the form of + the ldgp relocation, we can use a 32-bit pc-relative relocation to + static data. Dynamic data is accessed indirectly to allow for read + only EH sections. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) + +/* If defined, a C statement to be executed just prior to the output of + assembler code for INSN. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + (alpha_this_literal_sequence_number = 0, \ + alpha_this_gpdisp_sequence_number = 0) +extern int alpha_this_literal_sequence_number; +extern int alpha_this_gpdisp_sequence_number; + +/* Since the bits of the _init and _fini function is spread across + many object files, each potentially with its own GP, we must assume + we need to load our GP. Further, the .init/.fini section can + easily be more than 4MB away from the function to call so we can't + use bsr. */ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ +" br $29,1f\n" \ +"1: ldgp $29,0($29)\n" \ +" unop\n" \ +" jsr $26," USER_LABEL_PREFIX #FUNC "\n" \ +" .align 3\n" \ +" .previous"); + +/* If we have the capability create headers for efficient EH lookup. + As of Jan 2002, only glibc 2.2.4 can actually make use of this, but + I imagine that other systems will catch up. In the meantime, it + doesn't harm to make sure that the data exists to be used later. */ +#if defined(HAVE_LD_EH_FRAME_HDR) +#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} " +#endif diff --git a/gcc-4.9/gcc/config/alpha/elf.opt b/gcc-4.9/gcc/config/alpha/elf.opt new file mode 100644 index 000000000..680379033 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/elf.opt @@ -0,0 +1,29 @@ +; Alpha ELF options. + +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . 
+ +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +relax +Driver + +; This comment is to ensure we retain the blank line above. diff --git a/gcc-4.9/gcc/config/alpha/ev4.md b/gcc-4.9/gcc/config/alpha/ev4.md new file mode 100644 index 000000000..89d6c98e3 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/ev4.md @@ -0,0 +1,161 @@ +;; Scheduling description for Alpha EV4. +;; Copyright (C) 2002-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; On EV4 there are two classes of resources to consider: resources needed +; to issue, and resources needed to execute. IBUS[01] are in the first +; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second. +; (There are a few other register-like resources, but ...) + +(define_automaton "ev4_0,ev4_1,ev4_2") +(define_cpu_unit "ev4_ib0,ev4_ib1,ev4_abox,ev4_bbox" "ev4_0") +(define_cpu_unit "ev4_ebox,ev4_imul" "ev4_1") +(define_cpu_unit "ev4_fbox,ev4_fdiv" "ev4_2") +(define_reservation "ev4_ib01" "ev4_ib0|ev4_ib1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev4_multi" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "multi")) + "ev4_ib0+ev4_ib1") + +; Loads from L0 completes in three cycles. adjust_cost still factors +; in user-specified memory latency, so return 1 here. +(define_insn_reservation "ev4_ld" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ild,fld,ldsym,ld_l")) + "ev4_ib01+ev4_abox") + +; Stores can issue before the data (but not address) is ready. +(define_insn_reservation "ev4_ist" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ist")) + "ev4_ib1+ev4_abox") + +; ??? Separate from ev4_ist because store_data_bypass_p can't handle +; the patterns with multiple sets, like store-conditional. +(define_insn_reservation "ev4_ist_c" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "st_c")) + "ev4_ib1+ev4_abox") + +(define_insn_reservation "ev4_fst" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fst")) + "ev4_ib0+ev4_abox") + +; Memory barrier blocks ABOX insns until it's acknowledged by the external +; memory bus. This may be *quite* slow. Setting this to 4 cycles gets +; about all the benefit without making the DFA too large. +(define_insn_reservation "ev4_mb" 4 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "mb")) + "ev4_ib1+ev4_abox,ev4_abox*3") + +; Branches have no delay cost, but do tie up the unit for two cycles. 
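+;
+; In the reservation strings used here, '+' claims units in the same cycle,
+; ',' advances to the next cycle, '|' picks either unit, and '*N' repeats a
+; unit for N consecutive cycles.  So "ev4_ib1+ev4_bbox,ev4_bbox" below
+; reads: issue through IB1 and the branch box this cycle, then keep the
+; branch box busy for one more cycle, which is how the two-cycle tie-up
+; just described is encoded.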
+(define_insn_reservation "ev4_ibr" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "ibr,jsr")) + "ev4_ib1+ev4_bbox,ev4_bbox") + +(define_insn_reservation "ev4_callpal" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "callpal")) + "ev4_ib1+ev4_bbox,ev4_bbox") + +(define_insn_reservation "ev4_fbr" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fbr")) + "ev4_ib0+ev4_bbox,ev4_bbox") + +; Arithmetic insns are normally have their results available after +; two cycles. There are a number of exceptions. + +(define_insn_reservation "ev4_iaddlog" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "iadd,ilog")) + "ev4_ib0+ev4_ebox") + +(define_bypass 1 + "ev4_iaddlog" + "ev4_ibr,ev4_iaddlog,ev4_shiftcm,ev4_icmp,ev4_imulsi,ev4_imuldi") + +(define_insn_reservation "ev4_shiftcm" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "shift,icmov")) + "ev4_ib0+ev4_ebox") + +(define_insn_reservation "ev4_icmp" 2 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "icmp")) + "ev4_ib0+ev4_ebox") + +(define_bypass 1 "ev4_icmp" "ev4_ibr") + +(define_bypass 0 + "ev4_iaddlog,ev4_shiftcm,ev4_icmp" + "ev4_ist" + "store_data_bypass_p") + +; Multiplies use a non-pipelined imul unit. Also, "no [ebox] insn can +; be issued exactly three cycles before an integer multiply completes". + +(define_insn_reservation "ev4_imulsi" 21 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "si"))) + "ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox") + +(define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p") + +(define_insn_reservation "ev4_imuldi" 23 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "!si"))) + "ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox") + +(define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p") + +; Most FP insns have a 6 cycle latency, but with a 4 cycle bypass back in. +(define_insn_reservation "ev4_fpop" 6 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "fadd,fmul,fcpys,fcmov")) + "ev4_ib1+ev4_fbox") + +(define_bypass 4 "ev4_fpop" "ev4_fpop") + +; The floating point divider is not pipelined. Also, "no FPOP insn can be +; issued exactly five or exactly six cycles before an fdiv insn completes". + +(define_insn_reservation "ev4_fdivsf" 34 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "ev4_ib1+ev4_fdiv,ev4_fdiv*28,ev4_fdiv+ev4_fbox,ev4_fbox") + +(define_insn_reservation "ev4_fdivdf" 63 + (and (eq_attr "tune" "ev4") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "ev4_ib1+ev4_fdiv,ev4_fdiv*57,ev4_fdiv+ev4_fbox,ev4_fbox") + +; Traps don't consume or produce data. +(define_insn_reservation "ev4_misc" 1 + (and (eq_attr "tune" "ev4") + (eq_attr "type" "misc")) + "ev4_ib1") diff --git a/gcc-4.9/gcc/config/alpha/ev5.md b/gcc-4.9/gcc/config/alpha/ev5.md new file mode 100644 index 000000000..9d1871ea9 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/ev5.md @@ -0,0 +1,194 @@ +;; Scheduling description for Alpha EV5. +;; Copyright (C) 2002-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; EV5 has two asymmetric integer units, E0 and E1, plus separate +;; FP add and multiply units. + +(define_automaton "ev5_0,ev5_1") +(define_cpu_unit "ev5_e0,ev5_e1,ev5_fa,ev5_fm" "ev5_0") +(define_reservation "ev5_e01" "ev5_e0|ev5_e1") +(define_reservation "ev5_fam" "ev5_fa|ev5_fm") +(define_cpu_unit "ev5_imul" "ev5_0") +(define_cpu_unit "ev5_fdiv" "ev5_1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev5_multi" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "multi")) + "ev5_e0+ev5_e1+ev5_fa+ev5_fm") + +; Stores can only issue to E0, and may not issue with loads. +; Model this with some fake units. + +(define_cpu_unit "ev5_l0,ev5_l1,ev5_st" "ev5_0") +(define_reservation "ev5_ld" "ev5_l0|ev5_l1") +(exclusion_set "ev5_l0,ev5_l1" "ev5_st") + +(define_insn_reservation "ev5_st" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ist,fst,st_c,mb")) + "ev5_e0+ev5_st") + +; Loads from L0 complete in two cycles. adjust_cost still factors +; in user-specified memory latency, so return 1 here. +(define_insn_reservation "ev5_ld" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ild,fld,ldsym")) + "ev5_e01+ev5_ld") + +(define_insn_reservation "ev5_ld_l" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ld_l")) + "ev5_e0+ev5_ld") + +; Integer branches slot only to E1. +(define_insn_reservation "ev5_ibr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ibr")) + "ev5_e1") + +(define_insn_reservation "ev5_callpal" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "callpal")) + "ev5_e1") + +(define_insn_reservation "ev5_jsr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "jsr")) + "ev5_e1") + +(define_insn_reservation "ev5_shift" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "shift")) + "ev5_e0") + +(define_insn_reservation "ev5_mvi" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "mvi")) + "ev5_e0") + +(define_insn_reservation "ev5_cmov" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "icmov")) + "ev5_e01") + +(define_insn_reservation "ev5_iadd" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "iadd")) + "ev5_e01") + +(define_insn_reservation "ev5_ilogcmp" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "ilog,icmp")) + "ev5_e01") + +; Conditional move and branch can issue the same cycle as the test. +(define_bypass 0 "ev5_ilogcmp" "ev5_ibr,ev5_cmov" "if_test_bypass_p") + +; Multiplies use a non-pipelined imul unit. Also, "no insn can be issued +; to E0 exactly two cycles before an integer multiply completes". + +(define_insn_reservation "ev5_imull" 8 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "si"))) + "ev5_e0+ev5_imul,ev5_imul*3,nothing,ev5_e0") + +(define_insn_reservation "ev5_imulq" 12 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "di"))) + "ev5_e0+ev5_imul,ev5_imul*7,nothing,ev5_e0") + +(define_insn_reservation "ev5_imulh" 14 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "imul") + (eq_attr "opsize" "udi"))) + "ev5_e0+ev5_imul,ev5_imul*7,nothing*3,ev5_e0") + +; The multiplier is unable to receive data from Ebox bypass paths. The +; instruction issues at the expected time, but its latency is increased +; by the time it takes for the input data to become available to the +; multiplier. For example, an IMULL instruction issued one cycle later +; than an ADDL instruction, which produced one of its operands, has a +; latency of 10 (8 + 2). 
If the IMULL instruction is issued two cycles +; later than the ADDL instruction, the latency is 9 (8 + 1). +; +; Model this instead with increased latency on the input instruction. + +(define_bypass 3 + "ev5_ld,ev5_ld_l,ev5_shift,ev5_mvi,ev5_cmov,ev5_iadd,ev5_ilogcmp" + "ev5_imull,ev5_imulq,ev5_imulh") + +(define_bypass 9 "ev5_imull" "ev5_imull,ev5_imulq,ev5_imulh") +(define_bypass 13 "ev5_imulq" "ev5_imull,ev5_imulq,ev5_imulh") +(define_bypass 15 "ev5_imulh" "ev5_imull,ev5_imulq,ev5_imulh") + +; Similarly for the FPU we have two asymmetric units. + +(define_insn_reservation "ev5_fadd" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fadd,fcmov")) + "ev5_fa") + +(define_insn_reservation "ev5_fbr" 1 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fbr")) + "ev5_fa") + +(define_insn_reservation "ev5_fcpys" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fcpys")) + "ev5_fam") + +(define_insn_reservation "ev5_fmul" 4 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "fmul")) + "ev5_fm") + +; The floating point divider is not pipelined. Also, "no insn can be issued +; to FA exactly five before an fdiv insn completes". +; +; ??? Do not model this late reservation due to the enormously increased +; size of the resulting DFA. +; +; ??? Putting ev5_fa and ev5_fdiv alone into the same automata produces +; a DFA of acceptable size, but putting ev5_fm and ev5_fa into separate +; automata produces incorrect results for insns that can choose one or +; the other, i.e. ev5_fcpys. + +(define_insn_reservation "ev5_fdivsf" 15 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + ; "ev5_fa+ev5_fdiv,ev5_fdiv*9,ev5_fa+ev5_fdiv,ev5_fdiv*4" + "ev5_fa+ev5_fdiv,ev5_fdiv*14") + +(define_insn_reservation "ev5_fdivdf" 22 + (and (eq_attr "tune" "ev5") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + ; "ev5_fa+ev5_fdiv,ev5_fdiv*17,ev5_fa+ev5_fdiv,ev5_fdiv*4" + "ev5_fa+ev5_fdiv,ev5_fdiv*21") + +; Traps don't consume or produce data; rpcc is latency 2 if we ever add it. +(define_insn_reservation "ev5_misc" 2 + (and (eq_attr "tune" "ev5") + (eq_attr "type" "misc")) + "ev5_e0") diff --git a/gcc-4.9/gcc/config/alpha/ev6.md b/gcc-4.9/gcc/config/alpha/ev6.md new file mode 100644 index 000000000..e0612a411 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/ev6.md @@ -0,0 +1,181 @@ +;; Scheduling description for Alpha EV6. +;; Copyright (C) 2002-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; EV6 can issue 4 insns per clock. It's out-of-order, so this isn't +; expected to help over-much, but a precise description can be important +; for software pipelining. +; +; EV6 has two symmetric pairs ("clusters") of two asymmetric integer +; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. +; +; ??? The clusters have independent register files that are re-synced +; every cycle. 
Thus there is one additional cycle of latency between +; insns issued on different clusters. Possibly model that by duplicating +; all EBOX insn_reservations that can issue to either cluster, increasing +; all latencies by one, and adding bypasses within the cluster. +; +; ??? In addition, instruction order affects cluster issue. + +(define_automaton "ev6_0,ev6_1") +(define_cpu_unit "ev6_u0,ev6_u1,ev6_l0,ev6_l1" "ev6_0") +(define_reservation "ev6_u" "ev6_u0|ev6_u1") +(define_reservation "ev6_l" "ev6_l0|ev6_l1") +(define_reservation "ev6_ebox" "ev6_u|ev6_l") + +(define_cpu_unit "ev6_fa" "ev6_1") +(define_cpu_unit "ev6_fm,ev6_fst0,ev6_fst1" "ev6_0") +(define_reservation "ev6_fst" "ev6_fst0|ev6_fst1") + +; Assume type "multi" single issues. +(define_insn_reservation "ev6_multi" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "multi")) + "ev6_u0+ev6_u1+ev6_l0+ev6_l1+ev6_fa+ev6_fm+ev6_fst0+ev6_fst1") + +; Integer loads take at least 3 clocks, and only issue to lower units. +; adjust_cost still factors in user-specified memory latency, so return 1 here. +(define_insn_reservation "ev6_ild" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ild,ldsym,ld_l")) + "ev6_l") + +(define_insn_reservation "ev6_ist" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ist,st_c")) + "ev6_l") + +(define_insn_reservation "ev6_mb" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "mb")) + "ev6_l1") + +; FP loads take at least 4 clocks. adjust_cost still factors +; in user-specified memory latency, so return 2 here. +(define_insn_reservation "ev6_fld" 2 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fld")) + "ev6_l") + +; The FPU communicates with memory and the integer register file +; via two fp store units. We need a slot in the fst immediately, and +; a slot in LOW after the operand data is ready. At which point the +; data may be moved either to the store queue or the integer register +; file and the insn retired. + +(define_insn_reservation "ev6_fst" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fst")) + "ev6_fst,nothing,ev6_l") + +; Arithmetic goes anywhere. +(define_insn_reservation "ev6_arith" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "iadd,ilog,icmp")) + "ev6_ebox") + +; Motion video insns also issue only to U0, and take three ticks. +(define_insn_reservation "ev6_mvi" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "mvi")) + "ev6_u0") + +; Shifts issue to upper units. +(define_insn_reservation "ev6_shift" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "shift")) + "ev6_u") + +; Multiplies issue only to U1, and all take 7 ticks. +(define_insn_reservation "ev6_imul" 7 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "imul")) + "ev6_u1") + +; Conditional moves decompose into two independent primitives, each taking +; one cycle. Since ev6 is out-of-order, we can't see anything but two cycles. +(define_insn_reservation "ev6_icmov" 2 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "icmov")) + "ev6_ebox,ev6_ebox") + +; Integer branches issue to upper units +(define_insn_reservation "ev6_ibr" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ibr,callpal")) + "ev6_u") + +; Calls only issue to L0. +(define_insn_reservation "ev6_jsr" 1 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "jsr")) + "ev6_l0") + +; Ftoi/itof only issue to lower pipes. 
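+; These transfers come from the FIX extension (cf. the mfix option and
+; AMASK_FIX elsewhere in this patch); with -mfix, a plain conversion such
+; as the illustrative function below can move the value between the
+; integer and FP register files directly instead of bouncing it through
+; memory:
+;
+;   double to_double (long x) { return (double) x; }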
+(define_insn_reservation "ev6_itof" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "itof")) + "ev6_l") + +(define_insn_reservation "ev6_ftoi" 3 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "ftoi")) + "ev6_fst,nothing,ev6_l") + +(define_insn_reservation "ev6_fmul" 4 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fmul")) + "ev6_fm") + +(define_insn_reservation "ev6_fadd" 4 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fadd,fcpys,fbr")) + "ev6_fa") + +(define_bypass 6 "ev6_fmul,ev6_fadd" "ev6_fst,ev6_ftoi") + +(define_insn_reservation "ev6_fcmov" 8 + (and (eq_attr "tune" "ev6") + (eq_attr "type" "fcmov")) + "ev6_fa,nothing*3,ev6_fa") + +(define_bypass 10 "ev6_fcmov" "ev6_fst,ev6_ftoi") + +(define_insn_reservation "ev6_fdivsf" 12 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "ev6_fa*9") + +(define_insn_reservation "ev6_fdivdf" 15 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "ev6_fa*12") + +(define_insn_reservation "ev6_sqrtsf" 18 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "si"))) + "ev6_fa*15") + +(define_insn_reservation "ev6_sqrtdf" 33 + (and (eq_attr "tune" "ev6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "di"))) + "ev6_fa*30") diff --git a/gcc-4.9/gcc/config/alpha/freebsd.h b/gcc-4.9/gcc/config/alpha/freebsd.h new file mode 100644 index 000000000..9e52d33e4 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/freebsd.h @@ -0,0 +1,68 @@ +/* Definitions for DEC Alpha/AXP running FreeBSD using the ELF format + Copyright (C) 2000-2014 Free Software Foundation, Inc. + Contributed by David E. O'Brien and BSDi. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER } + +/* Provide a CPP_SPEC appropriate for FreeBSD/alpha -- dealing with + the GCC option `-posix'. */ + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" + +#define LINK_SPEC "%{G*} %{relax:-relax} \ + %{p:%nconsider using '-pg' instead of '-p' with gprof(1)} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(fbsd_dynamic_linker) } \ + %{static:-Bstatic}} \ + %{symbolic:-Bsymbolic}" + + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with . GCC defaults come from c-decl.c, + c-common.c, and config//.h. */ + +/* alpha.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#define TARGET_ELF 1 + +#undef HAS_INIT_SECTION + +/* Show that we need a GP when profiling. 
*/ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't default to pcc-struct-return, we want to retain compatibility with + older FreeBSD releases AND pcc-struct-return may not be reentrant. */ + +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 0 diff --git a/gcc-4.9/gcc/config/alpha/linux-elf.h b/gcc-4.9/gcc/config/alpha/linux-elf.h new file mode 100644 index 000000000..bdefe237f --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/linux-elf.h @@ -0,0 +1,53 @@ +/* Definitions of target machine for GNU compiler + for Alpha Linux-based GNU systems using ELF. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ +{ "elf_dynamic_linker", ELF_DYNAMIC_LINKER }, + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" +#if DEFAULT_LIBC == LIBC_UCLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}" +#elif DEFAULT_LIBC == LIBC_GLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}" +#else +#error "Unsupported DEFAULT_LIBC" +#endif +#define GNU_USER_DYNAMIC_LINKER \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER) + +#define ELF_DYNAMIC_LINKER GNU_USER_DYNAMIC_LINKER + +#define LINK_SPEC "-m elf64alpha %{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(elf_dynamic_linker)} \ + %{static:-static}}" + +#undef LIB_SPEC +#define LIB_SPEC \ +"%{pthread:-lpthread} %{shared:-lc}%{!shared:%{profile:-lc_p}%{!profile:-lc}} " + +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack diff --git a/gcc-4.9/gcc/config/alpha/linux.h b/gcc-4.9/gcc/config/alpha/linux.h new file mode 100644 index 000000000..966e9b2d0 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/linux.h @@ -0,0 +1,102 @@ +/* Definitions of target machine for GNU compiler, + for Alpha Linux-based GNU systems. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__gnu_linux__"); \ + builtin_define ("_LONGLONG"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + /* The GNU C++ standard library requires this. */ \ + if (c_dialect_cxx ()) \ + builtin_define ("_GNU_SOURCE"); \ + } while (0) + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared: %{profile:-lc_p}%{!profile:-lc}}" + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't care about faults in the prologue. */ +#undef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 1 + +/* OS fixes up EV5 data fault on prefetch. */ +#undef TARGET_FIXUP_EV5_PREFETCH +#define TARGET_FIXUP_EV5_PREFETCH 1 + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#ifdef SINGLE_LIBC +#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) +#else +#define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) +#endif + +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_libc_has_function + +#define TARGET_POSIX_IO + +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +/* Define if long doubles should be mangled as 'g'. */ +#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + +/* -mcpu=native handling only makes sense with compiler running on + an Alpha chip. */ +#if defined(__alpha__) || defined(__alpha) +extern const char *host_detect_local_cpu (int argc, const char **argv); +# define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, + +# define MCPU_MTUNE_NATIVE_SPECS \ + " %{mcpu=native:%. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + } while (0) + + +/* NetBSD doesn't use the LANGUAGE* built-ins. */ +#undef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() /* nothing */ + + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + + +/* Provide a CPP_SPEC appropriate for NetBSD/alpha. We use + this to pull in CPP specs that all NetBSD configurations need. */ + +#undef CPP_SPEC +#define CPP_SPEC NETBSD_CPP_SPEC + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, \ + { "netbsd_endfile_spec", NETBSD_ENDFILE_SPEC }, + + +/* Provide a LINK_SPEC appropriate for a NetBSD/alpha ELF target. */ + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %(netbsd_link_spec)" + +#define NETBSD_ENTRY_POINT "__start" + + +/* Provide an ENDFILE_SPEC appropriate for NetBSD/alpha ELF. Here we + add crtend.o, which provides part of the support for getting + C++ file-scope static objects deconstructed after exiting "main". + + We also need to handle the GCC option `-ffast-math'. 
*/ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfm%O%s} \ + %(netbsd_endfile_spec)" + +#define HAVE_ENABLE_EXECUTE_STACK diff --git a/gcc-4.9/gcc/config/alpha/openbsd.h b/gcc-4.9/gcc/config/alpha/openbsd.h new file mode 100644 index 000000000..74f16e134 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/openbsd.h @@ -0,0 +1,45 @@ +/* Configuration file for an alpha OpenBSD target. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Controlling the compilation driver. */ + +/* run-time target specifications */ +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + OPENBSD_OS_CPP_BUILTINS_ELF(); \ + OPENBSD_OS_CPP_BUILTINS_LP64(); \ + } while (0) + +/* Layout of source language data types. */ + +/* This must agree with */ +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + + +#define LOCAL_LABEL_PREFIX "." diff --git a/gcc-4.9/gcc/config/alpha/predicates.md b/gcc-4.9/gcc/config/alpha/predicates.md new file mode 100644 index 000000000..c68e83a70 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/predicates.md @@ -0,0 +1,653 @@ +;; Predicate definitions for DEC Alpha. +;; Copyright (C) 2004-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return 1 if OP is the zero constant for MODE. +(define_predicate "const0_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Returns true if OP is either the constant zero or a register. +(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const0_operand"))) + +;; Return 1 if OP is a constant in the range of 0-63 (for a shift) or +;; any register. +(define_predicate "reg_or_6bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 64") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is an 8-bit constant. +(define_predicate "cint8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256"))) + +;; Return 1 if OP is an 8-bit constant or any register. 
+(define_predicate "reg_or_8bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant or any register. +(define_predicate "reg_or_cint_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand"))) + +;; Return 1 if the operand is a valid second operand to an add insn. +(define_predicate "add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_K (op) || satisfies_constraint_L (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a valid second operand to a +;; sign-extending add insn. +(define_predicate "sext_add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_I (op) || satisfies_constraint_O (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a non-symbolic constant operand that +;; does not satisfy add_operand. +(define_predicate "non_add_const_operand" + (and (match_code "const_int,const_double,const_vector") + (not (match_operand 0 "add_operand")))) + +;; Return 1 if the operand is a non-symbolic, nonzero constant operand. +(define_predicate "non_zero_const_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op != CONST0_RTX (mode)"))) + +;; Return 1 if OP is the constant 4 or 8. +(define_predicate "const48_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 4 || INTVAL (op) == 8"))) + +;; Return 1 if OP is a valid first operand to an AND insn. +(define_predicate "and_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100 + || zap_mask (INTVAL (op))") + (if_then_else (match_code "const_double") + (match_test "GET_MODE (op) == VOIDmode + && zap_mask (CONST_DOUBLE_LOW (op)) + && zap_mask (CONST_DOUBLE_HIGH (op))") + (match_operand 0 "register_operand")))) + +;; Return 1 if OP is a valid first operand to an IOR or XOR insn. +(define_predicate "or_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant that is the width, in bits, of an integral +;; mode not larger than DImode. +(define_predicate "mode_width_operand" + (match_code "const_int") +{ + HOST_WIDE_INT i = INTVAL (op); + return i == 8 || i == 16 || i == 32 || i == 64; +}) + +;; Return 1 if OP is a constant that is a mask of ones of width of an +;; integral machine mode not larger than DImode. +(define_predicate "mode_mask_operand" + (match_code "const_int,const_double") +{ + if (CONST_INT_P (op)) + { + HOST_WIDE_INT value = INTVAL (op); + + if (value == 0xff) + return 1; + if (value == 0xffff) + return 1; + if (value == 0xffffffff) + return 1; + if (value == -1) + return 1; + } + else if (HOST_BITS_PER_WIDE_INT == 32 && GET_CODE (op) == CONST_DOUBLE) + { + if (CONST_DOUBLE_LOW (op) == 0xffffffff && CONST_DOUBLE_HIGH (op) == 0) + return 1; + } + return 0; +}) + +;; Return 1 if OP is a multiple of 8 less than 64. +(define_predicate "mul8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT i = INTVAL (op); + return i < 64 && i % 8 == 0; +}) + +;; Return 1 if OP is a hard floating-point register. 
+(define_predicate "hard_fp_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS; +}) + +;; Return 1 if OP is a hard general register. +(define_predicate "hard_int_register_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS; +}) + +;; Return 1 if OP is something that can be reloaded into a register; +;; if it is a MEM, it need not be valid. +(define_predicate "some_operand" + (ior (match_code "reg,mem,const_int,const_double,const_vector, + label_ref,symbol_ref,const,high") + (and (match_code "subreg") + (match_test "some_operand (SUBREG_REG (op), VOIDmode)")))) + +;; Likewise, but don't accept constants. +(define_predicate "some_ni_operand" + (ior (match_code "reg,mem") + (and (match_code "subreg") + (match_test "some_ni_operand (SUBREG_REG (op), VOIDmode)")))) + +;; Return 1 if OP is a valid operand for the source of a move insn. +(define_predicate "input_operand" + (match_code "label_ref,symbol_ref,const,high,reg,subreg,mem, + const_double,const_vector,const_int") +{ + switch (GET_CODE (op)) + { + case LABEL_REF: + case SYMBOL_REF: + case CONST: + if (TARGET_EXPLICIT_RELOCS) + { + /* We don't split symbolic operands into something unintelligable + until after reload, but we do not wish non-small, non-global + symbolic operands to be reconstructed from their high/lo_sum + form. */ + return (small_symbolic_operand (op, mode) + || global_symbolic_operand (op, mode) + || gotdtp_symbolic_operand (op, mode) + || gottp_symbolic_operand (op, mode)); + } + /* VMS still has a 32-bit mode. */ + return mode == ptr_mode || mode == Pmode; + + case HIGH: + return (TARGET_EXPLICIT_RELOCS + && local_symbolic_operand (XEXP (op, 0), mode)); + + case REG: + return 1; + + case SUBREG: + if (register_operand (op, mode)) + return 1; + /* ... fall through ... */ + case MEM: + return ((TARGET_BWX || (mode != HImode && mode != QImode)) + && general_operand (op, mode)); + + case CONST_DOUBLE: + return op == CONST0_RTX (mode); + + case CONST_VECTOR: + if (reload_in_progress || reload_completed) + return alpha_legitimate_constant_p (mode, op); + return op == CONST0_RTX (mode); + + case CONST_INT: + if (mode == QImode || mode == HImode) + return true; + if (reload_in_progress || reload_completed) + return alpha_legitimate_constant_p (mode, op); + return add_operand (op, mode); + + default: + gcc_unreachable (); + } + return 0; +}) + +;; Return 1 if OP is a SYMBOL_REF for a function known to be in this +;; file, and in the same section as the current function. + +(define_predicate "samegp_function_operand" + (match_code "symbol_ref") +{ + /* Easy test for recursion. */ + if (op == XEXP (DECL_RTL (current_function_decl), 0)) + return true; + + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (! SYMBOL_REF_LOCAL_P (op)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT, + and thus must share the same gp. */ + return ! 
SYMBOL_REF_EXTERNAL_P (op); +}) + +;; Return 1 if OP is a SYMBOL_REF for which we can make a call via bsr. +(define_predicate "direct_call_operand" + (match_operand 0 "samegp_function_operand") +{ + /* If profiling is implemented via linker tricks, we can't jump + to the nogp alternate entry point. Note that crtl->profile + would not be correct, since that doesn't indicate if the target + function uses profiling. */ + /* ??? TARGET_PROFILING_NEEDS_GP isn't really the right test, + but is approximately correct for the OSF ABIs. Don't know + what to do for VMS, NT, or UMK. */ + if (!TARGET_PROFILING_NEEDS_GP && profile_flag) + return false; + + /* Must be a function. In some cases folks create thunks in static + data structures and then make calls to them. If we allow the + direct call, we'll get an error from the linker about !samegp reloc + against a symbol without a .prologue directive. */ + if (!SYMBOL_REF_FUNCTION_P (op)) + return false; + + /* Must be "near" so that the branch is assumed to reach. With + -msmall-text, this is assumed true of all local symbols. Since + we've already checked samegp, locality is already assured. */ + if (TARGET_SMALL_TEXT) + return true; + + return false; +}) + +;; Return 1 if OP is a valid operand for the MEM of a CALL insn. +;; +;; For TARGET_ABI_OSF, we want to restrict to R27 or a pseudo. + +(define_predicate "call_operand" + (ior (match_code "symbol_ref") + (and (match_code "reg") + (ior (match_test "!TARGET_ABI_OSF") + (match_test "!HARD_REGISTER_P (op)") + (match_test "REGNO (op) == R27_REG"))))) + +;; Return true if OP is a LABEL_REF, or SYMBOL_REF or CONST referencing +;; a (non-tls) variable known to be defined in this file. +(define_predicate "local_symbolic_operand" + (match_code "label_ref,const,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) == LABEL_REF) + return 1; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + return (SYMBOL_REF_LOCAL_P (op) + && !SYMBOL_REF_WEAK (op) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Return true if OP is a SYMBOL_REF or CONST referencing a variable +;; known to be defined in this file in the small data area. +(define_predicate "small_symbolic_operand" + (match_code "const,symbol_ref") +{ + HOST_WIDE_INT ofs = 0, max_ofs = 0; + + if (! TARGET_SMALL_DATA) + return false; + + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + { + ofs = INTVAL (XEXP (XEXP (op, 0), 1)); + op = XEXP (XEXP (op, 0), 0); + } + + if (GET_CODE (op) != SYMBOL_REF) + return false; + + /* ??? There's no encode_section_info equivalent for the rtl + constant pool, so SYMBOL_FLAG_SMALL never gets set. */ + if (CONSTANT_POOL_ADDRESS_P (op)) + { + max_ofs = GET_MODE_SIZE (get_pool_mode (op)); + if (max_ofs > g_switch_value) + return false; + } + else if (SYMBOL_REF_LOCAL_P (op) + && SYMBOL_REF_SMALL_P (op) + && !SYMBOL_REF_WEAK (op) + && !SYMBOL_REF_TLS_MODEL (op)) + { + if (SYMBOL_REF_DECL (op)) + max_ofs = tree_to_uhwi (DECL_SIZE_UNIT (SYMBOL_REF_DECL (op))); + } + else + return false; + + /* Given that we know that the GP is always 8 byte aligned, we can + always adjust by 7 without overflowing. */ + if (max_ofs < 8) + max_ofs = 8; + + /* Since we know this is an object in a small data section, we know the + entire section is addressable via GP. We don't know where the section + boundaries are, but we know the entire object is within. 
*/ + return IN_RANGE (ofs, 0, max_ofs - 1); +}) + +;; Return true if OP is a SYMBOL_REF or CONST referencing a variable +;; not known (or known not) to be defined in this file. +(define_predicate "global_symbolic_operand" + (match_code "const,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + return ((!SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_WEAK (op)) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, +;; possibly with an offset. +(define_predicate "symbolic_operand" + (ior (match_code "symbol_ref,label_ref") + (and (match_code "const") + (match_test "GET_CODE (XEXP (op,0)) == PLUS + && (GET_CODE (XEXP (XEXP (op,0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (op,0), 0)) == LABEL_REF) + && CONST_INT_P (XEXP (XEXP (op,0), 1))")))) + +;; Return true if OP is valid for 16-bit DTP relative relocations. +(define_predicate "dtp16_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 32-bit DTP relative relocations. +(define_predicate "dtp32_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 64-bit DTP relative relocations. +(define_predicate "gotdtp_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_DTPREL)"))) + +;; Return true if OP is valid for 16-bit TP relative relocations. +(define_predicate "tp16_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_TPREL)"))) + +;; Return true if OP is valid for 32-bit TP relative relocations. +(define_predicate "tp32_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_TPREL)"))) + +;; Return true if OP is valid for 64-bit TP relative relocations. +(define_predicate "gottp_symbolic_operand" + (and (match_code "const") + (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_TPREL)"))) + +;; Return 1 if this memory address is a known aligned register plus +;; a constant. It must be a valid address. This means that we can do +;; this as an aligned reference plus some offset. +;; +;; Take into account what reload will do. Oh god this is awful. +;; The horrible comma-operator construct below is to prevent genrecog +;; from thinking that this predicate accepts REG and SUBREG. We don't +;; use recog during reload, so pretending these codes are accepted +;; pessimizes things a tad. + +(define_special_predicate "aligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 1; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! 
memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 0; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) >= 32); +}) + +;; Similar, but return 1 if OP is a MEM which is not alignable. + +(define_special_predicate "unaligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 0; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 1; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) < 32); +}) + +;; Return 1 if OP is any memory location. During reload a pseudo matches. +(define_special_predicate "any_memory_operand" + (match_code "mem,reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + if (MEM_P (op)) + return true; + if (reload_in_progress && REG_P (op)) + { + unsigned regno = REGNO (op); + if (HARD_REGISTER_NUM_P (regno)) + return false; + else + return reg_renumber[regno] < 0; + } + + return false; +}) + +;; Return 1 is OP is a memory location that is not a reference +;; (using an AND) to an unaligned location. Take into account +;; what reload will do. +(define_special_predicate "normal_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) != AND")))) + +;; Returns 1 if OP is not an eliminable register. +;; +;; This exists to cure a pathological failure in the s8addq (et al) patterns, +;; +;; long foo () { long t; bar(); return (long) &t * 26107; } +;; +;; which run afoul of a hack in reload to cure a (presumably) similar +;; problem with lea-type instructions on other targets. But there is +;; one of us and many of them, so work around the problem by selectively +;; preventing combine from making the optimization. + +(define_predicate "reg_not_elim_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return op != frame_pointer_rtx && op != arg_pointer_rtx; +}) + +;; Accept a register, but not a subreg of any kind. This allows us to +;; avoid pathological cases in reload wrt data movement common in +;; int->fp conversion. */ +(define_predicate "reg_no_subreg_operand" + (and (match_code "reg") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a valid Alpha comparison operator for "cbranch" +;; instructions. +(define_predicate "alpha_cbranch_operator" + (ior (match_operand 0 "ordered_comparison_operator") + (match_code "ordered,unordered"))) + +;; Return 1 if OP is a valid Alpha comparison operator for "cmp" style +;; instructions. +(define_predicate "alpha_comparison_operator" + (match_code "eq,le,lt,leu,ltu")) + +;; Similarly, but with swapped operands. 
+(define_predicate "alpha_swapped_comparison_operator" + (match_code "eq,ge,gt,gtu")) + +;; Return 1 if OP is a valid Alpha comparison operator against zero +;; for "bcc" style instructions. +(define_predicate "alpha_zero_comparison_operator" + (match_code "eq,ne,le,lt,leu,ltu")) + +;; Return 1 if OP is a signed comparison operation. +(define_predicate "signed_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt")) + +;; Return 1 if OP is a valid Alpha floating point comparison operator. +(define_predicate "alpha_fp_comparison_operator" + (match_code "eq,le,lt,unordered")) + +;; Return 1 if this is a divide or modulus operator. +(define_predicate "divmod_operator" + (match_code "div,mod,udiv,umod")) + +;; Return 1 if this is a float->int conversion operator. +(define_predicate "fix_operator" + (match_code "fix,unsigned_fix")) + +;; Recognize an addition operation that includes a constant. Used to +;; convince reload to canonize (plus (plus reg c1) c2) during register +;; elimination. + +(define_predicate "addition_operation" + (and (match_code "plus") + (match_test "register_operand (XEXP (op, 0), mode) + && satisfies_constraint_K (XEXP (op, 1))"))) + +;; For TARGET_EXPLICIT_RELOCS, we don't obfuscate a SYMBOL_REF to a +;; small symbolic operand until after reload. At which point we need +;; to replace (mem (symbol_ref)) with (mem (lo_sum $29 symbol_ref)) +;; so that sched2 has the proper dependency information. */ +(define_predicate "some_small_symbolic_operand" + (match_code "set,parallel,prefetch,unspec,unspec_volatile") +{ + /* Avoid search unless necessary. */ + if (!TARGET_EXPLICIT_RELOCS || !reload_completed) + return false; + return for_each_rtx (&op, some_small_symbolic_operand_int, NULL); +}) + +;; Accept a register, or a memory if BWX is enabled. +(define_predicate "reg_or_bwx_memory_operand" + (ior (match_operand 0 "register_operand") + (and (match_test "TARGET_BWX") + (match_operand 0 "memory_operand")))) + +;; Accept a memory whose address is only a register. +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) diff --git a/gcc-4.9/gcc/config/alpha/sync.md b/gcc-4.9/gcc/config/alpha/sync.md new file mode 100644 index 000000000..2145fdf2b --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/sync.md @@ -0,0 +1,343 @@ +;; GCC machine description for Alpha synchronization instructions. +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_code_iterator FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) +(define_code_attr fetchop_pred + [(plus "add_operand") (minus "reg_or_8bit_operand") + (ior "or_operand") (xor "or_operand") (and "and_operand")]) +(define_code_attr fetchop_constr + [(plus "rKL") (minus "rI") (ior "rIN") (xor "rIN") (and "riNHM")]) + + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" + "mb" + [(set_attr "type" "mb")]) + +(define_insn "load_locked_" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (unspec_volatile:I48MODE + [(match_operand:I48MODE 1 "memory_operand" "m")] + UNSPECV_LL))] + "" + "ld_l %0,%1" + [(set_attr "type" "ld_l")]) + +(define_insn "store_conditional_" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) + (set (match_operand:I48MODE 1 "memory_operand" "=m") + (match_operand:I48MODE 2 "reg_or_0_operand" "0"))] + "" + "st_c %0,%1" + [(set_attr "type" "st_c")]) + +;; The Alpha Architecture Handbook says that it is UNPREDICTABLE whether +;; the lock is cleared by a normal load or store. This means we cannot +;; expand a ll/sc sequence before reload, lest a register spill is +;; inserted inside the sequence. It is also UNPREDICTABLE whether the +;; lock is cleared by a TAKEN branch. This means that we can not expand +;; a ll/sc sequence containing a branch (i.e. compare-and-swap) until after +;; the final basic-block reordering pass. 
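+;; For illustration: the split that alpha_split_compare_and_swap performs
+;; once "epilogue_completed" holds is expected to yield a load-locked /
+;; store-conditional loop roughly like the sketch below for a DImode
+;; compare-and-swap.  The register numbers and the exact barrier placement
+;; are assumptions made only for this sketch; the real barriers depend on
+;; the success/failure memory models passed to the expander.
+;;
+;;         mb                      ; pre-barrier, if the model requires it
+;; retry:  ldq_l   $1,0($16)       ; load-locked the current value
+;;         cmpeq   $1,$17,$0       ; does it match the expected value?
+;;         beq     $0,fail         ; no: fail without storing
+;;         mov     $18,$1          ; yes: try to store the desired value
+;;         stq_c   $1,0($16)       ; store-conditional; $1 = 1 on success
+;;         beq     $1,retry        ; reservation lost: retry (strong CAS)
+;;         mb                      ; post-barrier, if the model requires it
+;; fail:
+;;
+;; No spill, reload, or unrelated memory access may land between the ldq_l
+;; and the stq_c, and the sequence contains branches, which is why the
+;; patterns below emit "#" and only split at "epilogue_completed", after
+;; the final basic-block reordering pass described above.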
+ +(define_expand "atomic_compare_and_swap" + [(parallel + [(set (match_operand:DI 0 "register_operand") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:I48MODE 3 "reg_or_8bit_operand") ;; expected + (match_operand:I48MODE 4 "add_operand") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG))])] + "" +{ + if (mode == SImode) + { + operands[3] = convert_modes (DImode, SImode, operands[3], 0); + operands[4] = convert_modes (DImode, SImode, operands[4], 0); + } +}) + +(define_insn_and_split "*atomic_compare_and_swap" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand" "=&r") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand" "+m") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "add_operand" "rKL") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_compare_and_swap (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:DI 0 "register_operand") ;; bool out + (match_operand:I12MODE 1 "register_operand") ;; val out + (match_operand:I12MODE 2 "mem_noofs_operand") ;; memory + (match_operand:I12MODE 3 "register_operand") ;; expected + (match_operand:I12MODE 4 "add_operand") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand")] ;; fail model + "" +{ + alpha_expand_compare_and_swap_12 (operands); + DONE; +}) + +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:DI 1 "register_operand" "=&r") ;; val out + (zero_extend:DI + (unspec_volatile:I12MODE [(const_int 0)] UNSPECV_CMPXCHG))) + (set (match_operand:I12MODE 2 "mem_noofs_operand" "+w") ;; memory + (unspec_volatile:I12MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "reg_or_0_operand" "rJ") ;; desired + (match_operand:DI 5 "register_operand" "r") ;; align + (match_operand:SI 6 "const_int_operand") ;; is_weak + (match_operand:SI 7 "const_int_operand") ;; succ model + (match_operand:SI 8 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG)) + (clobber (match_scratch:DI 9 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_compare_and_swap_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_exchange" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") ;; output + (match_operand:I48MODE 1 "memory_operand" "+m")) ;; memory + (set (match_dup 1) + (unspec:I48MODE + [(match_operand:I48MODE 2 "add_operand" "rKL") ;; input + 
(match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_XCHG)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_exchange (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "atomic_exchange" + [(match_operand:I12MODE 0 "register_operand") ;; output + (match_operand:I12MODE 1 "mem_noofs_operand") ;; memory + (match_operand:I12MODE 2 "reg_or_0_operand") ;; input + (match_operand:SI 3 "const_int_operand")] ;; model + "" +{ + alpha_expand_atomic_exchange_12 (operands); + DONE; +}) + +(define_insn_and_split "atomic_exchange_1" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; output + (zero_extend:DI + (match_operand:I12MODE 1 "mem_noofs_operand" "+w"))) ;; memory + (set (match_dup 1) + (unspec:I12MODE + [(match_operand:DI 2 "reg_or_8bit_operand" "rI") ;; input + (match_operand:DI 3 "register_operand" "r") ;; align + (match_operand:SI 4 "const_int_operand")] ;; model + UNSPEC_XCHG)) + (clobber (match_scratch:DI 5 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_exchange_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "" "")) + (match_operand:SI 2 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[0], operands[1], + NULL, NULL, operands[3], + (enum memmodel) INTVAL (operands[2])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_nand" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "register_operand" "r"))) + (match_operand:SI 2 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[0], operands[1], + NULL, NULL, operands[3], + (enum memmodel) INTVAL (operands[2])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_fetch_" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) + (match_operand:I48MODE 2 "" "")) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[1], operands[2], + operands[0], NULL, operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 1) + (match_operand:I48MODE 2 "register_operand" "r"))) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[1], operands[2], + operands[0], NULL, operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic__fetch" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + 
(FETCHOP:I48MODE + (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "" ""))) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) (match_dup 2)) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (, operands[1], operands[2], + NULL, operands[0], operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (not:I48MODE + (and:I48MODE (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "register_operand" "r")))) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE (and:I48MODE (match_dup 1) (match_dup 2))) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + alpha_split_atomic_op (NOT, operands[1], operands[2], + NULL, operands[0], operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) diff --git a/gcc-4.9/gcc/config/alpha/t-linux b/gcc-4.9/gcc/config/alpha/t-linux new file mode 100644 index 000000000..1b4a26f74 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/t-linux @@ -0,0 +1 @@ +MULTIARCH_DIRNAME = $(call if_multiarch,alpha-linux-gnu) diff --git a/gcc-4.9/gcc/config/alpha/t-vms b/gcc-4.9/gcc/config/alpha/t-vms new file mode 100644 index 000000000..12a702125 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/t-vms @@ -0,0 +1,21 @@ +# Copyright (C) 1996-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = mcpu=ev6 +MULTILIB_DIRNAMES = ev6 +MULTILIB_OSDIRNAMES = ev6 diff --git a/gcc-4.9/gcc/config/alpha/vms.h b/gcc-4.9/gcc/config/alpha/vms.h new file mode 100644 index 000000000..b2977784b --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/vms.h @@ -0,0 +1,306 @@ +/* Output variables, constants and external declarations, for GNU compiler. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Alpha/VMS object format is not really Elf, but this makes compiling + crtstuff.c and dealing with shared library initialization much easier. 
*/ +#define OBJECT_FORMAT_ELF + +/* Do not use TM clone registry as it currently doesn't work. Alpha/VMS + object is too far from ELF for supporting TM out of the box. */ +#define USE_TM_CLONE_REGISTRY 0 + +/* This enables certain macros in alpha.h, which will make an indirect + reference to an external symbol an invalid address. This needs to be + defined before we include alpha.h, since it determines which macros + are used for GO_IF_*. */ + +#define NO_EXTERNAL_INDIRECT_ADDRESS + +#define SUBTARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__ALPHA"); \ + if (TARGET_FLOAT_VAX) \ + builtin_define ("__G_FLOAT"); \ + else \ + builtin_define ("__IEEE_FLOAT"); \ + } while (0) + +#undef PCC_STATIC_STRUCT_RETURN + +#define MAX_OFILE_ALIGNMENT 524288 /* 8 x 2^16 by DEC Ada Test CD40VRA */ + +/* The maximum alignment 'malloc' honors. */ +#undef MALLOC_ABI_ALIGNMENT +#define MALLOC_ABI_ALIGNMENT \ + ((flag_vms_malloc64 && flag_vms_pointer_size != VMS_POINTER_SIZE_NONE \ + ? 16 : 8) * BITS_PER_UNIT) + +#undef FIXED_REGISTERS +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } + +#undef CALL_USED_REGISTERS +#define CALL_USED_REGISTERS \ + {1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. + + We allocate in the following order: + $f1 (nonsaved floating-point register) + $f10-$f15 (likewise) + $f22-$f30 (likewise) + $f21-$f16 (likewise, but input args) + $f0 (nonsaved, but return value) + $f2-$f9 (saved floating-point registers) + $1 (nonsaved integer registers) + $22-$25 (likewise) + $28 (likewise) + $0 (likewise, but return value) + $21-$16 (likewise, but input args) + $27 (procedure value in OSF, nonsaved in NT) + $2-$8 (saved integer registers) + $9-$14 (saved integer registers) + $26 (return PC) + $15 (frame pointer) + $29 (global pointer) + $30, $31, $f31 (stack pointer and always zero/ap & fp) */ + +#undef REG_ALLOC_ORDER +#define REG_ALLOC_ORDER \ + {33, \ + 42, 43, 44, 45, 46, 47, \ + 54, 55, 56, 57, 58, 59, 60, 61, 62, \ + 53, 52, 51, 50, 49, 48, \ + 32, \ + 34, 35, 36, 37, 38, 39, 40, 41, \ + 1, \ + 22, 23, 24, 25, \ + 28, \ + 0, \ + 21, 20, 19, 18, 17, 16, \ + 27, \ + 2, 3, 4, 5, 6, 7, 8, \ + 9, 10, 11, 12, 13, 14, \ + 26, \ + 15, \ + 29, \ + 30, 31, 63 } + +#undef HARD_FRAME_POINTER_REGNUM +#define HARD_FRAME_POINTER_REGNUM 29 + +/* Define registers used by the epilogue and return instruction. */ +#undef EPILOGUE_USES +#define EPILOGUE_USES(REGNO) ((REGNO) == 26 || (REGNO) == 29) + +#undef INITIAL_ELIMINATION_OFFSET +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = alpha_vms_initial_elimination_offset(FROM, TO)) + + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On Alpha/VMS, this is a structure that contains the number of + arguments and, for each argument, the datatype of that argument. 
+ + The number of arguments is a number of words of arguments scanned so far. + Thus 6 or more means all following args should go on the stack. */ + +enum avms_arg_type {I64, FF, FD, FG, FS, FT}; +typedef struct {int num_args; enum avms_arg_type atypes[6];} avms_arg_info; + +#undef CUMULATIVE_ARGS +#define CUMULATIVE_ARGS avms_arg_info + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#undef INIT_CUMULATIVE_ARGS +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM).num_args = 0; \ + (CUM).atypes[0] = (CUM).atypes[1] = (CUM).atypes[2] = I64; \ + (CUM).atypes[3] = (CUM).atypes[4] = (CUM).atypes[5] = I64; + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Even though pointers are 64bits, only 32bit ever remain significant in code + addresses. */ +#define MASK_RETURN_ADDR \ + (flag_vms_pointer_size == VMS_POINTER_SIZE_NONE \ + ? constm1_rtx \ + : GEN_INT (0xffffffff)) + +#undef ASM_WEAKEN_LABEL +#define ASM_WEAKEN_LABEL(FILE, NAME) \ + do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \ + fputc ('\n', FILE); } while (0) + +#define READONLY_DATA_SECTION_ASM_OP "\t.rdata" +#define CTORS_SECTION_ASM_OP "\t.ctors" +#define DTORS_SECTION_ASM_OP "\t.dtors" +#define SDATA_SECTION_ASM_OP "\t.sdata" +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t.long " #FUNC"\n"); + +#undef ASM_OUTPUT_ADDR_DIFF_ELT + +#undef ASM_OUTPUT_ADDR_VEC_ELT +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + fprintf (FILE, "\t.quad $L%d\n", (VALUE)) + +#undef CASE_VECTOR_MODE +#define CASE_VECTOR_MODE DImode +#undef CASE_VECTOR_PC_RELATIVE + +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \ +{ ASM_OUTPUT_ALIGN (FILE, 3); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); } + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. */ + +#define COMMON_ASM_OP "\t.comm\t" + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + vms_output_aligned_decl_common (FILE, DECL, NAME, SIZE, ALIGN) + +/* Control how constructors and destructors are emitted. */ +#define TARGET_ASM_CONSTRUCTOR vms_asm_out_constructor +#define TARGET_ASM_DESTRUCTOR vms_asm_out_destructor + +#define DWARF2_DEBUGGING_INFO 1 +#define VMS_DEBUGGING_INFO 1 + +#define DWARF2_UNWIND_INFO 1 + +#undef EH_RETURN_HANDLER_RTX +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (Pmode, stack_pointer_rtx, 8)) + +#define LINK_EH_SPEC "vms-dwarf2eh.o%s " +#define LINK_GCC_C_SEQUENCE_SPEC "%G" + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + fprintf (FILE, "\t.align %d\n", LOG); + +/* This is how to advance the location counter by SIZE bytes. */ + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE)) + +/* This says how to output an assembler line + to define a global common symbol. */ + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE,ROUNDED) \ +( fputs ("\t.lcomm ", (FILE)), \ + assemble_name ((FILE), (NAME)), \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))) + +/* Switch into a generic section. 
*/ +#define TARGET_ASM_NAMED_SECTION vms_asm_named_section + +#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \ + do \ + { \ + fprintf ((FILE), "\t"); \ + assemble_name (FILE, LABEL1); \ + fprintf (FILE, " = "); \ + assemble_name (FILE, LABEL2); \ + fprintf (FILE, "\n"); \ + } \ + while (0) + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE VMS_AND_DWARF2_DEBUG + +#define ASM_PN_FORMAT "%s___%lu" + +/* ??? VMS uses different linkage. */ +#undef TARGET_ASM_OUTPUT_MI_THUNK + +#undef ASM_SPEC +#undef ASM_FINAL_SPEC + +/* The VMS convention is to always provide minimal debug info + for a traceback unless specifically overridden. + + Because ASM_OUTPUT_ADDR_DIFF_ELT is not defined for alpha-vms, + jump tables cannot be output for PIC code, because you can't put + an absolute address in a readonly section. Putting the table in + a writable section is a security hole. Therefore, we unset the + flag_jump_tables flag, forcing switch statements to be expanded + using decision trees. There are probably other ways to address + this issue, but using a decision tree is clearly safe. */ + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS \ +do { \ + if (write_symbols == NO_DEBUG \ + && debug_info_level == DINFO_LEVEL_NONE) \ + { \ + write_symbols = VMS_DEBUG; \ + debug_info_level = DINFO_LEVEL_TERSE; \ + } \ + if (flag_pic) \ + flag_jump_tables = 0; \ +} while (0) + +#undef LINK_SPEC +#if HAVE_GNU_LD +/* GNU-ld built-in linker script already handles the dwarf2 debug sections. */ +#define LINK_SPEC "%{shared} %{v}" +#else +/* Link with vms-dwarf2.o if -g (except -g0). This causes the + VMS link to pull all the dwarf2 debug sections together. */ +#define LINK_SPEC "%{g0} %{g*:-g vms-dwarf2.o%s} %{shared} %{v} %{map}" +#endif + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{!shared:crt0.o%s crtbegin.o%s} \ + %{!static:%{shared:crtbeginS.o%s}}" + +#define ENDFILE_SPEC "%{!shared:crtend.o%s} %{!static:%{shared:crtendS.o%s}}" + +#define INIT_SECTION_ASM_OP "\t.section LIB$INITIALIZE,GBL,NOWRT" + +#define LONGLONG_STANDALONE 1 + +#undef TARGET_VALID_POINTER_MODE +#define TARGET_VALID_POINTER_MODE vms_valid_pointer_mode + +/* Default values for _CRTL_VER and _VMS_VER. */ +#define VMS_DEFAULT_CRTL_VER 70320000 +#define VMS_DEFAULT_VMS_VER 70320000 diff --git a/gcc-4.9/gcc/config/alpha/x-alpha b/gcc-4.9/gcc/config/alpha/x-alpha new file mode 100644 index 000000000..2b22e5e32 --- /dev/null +++ b/gcc-4.9/gcc/config/alpha/x-alpha @@ -0,0 +1,3 @@ +driver-alpha.o: $(srcdir)/config/alpha/driver-alpha.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc-4.9/gcc/config/arc/arc-modes.def b/gcc-4.9/gcc/config/arc/arc-modes.def new file mode 100644 index 000000000..f279e3c72 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc-modes.def @@ -0,0 +1,37 @@ +/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributor: Joern Rennecke + on behalf of Synopsys Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Some insns set all condition code flags, some only set the ZNC flags, and + some only set the ZN flags. */ + +CC_MODE (CC_ZN); +CC_MODE (CC_Z); +CC_MODE (CC_C); +CC_MODE (CC_FP_GT); +CC_MODE (CC_FP_GE); +CC_MODE (CC_FP_ORD); +CC_MODE (CC_FP_UNEQ); +CC_MODE (CC_FPX); + +/* Vector modes. */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ diff --git a/gcc-4.9/gcc/config/arc/arc-opts.h b/gcc-4.9/gcc/config/arc/arc-opts.h new file mode 100644 index 000000000..f2f1bc72b --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc-opts.h @@ -0,0 +1,28 @@ +/* GCC option-handling definitions for the Synopsys DesignWare ARC architecture. + + Copyright (C) 2007-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +enum processor_type +{ + PROCESSOR_NONE, + PROCESSOR_A5, + PROCESSOR_ARC600, + PROCESSOR_ARC601, + PROCESSOR_ARC700 +}; diff --git a/gcc-4.9/gcc/config/arc/arc-protos.h b/gcc-4.9/gcc/config/arc/arc-protos.h new file mode 100644 index 000000000..dd54fa8bd --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc-protos.h @@ -0,0 +1,118 @@ +/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu. + Copyright (C) 2000-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifdef RTX_CODE + +extern enum machine_mode arc_select_cc_mode (enum rtx_code, rtx, rtx); + +/* Define the function that build the compare insn for scc, bcc and mov*cc. */ +extern struct rtx_def *gen_compare_reg (rtx, enum machine_mode); + +/* Declarations for various fns used in the .md file. 
*/ +extern void arc_output_function_epilogue (FILE *, HOST_WIDE_INT, int); +extern const char *output_shift (rtx *); +extern bool compact_sda_memory_operand (rtx op,enum machine_mode mode); +extern bool arc_double_limm_p (rtx); +extern void arc_print_operand (FILE *, rtx, int); +extern void arc_print_operand_address (FILE *, rtx); +extern void arc_final_prescan_insn (rtx, rtx *, int); +extern void arc_set_default_type_attributes(tree type); +extern const char *arc_output_libcall (const char *); +extern bool prepare_extend_operands (rtx *operands, enum rtx_code code, + enum machine_mode omode); +extern int arc_output_addsi (rtx *operands, bool, bool); +extern int arc_output_commutative_cond_exec (rtx *operands, bool); +extern bool arc_expand_movmem (rtx *operands); +extern bool prepare_move_operands (rtx *operands, enum machine_mode mode); +extern void emit_shift (enum rtx_code, rtx, rtx, rtx); +#endif /* RTX_CODE */ + +#ifdef TREE_CODE +extern enum arc_function_type arc_compute_function_type (struct function *); +#endif /* TREE_CODE */ + + +extern void arc_init (void); +extern unsigned int arc_compute_frame_size (int); +extern bool arc_ccfsm_branch_deleted_p (void); +extern void arc_ccfsm_record_branch_deleted (void); + +extern rtx arc_legitimize_pic_address (rtx, rtx); +void arc_asm_output_aligned_decl_local (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); +extern rtx arc_return_addr_rtx (int , rtx); +extern bool check_if_valid_regno_const (rtx *, int); +extern bool check_if_valid_sleep_operand (rtx *, int); +extern bool arc_legitimate_constant_p (enum machine_mode, rtx); +extern bool arc_legitimate_pc_offset_p (rtx); +extern bool arc_legitimate_pic_addr_p (rtx); +extern void emit_pic_move (rtx *, enum machine_mode); +extern bool arc_raw_symbolic_reference_mentioned_p (rtx, bool); +extern bool arc_legitimate_pic_operand_p (rtx); +extern bool arc_is_longcall_p (rtx); +extern bool arc_is_shortcall_p (rtx); +extern bool arc_profile_call (rtx callee); +extern bool valid_brcc_with_delay_p (rtx *); +extern bool small_data_pattern (rtx , enum machine_mode); +extern rtx arc_rewrite_small_data (rtx); +extern bool arc_ccfsm_cond_exec_p (void); +struct secondary_reload_info; +extern int arc_register_move_cost (enum machine_mode, enum reg_class, + enum reg_class); +extern rtx disi_highpart (rtx); +extern int arc_adjust_insn_length (rtx, int, bool); +extern int arc_corereg_hazard (rtx, rtx); +extern int arc_hazard (rtx, rtx); +extern int arc_write_ext_corereg (rtx); +extern rtx gen_acc1 (void); +extern rtx gen_acc2 (void); +extern rtx gen_mlo (void); +extern rtx gen_mhi (void); +extern bool arc_branch_size_unknown_p (void); +struct arc_ccfsm; +extern void arc_ccfsm_record_condition (rtx, bool, rtx, struct arc_ccfsm *); +extern void arc_expand_prologue (void); +extern void arc_expand_epilogue (int); +extern void arc_init_expanders (void); +extern int arc_check_millicode (rtx op, int offset, int load_p); +extern int arc_get_unalign (void); +extern void arc_clear_unalign (void); +extern void arc_toggle_unalign (void); +extern void split_addsi (rtx *); +extern void split_subsi (rtx *); +extern void arc_pad_return (void); +extern rtx arc_split_move (rtx *); +extern int arc_verify_short (rtx insn, int unalign, int); +extern const char *arc_short_long (rtx insn, const char *, const char *); +extern rtx arc_regno_use_in (unsigned int, rtx); +extern int arc_attr_type (rtx); +extern bool arc_scheduling_not_expected (void); +extern bool arc_sets_cc_p 
(rtx insn); +extern int arc_label_align (rtx label); +extern bool arc_need_delay (rtx insn); +extern bool arc_text_label (rtx); +extern int arc_decl_pretend_args (tree decl); +extern bool arc_short_comparison_p (rtx, int); +extern bool arc_epilogue_uses (int regno); +/* insn-attrtab.c doesn't include reload.h, which declares regno_clobbered_p. */ +extern int regno_clobbered_p (unsigned int, rtx, enum machine_mode, int); +extern int arc_return_slot_offset (void); +extern bool arc_legitimize_reload_address (rtx *, enum machine_mode, int, int); diff --git a/gcc-4.9/gcc/config/arc/arc-simd.h b/gcc-4.9/gcc/config/arc/arc-simd.h new file mode 100644 index 000000000..768e35b95 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc-simd.h @@ -0,0 +1,186 @@ +/* Synopsys DesignWare ARC SIMD include file. + Copyright (C) 2007-2014 Free Software Foundation, Inc. + Written by Saurabh Verma (saurabh.verma@celunite.com) on behalf os Synopsys + Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +#ifndef _ARC_SIMD_H +#define _ARC_SIMD_H 1 + +#ifndef __ARC_SIMD__ +#error Use the "-msimd" flag to enable ARC SIMD support +#endif + +/* I0-I7 registers. */ +#define _IREG_I0 0 +#define _IREG_I1 1 +#define _IREG_I2 2 +#define _IREG_I3 3 +#define _IREG_I4 4 +#define _IREG_I5 5 +#define _IREG_I6 6 +#define _IREG_I7 7 + +/* DMA configuration registers. */ +#define _DMA_REG_DR0 0 +#define _DMA_SDM_SRC_ADR_REG _DMA_REG_DR0 +#define _DMA_SDM_DEST_ADR_REG _DMA_REG_DR0 + +#define _DMA_REG_DR1 1 +#define _DMA_SDM_STRIDE_REG _DMA_REG_DR1 + +#define _DMA_REG_DR2 2 +#define _DMA_BLK_REG _DMA_REG_DR2 + +#define _DMA_REG_DR3 3 +#define _DMA_LOC_REG _DMA_REG_DR3 + +#define _DMA_REG_DR4 4 +#define _DMA_SYS_SRC_ADR_REG _DMA_REG_DR4 +#define _DMA_SYS_DEST_ADR_REG _DMA_REG_DR4 + +#define _DMA_REG_DR5 5 +#define _DMA_SYS_STRIDE_REG _DMA_REG_DR5 + +#define _DMA_REG_DR6 6 +#define _DMA_CFG_REG _DMA_REG_DR6 + +#define _DMA_REG_DR7 7 +#define _DMA_FT_BASE_ADR_REG _DMA_REG_DR7 + +/* Predefined types used in vector instructions. 
*/ +typedef int __v4si __attribute__((vector_size(16))); +typedef short __v8hi __attribute__((vector_size(16))); + +/* Synonyms */ +#define _vaddaw __builtin_arc_vaddaw +#define _vaddw __builtin_arc_vaddw +#define _vavb __builtin_arc_vavb +#define _vavrb __builtin_arc_vavrb +#define _vdifaw __builtin_arc_vdifaw +#define _vdifw __builtin_arc_vdifw +#define _vmaxaw __builtin_arc_vmaxaw +#define _vmaxw __builtin_arc_vmaxw +#define _vminaw __builtin_arc_vminaw +#define _vminw __builtin_arc_vminw +#define _vmulaw __builtin_arc_vmulaw +#define _vmulfaw __builtin_arc_vmulfaw +#define _vmulfw __builtin_arc_vmulfw +#define _vmulw __builtin_arc_vmulw +#define _vsubaw __builtin_arc_vsubaw +#define _vsubw __builtin_arc_vsubw +#define _vsummw __builtin_arc_vsummw +#define _vand __builtin_arc_vand +#define _vandaw __builtin_arc_vandaw +#define _vbic __builtin_arc_vbic +#define _vbicaw __builtin_arc_vbicaw +#define _vor __builtin_arc_vor +#define _vxor __builtin_arc_vxor +#define _vxoraw __builtin_arc_vxoraw +#define _veqw __builtin_arc_veqw +#define _vlew __builtin_arc_vlew +#define _vltw __builtin_arc_vltw +#define _vnew __builtin_arc_vnew +#define _vmr1aw __builtin_arc_vmr1aw +#define _vmr1w __builtin_arc_vmr1w +#define _vmr2aw __builtin_arc_vmr2aw +#define _vmr2w __builtin_arc_vmr2w +#define _vmr3aw __builtin_arc_vmr3aw +#define _vmr3w __builtin_arc_vmr3w +#define _vmr4aw __builtin_arc_vmr4aw +#define _vmr4w __builtin_arc_vmr4w +#define _vmr5aw __builtin_arc_vmr5aw +#define _vmr5w __builtin_arc_vmr5w +#define _vmr6aw __builtin_arc_vmr6aw +#define _vmr6w __builtin_arc_vmr6w +#define _vmr7aw __builtin_arc_vmr7aw +#define _vmr7w __builtin_arc_vmr7w +#define _vmrb __builtin_arc_vmrb +#define _vh264f __builtin_arc_vh264f +#define _vh264ft __builtin_arc_vh264ft +#define _vh264fw __builtin_arc_vh264fw +#define _vvc1f __builtin_arc_vvc1f +#define _vvc1ft __builtin_arc_vvc1ft +#define _vbaddw __builtin_arc_vbaddw +#define _vbmaxw __builtin_arc_vbmaxw +#define _vbminw __builtin_arc_vbminw +#define _vbmulaw __builtin_arc_vbmulaw +#define _vbmulfw __builtin_arc_vbmulfw +#define _vbmulw __builtin_arc_vbmulw +#define _vbrsubw __builtin_arc_vbrsubw +#define _vbsubw __builtin_arc_vbsubw +#define _vasrw __builtin_arc_vasrw +#define _vsr8 __builtin_arc_vsr8 +#define _vsr8aw __builtin_arc_vsr8aw +#define _vasrrwi __builtin_arc_vasrrwi +#define _vasrsrwi __builtin_arc_vasrsrwi +#define _vasrwi __builtin_arc_vasrwi +#define _vasrpwbi __builtin_arc_vasrpwbi +#define _vasrrpwbi __builtin_arc_vasrrpwbi +#define _vsr8awi __builtin_arc_vsr8awi +#define _vsr8i __builtin_arc_vsr8i +#define _vmvaw __builtin_arc_vmvaw +#define _vmvw __builtin_arc_vmvw +#define _vmvzw __builtin_arc_vmvzw +#define _vd6tapf __builtin_arc_vd6tapf +#define _vmovaw __builtin_arc_vmovaw +#define _vmovw __builtin_arc_vmovw +#define _vmovzw __builtin_arc_vmovzw +#define _vabsaw __builtin_arc_vabsaw +#define _vabsw __builtin_arc_vabsw +#define _vaddsuw __builtin_arc_vaddsuw +#define _vsignw __builtin_arc_vsignw +#define _vexch1 __builtin_arc_vexch1 +#define _vexch2 __builtin_arc_vexch2 +#define _vexch4 __builtin_arc_vexch4 +#define _vupbaw __builtin_arc_vupbaw +#define _vupbw __builtin_arc_vupbw +#define _vupsbaw __builtin_arc_vupsbaw +#define _vupsbw __builtin_arc_vupsbw +#define _vdirun __builtin_arc_vdirun +#define _vdorun __builtin_arc_vdorun +#define _vdiwr __builtin_arc_vdiwr +#define _vdowr __builtin_arc_vdowr +#define _vrec __builtin_arc_vrec +#define _vrun __builtin_arc_vrun +#define _vrecrun __builtin_arc_vrecrun +#define _vendrec 
__builtin_arc_vendrec +#define _vld32wh __builtin_arc_vld32wh +#define _vld32wl __builtin_arc_vld32wl +#define _vld64 __builtin_arc_vld64 +#define _vld32 __builtin_arc_vld32 +#define _vld64w __builtin_arc_vld64w +#define _vld128 __builtin_arc_vld128 +#define _vst128 __builtin_arc_vst128 +#define _vst64 __builtin_arc_vst64 +#define _vst16_n __builtin_arc_vst16_n +#define _vst32_n __builtin_arc_vst32_n +#define _vinti __builtin_arc_vinti + +/* Additional synonyms to ease programming. */ +#define _setup_dma_in_channel_reg _vdiwr +#define _setup_dma_out_channel_reg _vdowr + +#endif /* _ARC_SIMD_H */ diff --git a/gcc-4.9/gcc/config/arc/arc.c b/gcc-4.9/gcc/config/arc/arc.c new file mode 100644 index 000000000..0eaabede2 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc.c @@ -0,0 +1,9360 @@ +/* Subroutines used for code generation on the Synopsys DesignWare ARC cpu. + Copyright (C) 1994-2014 Free Software Foundation, Inc. + + Sources derived from work done by Sankhya Technologies (www.sankhya.com) on + behalf of Synopsys Inc. + + Position Independent Code support added,Code cleaned up, + Comments and Support For ARC700 instructions added by + Saurabh Verma (saurabh.verma@codito.com) + Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) + + Fixing ABI inconsistencies, optimizations for ARC600 / ARC700 pipelines, + profiling support added by Joern Rennecke + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "calls.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "function.h" +#include "toplev.h" +#include "ggc.h" +#include "tm_p.h" +#include "target.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "expr.h" +#include "recog.h" +#include "debug.h" +#include "diagnostic.h" +#include "insn-codes.h" +#include "langhooks.h" +#include "optabs.h" +#include "tm-constrs.h" +#include "reload.h" /* For operands_match_p */ +#include "df.h" +#include "tree-pass.h" +#include "context.h" +#include "pass_manager.h" + +/* Which cpu we're compiling for (A5, ARC600, ARC601, ARC700). */ +static const char *arc_cpu_string = ""; + +/* ??? Loads can handle any constant, stores can only handle small ones. */ +/* OTOH, LIMMs cost extra, so their usefulness is limited. */ +#define RTX_OK_FOR_OFFSET_P(MODE, X) \ +(GET_CODE (X) == CONST_INT \ + && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \ + (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \ + ? 
0 \ + : -(-GET_MODE_SIZE (MODE) | -4) >> 1))) + +#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX, STRICT) \ +(GET_CODE (X) == PLUS \ + && RTX_OK_FOR_BASE_P (XEXP (X, 0), (STRICT)) \ + && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1), (STRICT)) \ + && GET_MODE_SIZE ((MODE)) <= 4) \ + || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1)))) + +#define LEGITIMATE_SCALED_ADDRESS_P(MODE, X, STRICT) \ +(GET_CODE (X) == PLUS \ + && GET_CODE (XEXP (X, 0)) == MULT \ + && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0), (STRICT)) \ + && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \ + && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \ + || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \ + && (RTX_OK_FOR_BASE_P (XEXP (X, 1), (STRICT)) \ + || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1))))) + +#define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \ + (GET_CODE (X) == PLUS \ + && (REG_P (XEXP ((X), 0)) && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM) \ + && ((GET_CODE (XEXP((X),1)) == SYMBOL_REF \ + && SYMBOL_REF_SMALL_P (XEXP ((X), 1))) \ + || (GET_CODE (XEXP ((X), 1)) == CONST \ + && GET_CODE (XEXP (XEXP ((X), 1), 0)) == PLUS \ + && GET_CODE (XEXP (XEXP (XEXP ((X), 1), 0), 0)) == SYMBOL_REF \ + && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP ((X), 1), 0), 0)) \ + && GET_CODE (XEXP(XEXP (XEXP ((X), 1), 0), 1)) == CONST_INT))) + +/* Array of valid operand punctuation characters. */ +char arc_punct_chars[256]; + +/* State used by arc_ccfsm_advance to implement conditional execution. */ +struct GTY (()) arc_ccfsm +{ + int state; + int cc; + rtx cond; + rtx target_insn; + int target_label; +}; + +#define arc_ccfsm_current cfun->machine->ccfsm_current + +#define ARC_CCFSM_BRANCH_DELETED_P(STATE) \ + ((STATE)->state == 1 || (STATE)->state == 2) + +/* Indicate we're conditionalizing insns now. */ +#define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \ + ((STATE)->state += 2) + +#define ARC_CCFSM_COND_EXEC_P(STATE) \ + ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5 \ + || current_insn_predicate) + +/* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE. */ +#define CCFSM_ISCOMPACT(INSN,STATE) \ + (ARC_CCFSM_COND_EXEC_P (STATE) \ + ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \ + || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \ + : get_attr_iscompact (INSN) != ISCOMPACT_FALSE) + +/* Likewise, but also consider that INSN might be in a delay slot of JUMP. */ +#define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \ + ((ARC_CCFSM_COND_EXEC_P (STATE) \ + || (JUMP_P (JUMP) \ + && INSN_ANNULLED_BRANCH_P (JUMP) \ + && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \ + ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \ + || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \ + : get_attr_iscompact (INSN) != ISCOMPACT_FALSE) + +/* The maximum number of insns skipped which will be conditionalised if + possible. */ +/* When optimizing for speed: + Let p be the probability that the potentially skipped insns need to + be executed, pn the cost of a correctly predicted non-taken branch, + mt the cost of a mis/non-predicted taken branch, + mn mispredicted non-taken, pt correctly predicted taken ; + costs expressed in numbers of instructions like the ones considered + skipping. + Unfortunately we don't have a measure of predictability - this + is linked to probability only in that in the no-eviction-scenario + there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger + value that can be assumed *if* the distribution is perfectly random. 
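+ (For instance, at p = 0.5 that lower bound is 1 - 2 * 0.5 = 0, i.e. no + predictability is guaranteed at all.)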
+ A predictability of 1 is perfectly plausible not matter what p is, + because the decision could be dependent on an invocation parameter + of the program. + For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn + For small p, we want MAX_INSNS_SKIPPED == pt + + When optimizing for size: + We want to skip insn unless we could use 16 opcodes for the + non-conditionalized insn to balance the branch length or more. + Performance can be tie-breaker. */ +/* If the potentially-skipped insns are likely to be executed, we'll + generally save one non-taken branch + o + this to be no less than the 1/p */ +#define MAX_INSNS_SKIPPED 3 + +/* The values of unspec's first field. */ +enum { + ARC_UNSPEC_PLT = 3, + ARC_UNSPEC_GOT, + ARC_UNSPEC_GOTOFF +} ; + + +enum arc_builtins { + ARC_BUILTIN_NOP = 2, + ARC_BUILTIN_NORM = 3, + ARC_BUILTIN_NORMW = 4, + ARC_BUILTIN_SWAP = 5, + ARC_BUILTIN_BRK = 6, + ARC_BUILTIN_DIVAW = 7, + ARC_BUILTIN_EX = 8, + ARC_BUILTIN_MUL64 = 9, + ARC_BUILTIN_MULU64 = 10, + ARC_BUILTIN_RTIE = 11, + ARC_BUILTIN_SYNC = 12, + ARC_BUILTIN_CORE_READ = 13, + ARC_BUILTIN_CORE_WRITE = 14, + ARC_BUILTIN_FLAG = 15, + ARC_BUILTIN_LR = 16, + ARC_BUILTIN_SR = 17, + ARC_BUILTIN_SLEEP = 18, + ARC_BUILTIN_SWI = 19, + ARC_BUILTIN_TRAP_S = 20, + ARC_BUILTIN_UNIMP_S = 21, + ARC_BUILTIN_ALIGNED = 22, + + /* Sentinel to mark start of simd builtins. */ + ARC_SIMD_BUILTIN_BEGIN = 1000, + + ARC_SIMD_BUILTIN_VADDAW = 1001, + ARC_SIMD_BUILTIN_VADDW = 1002, + ARC_SIMD_BUILTIN_VAVB = 1003, + ARC_SIMD_BUILTIN_VAVRB = 1004, + ARC_SIMD_BUILTIN_VDIFAW = 1005, + ARC_SIMD_BUILTIN_VDIFW = 1006, + ARC_SIMD_BUILTIN_VMAXAW = 1007, + ARC_SIMD_BUILTIN_VMAXW = 1008, + ARC_SIMD_BUILTIN_VMINAW = 1009, + ARC_SIMD_BUILTIN_VMINW = 1010, + ARC_SIMD_BUILTIN_VMULAW = 1011, + ARC_SIMD_BUILTIN_VMULFAW = 1012, + ARC_SIMD_BUILTIN_VMULFW = 1013, + ARC_SIMD_BUILTIN_VMULW = 1014, + ARC_SIMD_BUILTIN_VSUBAW = 1015, + ARC_SIMD_BUILTIN_VSUBW = 1016, + ARC_SIMD_BUILTIN_VSUMMW = 1017, + ARC_SIMD_BUILTIN_VAND = 1018, + ARC_SIMD_BUILTIN_VANDAW = 1019, + ARC_SIMD_BUILTIN_VBIC = 1020, + ARC_SIMD_BUILTIN_VBICAW = 1021, + ARC_SIMD_BUILTIN_VOR = 1022, + ARC_SIMD_BUILTIN_VXOR = 1023, + ARC_SIMD_BUILTIN_VXORAW = 1024, + ARC_SIMD_BUILTIN_VEQW = 1025, + ARC_SIMD_BUILTIN_VLEW = 1026, + ARC_SIMD_BUILTIN_VLTW = 1027, + ARC_SIMD_BUILTIN_VNEW = 1028, + ARC_SIMD_BUILTIN_VMR1AW = 1029, + ARC_SIMD_BUILTIN_VMR1W = 1030, + ARC_SIMD_BUILTIN_VMR2AW = 1031, + ARC_SIMD_BUILTIN_VMR2W = 1032, + ARC_SIMD_BUILTIN_VMR3AW = 1033, + ARC_SIMD_BUILTIN_VMR3W = 1034, + ARC_SIMD_BUILTIN_VMR4AW = 1035, + ARC_SIMD_BUILTIN_VMR4W = 1036, + ARC_SIMD_BUILTIN_VMR5AW = 1037, + ARC_SIMD_BUILTIN_VMR5W = 1038, + ARC_SIMD_BUILTIN_VMR6AW = 1039, + ARC_SIMD_BUILTIN_VMR6W = 1040, + ARC_SIMD_BUILTIN_VMR7AW = 1041, + ARC_SIMD_BUILTIN_VMR7W = 1042, + ARC_SIMD_BUILTIN_VMRB = 1043, + ARC_SIMD_BUILTIN_VH264F = 1044, + ARC_SIMD_BUILTIN_VH264FT = 1045, + ARC_SIMD_BUILTIN_VH264FW = 1046, + ARC_SIMD_BUILTIN_VVC1F = 1047, + ARC_SIMD_BUILTIN_VVC1FT = 1048, + + /* Va, Vb, rlimm instructions. */ + ARC_SIMD_BUILTIN_VBADDW = 1050, + ARC_SIMD_BUILTIN_VBMAXW = 1051, + ARC_SIMD_BUILTIN_VBMINW = 1052, + ARC_SIMD_BUILTIN_VBMULAW = 1053, + ARC_SIMD_BUILTIN_VBMULFW = 1054, + ARC_SIMD_BUILTIN_VBMULW = 1055, + ARC_SIMD_BUILTIN_VBRSUBW = 1056, + ARC_SIMD_BUILTIN_VBSUBW = 1057, + + /* Va, Vb, Ic instructions. */ + ARC_SIMD_BUILTIN_VASRW = 1060, + ARC_SIMD_BUILTIN_VSR8 = 1061, + ARC_SIMD_BUILTIN_VSR8AW = 1062, + + /* Va, Vb, u6 instructions. 
*/ + ARC_SIMD_BUILTIN_VASRRWi = 1065, + ARC_SIMD_BUILTIN_VASRSRWi = 1066, + ARC_SIMD_BUILTIN_VASRWi = 1067, + ARC_SIMD_BUILTIN_VASRPWBi = 1068, + ARC_SIMD_BUILTIN_VASRRPWBi = 1069, + ARC_SIMD_BUILTIN_VSR8AWi = 1070, + ARC_SIMD_BUILTIN_VSR8i = 1071, + + /* Va, Vb, u8 (simm) instructions. */ + ARC_SIMD_BUILTIN_VMVAW = 1075, + ARC_SIMD_BUILTIN_VMVW = 1076, + ARC_SIMD_BUILTIN_VMVZW = 1077, + ARC_SIMD_BUILTIN_VD6TAPF = 1078, + + /* Va, rlimm, u8 (simm) instructions. */ + ARC_SIMD_BUILTIN_VMOVAW = 1080, + ARC_SIMD_BUILTIN_VMOVW = 1081, + ARC_SIMD_BUILTIN_VMOVZW = 1082, + + /* Va, Vb instructions. */ + ARC_SIMD_BUILTIN_VABSAW = 1085, + ARC_SIMD_BUILTIN_VABSW = 1086, + ARC_SIMD_BUILTIN_VADDSUW = 1087, + ARC_SIMD_BUILTIN_VSIGNW = 1088, + ARC_SIMD_BUILTIN_VEXCH1 = 1089, + ARC_SIMD_BUILTIN_VEXCH2 = 1090, + ARC_SIMD_BUILTIN_VEXCH4 = 1091, + ARC_SIMD_BUILTIN_VUPBAW = 1092, + ARC_SIMD_BUILTIN_VUPBW = 1093, + ARC_SIMD_BUILTIN_VUPSBAW = 1094, + ARC_SIMD_BUILTIN_VUPSBW = 1095, + + ARC_SIMD_BUILTIN_VDIRUN = 1100, + ARC_SIMD_BUILTIN_VDORUN = 1101, + ARC_SIMD_BUILTIN_VDIWR = 1102, + ARC_SIMD_BUILTIN_VDOWR = 1103, + + ARC_SIMD_BUILTIN_VREC = 1105, + ARC_SIMD_BUILTIN_VRUN = 1106, + ARC_SIMD_BUILTIN_VRECRUN = 1107, + ARC_SIMD_BUILTIN_VENDREC = 1108, + + ARC_SIMD_BUILTIN_VLD32WH = 1110, + ARC_SIMD_BUILTIN_VLD32WL = 1111, + ARC_SIMD_BUILTIN_VLD64 = 1112, + ARC_SIMD_BUILTIN_VLD32 = 1113, + ARC_SIMD_BUILTIN_VLD64W = 1114, + ARC_SIMD_BUILTIN_VLD128 = 1115, + ARC_SIMD_BUILTIN_VST128 = 1116, + ARC_SIMD_BUILTIN_VST64 = 1117, + + ARC_SIMD_BUILTIN_VST16_N = 1120, + ARC_SIMD_BUILTIN_VST32_N = 1121, + + ARC_SIMD_BUILTIN_VINTI = 1201, + + ARC_SIMD_BUILTIN_END +}; + +/* A nop is needed between a 4 byte insn that sets the condition codes and + a branch that uses them (the same isn't true for an 8 byte insn that sets + the condition codes). Set by arc_ccfsm_advance. Used by + arc_print_operand. */ + +static int get_arc_condition_code (rtx); + +static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *); + +/* Initialized arc_attribute_table to NULL since arc doesnot have any + machine specific supported attributes. */ +const struct attribute_spec arc_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute, true }, + /* Function calls made to this symbol must be done indirectly, because + it may lie outside of the 21/25 bit addressing range of a normal function + call. */ + { "long_call", 0, 0, false, true, true, NULL, false }, + /* Whereas these functions are always known to reside within the 25 bit + addressing range of unconditionalized bl. */ + { "medium_call", 0, 0, false, true, true, NULL, false }, + /* And these functions are always known to reside within the 21 bit + addressing range of blcc. 
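+ For illustration, these are ordinary GCC type attributes, e.g. + extern void f (void) __attribute__ ((long_call)); + forces the indirect call sequence described above.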
*/ + { "short_call", 0, 0, false, true, true, NULL, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; +static int arc_comp_type_attributes (const_tree, const_tree); +static void arc_file_start (void); +static void arc_internal_label (FILE *, const char *, unsigned long); +static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); +static int arc_address_cost (rtx, enum machine_mode, addr_space_t, bool); +static void arc_encode_section_info (tree decl, rtx rtl, int first); + +static void arc_init_builtins (void); +static rtx arc_expand_builtin (tree, rtx, rtx, enum machine_mode, int); + +static int branch_dest (rtx); + +static void arc_output_pic_addr_const (FILE *, rtx, int); +void emit_pic_move (rtx *, enum machine_mode); +bool arc_legitimate_pic_operand_p (rtx); +static bool arc_function_ok_for_sibcall (tree, tree); +static rtx arc_function_value (const_tree, const_tree, bool); +const char * output_shift (rtx *); +static void arc_reorg (void); +static bool arc_in_small_data_p (const_tree); + +static void arc_init_reg_tables (void); +static bool arc_return_in_memory (const_tree, const_tree); +static void arc_init_simd_builtins (void); +static bool arc_vector_mode_supported_p (enum machine_mode); + +static bool arc_can_use_doloop_p (double_int, double_int, unsigned int, bool); +static const char *arc_invalid_within_doloop (const_rtx); + +static void output_short_suffix (FILE *file); + +static bool arc_frame_pointer_required (void); + +/* Implements target hook vector_mode_supported_p. */ + +static bool +arc_vector_mode_supported_p (enum machine_mode mode) +{ + if (!TARGET_SIMD_SET) + return false; + + if ((mode == V4SImode) + || (mode == V8HImode)) + return true; + + return false; +} + + +/* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */ +static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED; +static rtx arc_delegitimize_address (rtx); +static bool arc_can_follow_jump (const_rtx follower, const_rtx followee); + +static rtx frame_insn (rtx); +static void arc_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static rtx arc_legitimize_address_0 (rtx, rtx, enum machine_mode mode); + +static void arc_finalize_pic (void); + +/* initialize the GCC target structure. 
*/ +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START arc_file_start +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE arc_attribute_table +#undef TARGET_ASM_INTERNAL_LABEL +#define TARGET_ASM_INTERNAL_LABEL arc_internal_label +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS arc_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST arc_address_cost + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO arc_encode_section_info + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS arc_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN arc_expand_builtin + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk + +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG arc_reorg + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P arc_in_small_data_p + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE \ + default_promote_function_mode_always_promote + +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY arc_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE arc_function_value + +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p + +#undef TARGET_INVALID_WITHIN_DOLOOP +#define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop + +#undef TARGET_PRESERVE_RELOAD_P +#define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p + +#undef TARGET_CAN_FOLLOW_JUMP +#define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address + +/* Usually, we will be able to scale anchor offsets. + When this fails, we want LEGITIMIZE_ADDRESS to kick in. 
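+ The -1024 .. 1020 range below presumably corresponds to the signed + 9-bit load/store offset scaled by 4, i.e. -256*4 .. 255*4.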
*/ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET (-1024) +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET (1020) + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD arc_secondary_reload + +#define TARGET_OPTION_OVERRIDE arc_override_options + +#define TARGET_CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage + +#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline + +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address + +#define TARGET_CAN_ELIMINATE arc_can_eliminate + +#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required + +#define TARGET_FUNCTION_ARG arc_function_arg + +#define TARGET_FUNCTION_ARG_ADVANCE arc_function_arg_advance + +#define TARGET_LEGITIMATE_CONSTANT_P arc_legitimate_constant_p + +#define TARGET_LEGITIMATE_ADDRESS_P arc_legitimate_address_p + +#define TARGET_MODE_DEPENDENT_ADDRESS_P arc_mode_dependent_address_p + +#define TARGET_LEGITIMIZE_ADDRESS arc_legitimize_address + +#define TARGET_ADJUST_INSN_LENGTH arc_adjust_insn_length + +#define TARGET_INSN_LENGTH_PARAMETERS arc_insn_length_parameters + +#define TARGET_LRA_P arc_lra_p +#define TARGET_REGISTER_PRIORITY arc_register_priority +/* Stores with scaled offsets have different displacement ranges. */ +#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true +#define TARGET_SPILL_CLASS arc_spill_class + +#include "target-def.h" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +/* Try to keep the (mov:DF _, reg) as early as possible so + that the dh-lr insns appear together and can + use the peephole2 pattern. */ + +static int +arc_sched_adjust_priority (rtx insn, int priority) +{ + rtx set = single_set (insn); + if (set + && GET_MODE (SET_SRC(set)) == DFmode + && GET_CODE (SET_SRC(set)) == REG) + { + /* Incrementing priority by 20 (empirically derived). */ + return priority + 20; + } + + return priority; +} + +static reg_class_t +arc_secondary_reload (bool in_p, rtx x, reg_class_t cl, enum machine_mode, + secondary_reload_info *) +{ + if (cl == DOUBLE_REGS) + return GENERAL_REGS; + + /* The loop counter register can be stored, but not loaded directly. 
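+ Returning GENERAL_REGS below makes reload use a general register as + an intermediate when such a register has to be loaded from memory.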
*/ + if ((cl == LPCOUNT_REG || cl == WRITABLE_CORE_REGS) + && in_p && MEM_P (x)) + return GENERAL_REGS; + return NO_REGS; +} + +static unsigned arc_ifcvt (void); + +namespace { + +const pass_data pass_data_arc_ifcvt = +{ + RTL_PASS, + "arc_ifcvt", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_IFCVT2, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish /* todo_flags_finish */ +}; + +class pass_arc_ifcvt : public rtl_opt_pass +{ +public: + pass_arc_ifcvt(gcc::context *ctxt) + : rtl_opt_pass(pass_data_arc_ifcvt, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_arc_ifcvt (m_ctxt); } + unsigned int execute () { return arc_ifcvt (); } +}; + +} // anon namespace + +rtl_opt_pass * +make_pass_arc_ifcvt (gcc::context *ctxt) +{ + return new pass_arc_ifcvt (ctxt); +} + +static unsigned arc_predicate_delay_insns (void); + +namespace { + +const pass_data pass_data_arc_predicate_delay_insns = +{ + RTL_PASS, + "arc_predicate_delay_insns", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_IFCVT2, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish /* todo_flags_finish */ +}; + +class pass_arc_predicate_delay_insns : public rtl_opt_pass +{ +public: + pass_arc_predicate_delay_insns(gcc::context *ctxt) + : rtl_opt_pass(pass_data_arc_predicate_delay_insns, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return arc_predicate_delay_insns (); } +}; + +} // anon namespace + +rtl_opt_pass * +make_pass_arc_predicate_delay_insns (gcc::context *ctxt) +{ + return new pass_arc_predicate_delay_insns (ctxt); +} + +/* Called by OVERRIDE_OPTIONS to initialize various things. */ + +void +arc_init (void) +{ + enum attr_tune tune_dflt = TUNE_NONE; + + if (TARGET_A5) + { + arc_cpu_string = "A5"; + } + else if (TARGET_ARC600) + { + arc_cpu_string = "ARC600"; + tune_dflt = TUNE_ARC600; + } + else if (TARGET_ARC601) + { + arc_cpu_string = "ARC601"; + tune_dflt = TUNE_ARC600; + } + else if (TARGET_ARC700) + { + arc_cpu_string = "ARC700"; + tune_dflt = TUNE_ARC700_4_2_STD; + } + else + gcc_unreachable (); + if (arc_tune == TUNE_NONE) + arc_tune = tune_dflt; + /* Note: arc_multcost is only used in rtx_cost if speed is true. */ + if (arc_multcost < 0) + switch (arc_tune) + { + case TUNE_ARC700_4_2_STD: + /* latency 7; + max throughput (1 multiply + 4 other insns) / 5 cycles. */ + arc_multcost = COSTS_N_INSNS (4); + if (TARGET_NOMPY_SET) + arc_multcost = COSTS_N_INSNS (30); + break; + case TUNE_ARC700_4_2_XMAC: + /* latency 5; + max throughput (1 multiply + 2 other insns) / 3 cycles. */ + arc_multcost = COSTS_N_INSNS (3); + if (TARGET_NOMPY_SET) + arc_multcost = COSTS_N_INSNS (30); + break; + case TUNE_ARC600: + if (TARGET_MUL64_SET) + { + arc_multcost = COSTS_N_INSNS (4); + break; + } + /* Fall through. */ + default: + arc_multcost = COSTS_N_INSNS (30); + break; + } + + /* Support mul64 generation only for A5 and ARC600. */ + if (TARGET_MUL64_SET && TARGET_ARC700) + error ("-mmul64 not supported for ARC700"); + + /* MPY instructions valid only for ARC700. */ + if (TARGET_NOMPY_SET && !TARGET_ARC700) + error ("-mno-mpy supported only for ARC700"); + + /* mul/mac instructions only for ARC600. 
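+ (ARC601 is accepted as well, as the check below shows.)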
*/ + if (TARGET_MULMAC_32BY16_SET && !(TARGET_ARC600 || TARGET_ARC601)) + error ("-mmul32x16 supported only for ARC600 or ARC601"); + + if (!TARGET_DPFP && TARGET_DPFP_DISABLE_LRSR) + error ("-mno-dpfp-lrsr suppforted only with -mdpfp"); + + /* FPX-1. No fast and compact together. */ + if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET) + || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET)) + error ("FPX fast and compact options cannot be specified together"); + + /* FPX-2. No fast-spfp for arc600 or arc601. */ + if (TARGET_SPFP_FAST_SET && (TARGET_ARC600 || TARGET_ARC601)) + error ("-mspfp_fast not available on ARC600 or ARC601"); + + /* FPX-3. No FPX extensions on pre-ARC600 cores. */ + if ((TARGET_DPFP || TARGET_SPFP) + && !(TARGET_ARC600 || TARGET_ARC601 || TARGET_ARC700)) + error ("FPX extensions not available on pre-ARC600 cores"); + + /* Warn for unimplemented PIC in pre-ARC700 cores, and disable flag_pic. */ + if (flag_pic && !TARGET_ARC700) + { + warning (DK_WARNING, "PIC is not supported for %s. Generating non-PIC code only..", arc_cpu_string); + flag_pic = 0; + } + + arc_init_reg_tables (); + + /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */ + memset (arc_punct_chars, 0, sizeof (arc_punct_chars)); + arc_punct_chars['#'] = 1; + arc_punct_chars['*'] = 1; + arc_punct_chars['?'] = 1; + arc_punct_chars['!'] = 1; + arc_punct_chars['^'] = 1; + arc_punct_chars['&'] = 1; + + if (optimize > 1 && !TARGET_NO_COND_EXEC) + { + /* There are two target-independent ifcvt passes, and arc_reorg may do + one or more arc_ifcvt calls. */ + opt_pass *pass_arc_ifcvt_4 = make_pass_arc_ifcvt (g); + struct register_pass_info arc_ifcvt4_info + = { pass_arc_ifcvt_4, "dbr", 1, PASS_POS_INSERT_AFTER }; + struct register_pass_info arc_ifcvt5_info + = { pass_arc_ifcvt_4->clone (), "shorten", 1, PASS_POS_INSERT_BEFORE }; + + register_pass (&arc_ifcvt4_info); + register_pass (&arc_ifcvt5_info); + } + + if (flag_delayed_branch) + { + opt_pass *pass_arc_predicate_delay_insns + = make_pass_arc_predicate_delay_insns (g); + struct register_pass_info arc_predicate_delay_info + = { pass_arc_predicate_delay_insns, "dbr", 1, PASS_POS_INSERT_AFTER }; + + register_pass (&arc_predicate_delay_info); + } +} + +/* Check ARC options, generate derived target attributes. */ + +static void +arc_override_options (void) +{ + if (arc_cpu == PROCESSOR_NONE) + arc_cpu = PROCESSOR_ARC700; + + if (arc_size_opt_level == 3) + optimize_size = 1; + + if (flag_pic) + target_flags |= MASK_NO_SDATA_SET; + + if (flag_no_common == 255) + flag_no_common = !TARGET_NO_SDATA_SET; + + /* TARGET_COMPACT_CASESI needs the "q" register class. */ \ + if (TARGET_MIXED_CODE) + TARGET_Q_CLASS = 1; + if (!TARGET_Q_CLASS) + TARGET_COMPACT_CASESI = 0; + if (TARGET_COMPACT_CASESI) + TARGET_CASE_VECTOR_PC_RELATIVE = 1; + + /* These need to be done at start up. It's convenient to do them here. */ + arc_init (); +} + +/* The condition codes of the ARC, and the inverse function. */ +/* For short branches, the "c" / "nc" names are not defined in the ARC + Programmers manual, so we have to use "lo" / "hs"" instead. 
*/ +static const char *arc_condition_codes[] = +{ + "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv", + "gt", "le", "ge", "lt", "hi", "ls", "pnz", 0 +}; + +enum arc_cc_code_index +{ + ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N, + ARC_CC_C, ARC_CC_NC, ARC_CC_V, ARC_CC_NV, + ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ, + ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC +}; + +#define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1) + +/* Returns the index of the ARC condition code string in + `arc_condition_codes'. COMPARISON should be an rtx like + `(eq (...) (...))'. */ + +static int +get_arc_condition_code (rtx comparison) +{ + switch (GET_MODE (XEXP (comparison, 0))) + { + case CCmode: + case SImode: /* For BRcc. */ + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case GT : return ARC_CC_GT; + case LE : return ARC_CC_LE; + case GE : return ARC_CC_GE; + case LT : return ARC_CC_LT; + case GTU : return ARC_CC_HI; + case LEU : return ARC_CC_LS; + case LTU : return ARC_CC_LO; + case GEU : return ARC_CC_HS; + default : gcc_unreachable (); + } + case CC_ZNmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case GE: return ARC_CC_P; + case LT: return ARC_CC_N; + case GT : return ARC_CC_PNZ; + default : gcc_unreachable (); + } + case CC_Zmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + default : gcc_unreachable (); + } + case CC_Cmode: + switch (GET_CODE (comparison)) + { + case LTU : return ARC_CC_C; + case GEU : return ARC_CC_NC; + default : gcc_unreachable (); + } + case CC_FP_GTmode: + if (TARGET_ARGONAUT_SET && TARGET_SPFP) + switch (GET_CODE (comparison)) + { + case GT : return ARC_CC_N; + case UNLE: return ARC_CC_P; + default : gcc_unreachable (); + } + else + switch (GET_CODE (comparison)) + { + case GT : return ARC_CC_HI; + case UNLE : return ARC_CC_LS; + default : gcc_unreachable (); + } + case CC_FP_GEmode: + /* Same for FPX and non-FPX. */ + switch (GET_CODE (comparison)) + { + case GE : return ARC_CC_HS; + case UNLT : return ARC_CC_LO; + default : gcc_unreachable (); + } + case CC_FP_UNEQmode: + switch (GET_CODE (comparison)) + { + case UNEQ : return ARC_CC_EQ; + case LTGT : return ARC_CC_NE; + default : gcc_unreachable (); + } + case CC_FP_ORDmode: + switch (GET_CODE (comparison)) + { + case UNORDERED : return ARC_CC_C; + case ORDERED : return ARC_CC_NC; + default : gcc_unreachable (); + } + case CC_FPXmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_EQ; + case NE : return ARC_CC_NE; + case UNORDERED : return ARC_CC_C; + case ORDERED : return ARC_CC_NC; + case LTGT : return ARC_CC_HI; + case UNEQ : return ARC_CC_LS; + default : gcc_unreachable (); + } + default : gcc_unreachable (); + } + /*NOTREACHED*/ + return (42); +} + +/* Return true if COMPARISON has a short form that can accomodate OFFSET. */ + +bool +arc_short_comparison_p (rtx comparison, int offset) +{ + gcc_assert (ARC_CC_NC == ARC_CC_HS); + gcc_assert (ARC_CC_C == ARC_CC_LO); + switch (get_arc_condition_code (comparison)) + { + case ARC_CC_EQ: case ARC_CC_NE: + return offset >= -512 && offset <= 506; + case ARC_CC_GT: case ARC_CC_LE: case ARC_CC_GE: case ARC_CC_LT: + case ARC_CC_HI: case ARC_CC_LS: case ARC_CC_LO: case ARC_CC_HS: + return offset >= -64 && offset <= 58; + default: + return false; + } +} + +/* Given a comparison code (EQ, NE, etc.) 
and the first operand of a COMPARE, + return the mode to be used for the comparison. */ + +enum machine_mode +arc_select_cc_mode (enum rtx_code op, rtx x, rtx y) +{ + enum machine_mode mode = GET_MODE (x); + rtx x1; + + /* For an operation that sets the condition codes as a side-effect, the + C and V flags is not set as for cmp, so we can only use comparisons where + this doesn't matter. (For LT and GE we can use "mi" and "pl" + instead.) */ + /* ??? We could use "pnz" for greater than zero, however, we could then + get into trouble because the comparison could not be reversed. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && y == const0_rtx + && (op == EQ || op == NE + || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x) <= 4)))) + return CC_ZNmode; + + /* add.f for if (a+b) */ + if (mode == SImode + && GET_CODE (y) == NEG + && (op == EQ || op == NE)) + return CC_ZNmode; + + /* Check if this is a test suitable for bxor.f . */ + if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y) + && ((INTVAL (y) - 1) & INTVAL (y)) == 0 + && INTVAL (y)) + return CC_Zmode; + + /* Check if this is a test suitable for add / bmsk.f . */ + if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y) + && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1))) + && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0 + && (~INTVAL (x1) | INTVAL (y)) < 0 + && (~INTVAL (x1) | INTVAL (y)) > -0x800) + return CC_Zmode; + + if (GET_MODE (x) == SImode && (op == LTU || op == GEU) + && GET_CODE (x) == PLUS + && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) + return CC_Cmode; + + if (TARGET_ARGONAUT_SET + && ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP))) + switch (op) + { + case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED: + return CC_FPXmode; + case LT: case UNGE: case GT: case UNLE: + return CC_FP_GTmode; + case LE: case UNGT: case GE: case UNLT: + return CC_FP_GEmode; + default: gcc_unreachable (); + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE) + switch (op) + { + case EQ: case NE: return CC_Zmode; + case LT: case UNGE: + case GT: case UNLE: return CC_FP_GTmode; + case LE: case UNGT: + case GE: case UNLT: return CC_FP_GEmode; + case UNEQ: case LTGT: return CC_FP_UNEQmode; + case ORDERED: case UNORDERED: return CC_FP_ORDmode; + default: gcc_unreachable (); + } + + return CCmode; +} + +/* Vectors to keep interesting information about registers where it can easily + be got. We use to use the actual mode value as the bit number, but there + is (or may be) more than 32 modes now. Instead we use two tables: one + indexed by hard register number, and one indexed by mode. */ + +/* The purpose of arc_mode_class is to shrink the range of modes so that + they all fit (as bit numbers) in a 32-bit word (again). Each real mode is + mapped into one arc_mode_class mode. */ + +enum arc_mode_class { + C_MODE, + S_MODE, D_MODE, T_MODE, O_MODE, + SF_MODE, DF_MODE, TF_MODE, OF_MODE, + V_MODE +}; + +/* Modes for condition codes. */ +#define C_MODES (1 << (int) C_MODE) + +/* Modes for single-word and smaller quantities. */ +#define S_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) + +/* Modes for double-word and smaller quantities. */ +#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) + +/* Mode for 8-byte DF values only. */ +#define DF_MODES (1 << DF_MODE) + +/* Modes for quad-word and smaller quantities. */ +#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) + +/* Modes for 128-bit vectors. 
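+ (i.e. the V4SImode / V8HImode SIMD vectors supported above.)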
*/ +#define V_MODES (1 << (int) V_MODE) + +/* Value is 1 if register/mode pair is acceptable on arc. */ + +unsigned int arc_hard_regno_mode_ok[] = { + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, + T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES, + D_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + + /* ??? Leave these as S_MODES for now. */ + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES, + + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, + + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, + S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES +}; + +unsigned int arc_mode_class [NUM_MACHINE_MODES]; + +enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER]; + +enum reg_class +arc_preferred_reload_class (rtx, enum reg_class cl) +{ + if ((cl) == CHEAP_CORE_REGS || (cl) == WRITABLE_CORE_REGS) + return GENERAL_REGS; + return cl; +} + +/* Initialize the arc_mode_class array. */ + +static void +arc_init_reg_tables (void) +{ + int i; + + for (i = 0; i < NUM_MACHINE_MODES; i++) + { + switch (GET_MODE_CLASS (i)) + { + case MODE_INT: + case MODE_PARTIAL_INT: + case MODE_COMPLEX_INT: + if (GET_MODE_SIZE (i) <= 4) + arc_mode_class[i] = 1 << (int) S_MODE; + else if (GET_MODE_SIZE (i) == 8) + arc_mode_class[i] = 1 << (int) D_MODE; + else if (GET_MODE_SIZE (i) == 16) + arc_mode_class[i] = 1 << (int) T_MODE; + else if (GET_MODE_SIZE (i) == 32) + arc_mode_class[i] = 1 << (int) O_MODE; + else + arc_mode_class[i] = 0; + break; + case MODE_FLOAT: + case MODE_COMPLEX_FLOAT: + if (GET_MODE_SIZE (i) <= 4) + arc_mode_class[i] = 1 << (int) SF_MODE; + else if (GET_MODE_SIZE (i) == 8) + arc_mode_class[i] = 1 << (int) DF_MODE; + else if (GET_MODE_SIZE (i) == 16) + arc_mode_class[i] = 1 << (int) TF_MODE; + else if (GET_MODE_SIZE (i) == 32) + arc_mode_class[i] = 1 << (int) OF_MODE; + else + arc_mode_class[i] = 0; + break; + case MODE_VECTOR_INT: + arc_mode_class [i] = (1<< (int) V_MODE); + break; + case MODE_CC: + default: + /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so + we must explicitly check for them here. */ + if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode + || i == (int) CC_Cmode + || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode) + arc_mode_class[i] = 1 << (int) C_MODE; + else + arc_mode_class[i] = 0; + break; + } + } +} + +/* Core registers 56..59 are used for multiply extension options. + The dsp option uses r56 and r57, these are then named acc1 and acc2. + acc1 is the highpart, and acc2 the lowpart, so which register gets which + number depends on endianness. 
+ The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi. + Because mlo / mhi form a 64 bit value, we use different gcc internal + register numbers to make them form a register pair as the gcc internals + know it. mmid gets number 57, if still available, and mlo / mhi get + number 58 and 59, depending on endianness. We use DBX_REGISTER_NUMBER + to map this back. */ + char rname56[5] = "r56"; + char rname57[5] = "r57"; + char rname58[5] = "r58"; + char rname59[5] = "r59"; + +static void +arc_conditional_register_usage (void) +{ + int regno; + int i; + int fix_start = 60, fix_end = 55; + + if (TARGET_MUL64_SET) + { + fix_start = 57; + fix_end = 59; + + /* We don't provide a name for mmed. In rtl / assembly resource lists, + you are supposed to refer to it as mlo & mhi, e.g + (zero_extract:SI (reg:DI 58) (const_int 32) (16)) . + In an actual asm instruction, you are of course use mmed. + The point of avoiding having a separate register for mmed is that + this way, we don't have to carry clobbers of that reg around in every + isntruction that modifies mlo and/or mhi. */ + strcpy (rname57, ""); + strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo"); + strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi"); + } + if (TARGET_MULMAC_32BY16_SET) + { + fix_start = 56; + fix_end = fix_end > 57 ? fix_end : 57; + strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2"); + strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1"); + } + for (regno = fix_start; regno <= fix_end; regno++) + { + if (!fixed_regs[regno]) + warning (0, "multiply option implies r%d is fixed", regno); + fixed_regs [regno] = call_used_regs[regno] = 1; + } + if (TARGET_Q_CLASS) + { + reg_alloc_order[2] = 12; + reg_alloc_order[3] = 13; + reg_alloc_order[4] = 14; + reg_alloc_order[5] = 15; + reg_alloc_order[6] = 1; + reg_alloc_order[7] = 0; + reg_alloc_order[8] = 4; + reg_alloc_order[9] = 5; + reg_alloc_order[10] = 6; + reg_alloc_order[11] = 7; + reg_alloc_order[12] = 8; + reg_alloc_order[13] = 9; + reg_alloc_order[14] = 10; + reg_alloc_order[15] = 11; + } + if (TARGET_SIMD_SET) + { + int i; + for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++) + reg_alloc_order [i] = i; + for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG; + i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++) + reg_alloc_order [i] = i; + } + /* For Arctangent-A5 / ARC600, lp_count may not be read in an instruction + following immediately after another one setting it to a new value. + There was some discussion on how to enforce scheduling constraints for + processors with missing interlocks on the gcc mailing list: + http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html . + However, we can't actually use this approach, because for ARC the + delay slot scheduling pass is active, which runs after + machine_dependent_reorg. */ + if (TARGET_ARC600) + CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT); + else if (!TARGET_ARC700) + fixed_regs[LP_COUNT] = 1; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (!call_used_regs[regno]) + CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno); + for (regno = 32; regno < 60; regno++) + if (!fixed_regs[regno]) + SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno); + if (TARGET_ARC700) + { + for (regno = 32; regno <= 60; regno++) + CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno); + + /* If they have used -ffixed-lp_count, make sure it takes + effect. 
*/ + if (fixed_regs[LP_COUNT]) + { + CLEAR_HARD_REG_BIT (reg_class_contents[LPCOUNT_REG], LP_COUNT); + CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT); + CLEAR_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], LP_COUNT); + + /* Instead of taking out SF_MODE like below, forbid it outright. */ + arc_hard_regno_mode_ok[60] = 0; + } + else + arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE; + } + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (i < 29) + { + if (TARGET_Q_CLASS && ((i <= 3) || ((i >= 12) && (i <= 15)))) + arc_regno_reg_class[i] = ARCOMPACT16_REGS; + else + arc_regno_reg_class[i] = GENERAL_REGS; + } + else if (i < 60) + arc_regno_reg_class[i] + = (fixed_regs[i] + ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i) + ? CHEAP_CORE_REGS : ALL_CORE_REGS) + : ((TARGET_ARC700 + && TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)) + ? CHEAP_CORE_REGS : WRITABLE_CORE_REGS)); + else + arc_regno_reg_class[i] = NO_REGS; + } + + /* ARCOMPACT16_REGS is empty, if TARGET_Q_CLASS has not been activated. */ + if (!TARGET_Q_CLASS) + { + CLEAR_HARD_REG_SET(reg_class_contents [ARCOMPACT16_REGS]); + CLEAR_HARD_REG_SET(reg_class_contents [AC16_BASE_REGS]); + } + + gcc_assert (FIRST_PSEUDO_REGISTER >= 144); + + /* Handle Special Registers. */ + arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register. */ + arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register. */ + arc_regno_reg_class[31] = LINK_REGS; /* blink register. */ + arc_regno_reg_class[60] = LPCOUNT_REG; + arc_regno_reg_class[61] = NO_REGS; /* CC_REG: must be NO_REGS. */ + arc_regno_reg_class[62] = GENERAL_REGS; + + if (TARGET_DPFP) + { + for (i = 40; i < 44; ++i) + { + arc_regno_reg_class[i] = DOUBLE_REGS; + + /* Unless they want us to do 'mov d1, 0x00000000' make sure + no attempt is made to use such a register as a destination + operand in *movdf_insn. */ + if (!TARGET_ARGONAUT_SET) + { + /* Make sure no 'c', 'w', 'W', or 'Rac' constraint is + interpreted to mean they can use D1 or D2 in their insn. */ + CLEAR_HARD_REG_BIT(reg_class_contents[CHEAP_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[ALL_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[WRITABLE_CORE_REGS ], i); + CLEAR_HARD_REG_BIT(reg_class_contents[MPY_WRITABLE_CORE_REGS], i); + } + } + } + else + { + /* Disable all DOUBLE_REGISTER settings, + if not generating DPFP code. */ + arc_regno_reg_class[40] = ALL_REGS; + arc_regno_reg_class[41] = ALL_REGS; + arc_regno_reg_class[42] = ALL_REGS; + arc_regno_reg_class[43] = ALL_REGS; + + arc_hard_regno_mode_ok[40] = 0; + arc_hard_regno_mode_ok[42] = 0; + + CLEAR_HARD_REG_SET(reg_class_contents [DOUBLE_REGS]); + } + + if (TARGET_SIMD_SET) + { + gcc_assert (ARC_FIRST_SIMD_VR_REG == 64); + gcc_assert (ARC_LAST_SIMD_VR_REG == 127); + + for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++) + arc_regno_reg_class [i] = SIMD_VR_REGS; + + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128); + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128); + gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136); + gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG == 143); + + for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG; + i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++) + arc_regno_reg_class [i] = SIMD_DMA_CONFIG_REGS; + } + + /* pc : r63 */ + arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS; +} + +/* Handle an "interrupt" attribute; arguments as in + struct attribute_spec.handler. 
*/ + +static tree +arc_handle_interrupt_attribute (tree *, tree name, tree args, int, + bool *no_add_attrs) +{ + gcc_assert (args); + + tree value = TREE_VALUE (args); + + if (TREE_CODE (value) != STRING_CST) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not a string constant", + name); + *no_add_attrs = true; + } + else if (strcmp (TREE_STRING_POINTER (value), "ilink1") + && strcmp (TREE_STRING_POINTER (value), "ilink2")) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not \"ilink1\" or \"ilink2\"", + name); + *no_add_attrs = true; + } + return NULL_TREE; +} + +/* Return zero if TYPE1 and TYPE are incompatible, one if they are compatible, + and two if they are nearly compatible (which causes a warning to be + generated). */ + +static int +arc_comp_type_attributes (const_tree type1, + const_tree type2) +{ + int l1, l2, m1, m2, s1, s2; + + /* Check for mismatch of non-default calling convention. */ + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + /* Check for mismatched call attributes. */ + l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; + m1 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type1)) != NULL; + m2 = lookup_attribute ("medium_call", TYPE_ATTRIBUTES (type2)) != NULL; + s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; + s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; + + /* Only bother to check if an attribute is defined. */ + if (l1 | l2 | m1 | m2 | s1 | s2) + { + /* If one type has an attribute, the other must have the same attribute. */ + if ((l1 != l2) || (m1 != m2) || (s1 != s2)) + return 0; + + /* Disallow mixed attributes. */ + if (l1 + m1 + s1 > 1) + return 0; + } + + + return 1; +} + +/* Set the default attributes for TYPE. */ + +void +arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED) +{ + gcc_unreachable(); +} + +/* Misc. utilities. */ + +/* X and Y are two things to compare using CODE. Emit the compare insn and + return the rtx for the cc reg in the proper mode. */ + +rtx +gen_compare_reg (rtx comparison, enum machine_mode omode) +{ + enum rtx_code code = GET_CODE (comparison); + rtx x = XEXP (comparison, 0); + rtx y = XEXP (comparison, 1); + rtx tmp, cc_reg; + enum machine_mode mode, cmode; + + + cmode = GET_MODE (x); + if (cmode == VOIDmode) + cmode = GET_MODE (y); + gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode); + if (cmode == SImode) + { + if (!register_operand (x, SImode)) + { + if (register_operand (y, SImode)) + { + tmp = x; + x = y; + y = tmp; + code = swap_condition (code); + } + else + x = copy_to_mode_reg (SImode, x); + } + if (GET_CODE (y) == SYMBOL_REF && flag_pic) + y = copy_to_mode_reg (SImode, y); + } + else + { + x = force_reg (cmode, x); + y = force_reg (cmode, y); + } + mode = SELECT_CC_MODE (code, x, y); + + cc_reg = gen_rtx_REG (mode, CC_REG); + + /* ??? 
FIXME (x-y)==0, as done by both cmpsfpx_raw and + cmpdfpx_raw, is not a correct comparison for floats: + http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + */ + if (TARGET_ARGONAUT_SET + && ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP))) + { + switch (code) + { + case NE: case EQ: case LT: case UNGE: case LE: case UNGT: + case UNEQ: case LTGT: case ORDERED: case UNORDERED: + break; + case GT: case UNLE: case GE: case UNLT: + code = swap_condition (code); + tmp = x; + x = y; + y = tmp; + break; + default: + gcc_unreachable (); + } + if (cmode == SFmode) + { + emit_insn (gen_cmpsfpx_raw (x, y)); + } + else /* DFmode */ + { + /* Accepts Dx regs directly by insns. */ + emit_insn (gen_cmpdfpx_raw (x, y)); + } + + if (mode != CC_FPXmode) + emit_insn (gen_rtx_SET (VOIDmode, cc_reg, + gen_rtx_COMPARE (mode, + gen_rtx_REG (CC_FPXmode, 61), + const0_rtx))); + } + else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE) + { + rtx op0 = gen_rtx_REG (cmode, 0); + rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD); + + switch (code) + { + case NE: case EQ: case GT: case UNLE: case GE: case UNLT: + case UNEQ: case LTGT: case ORDERED: case UNORDERED: + break; + case LT: case UNGE: case LE: case UNGT: + code = swap_condition (code); + tmp = x; + x = y; + y = tmp; + break; + default: + gcc_unreachable (); + } + if (currently_expanding_to_rtl) + { + emit_move_insn (op0, x); + emit_move_insn (op1, y); + } + else + { + gcc_assert (rtx_equal_p (op0, x)); + gcc_assert (rtx_equal_p (op1, y)); + } + emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1))); + } + else + emit_insn (gen_rtx_SET (omode, cc_reg, + gen_rtx_COMPARE (mode, x, y))); + return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx); +} + +/* Return true if VALUE, a const_double, will fit in a limm (4 byte number). + We assume the value can be either signed or unsigned. */ + +bool +arc_double_limm_p (rtx value) +{ + HOST_WIDE_INT low, high; + + gcc_assert (GET_CODE (value) == CONST_DOUBLE); + + if (TARGET_DPFP) + return true; + + low = CONST_DOUBLE_LOW (value); + high = CONST_DOUBLE_HIGH (value); + + if (low & 0x80000000) + { + return (((unsigned HOST_WIDE_INT) low <= 0xffffffff && high == 0) + || (((low & - (unsigned HOST_WIDE_INT) 0x80000000) + == - (unsigned HOST_WIDE_INT) 0x80000000) + && high == -1)); + } + else + { + return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0; + } +} + +/* Do any needed setup for a variadic function. For the ARC, we must + create a register parameter block, and then copy any anonymous arguments + in registers to memory. + + CUM has not been updated for the last named argument which has type TYPE + and mode MODE, and we rely on this fact. */ + +static void +arc_setup_incoming_varargs (cumulative_args_t args_so_far, + enum machine_mode mode, tree type, + int *pretend_size, int no_rtl) +{ + int first_anon_arg; + CUMULATIVE_ARGS next_cum; + + /* We must treat `__builtin_va_alist' as an anonymous arg. */ + + next_cum = *get_cumulative_args (args_so_far); + arc_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1); + first_anon_arg = next_cum; + + if (first_anon_arg < MAX_ARC_PARM_REGS) + { + /* First anonymous (unnamed) argument is in a reg. */ + + /* Note that first_reg_offset < MAX_ARC_PARM_REGS. 
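+ (This is guaranteed by the enclosing first_anon_arg check.)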
*/ + int first_reg_offset = first_anon_arg; + + if (!no_rtl) + { + rtx regblock + = gen_rtx_MEM (BLKmode, plus_constant (Pmode, arg_pointer_rtx, + FIRST_PARM_OFFSET (0))); + move_block_from_reg (first_reg_offset, regblock, + MAX_ARC_PARM_REGS - first_reg_offset); + } + + *pretend_size + = ((MAX_ARC_PARM_REGS - first_reg_offset ) * UNITS_PER_WORD); + } +} + +/* Cost functions. */ + +/* Provide the costs of an addressing mode that contains ADDR. + If ADDR is not a valid address, its cost is irrelevant. */ + +int +arc_address_cost (rtx addr, enum machine_mode, addr_space_t, bool speed) +{ + switch (GET_CODE (addr)) + { + case REG : + return speed || satisfies_constraint_Rcq (addr) ? 0 : 1; + case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: + case PRE_MODIFY: case POST_MODIFY: + return !speed; + + case LABEL_REF : + case SYMBOL_REF : + case CONST : + /* Most likely needs a LIMM. */ + return COSTS_N_INSNS (1); + + case PLUS : + { + register rtx plus0 = XEXP (addr, 0); + register rtx plus1 = XEXP (addr, 1); + + if (GET_CODE (plus0) != REG + && (GET_CODE (plus0) != MULT + || !CONST_INT_P (XEXP (plus0, 1)) + || (INTVAL (XEXP (plus0, 1)) != 2 + && INTVAL (XEXP (plus0, 1)) != 4))) + break; + + switch (GET_CODE (plus1)) + { + case CONST_INT : + return (!RTX_OK_FOR_OFFSET_P (SImode, plus1) + ? COSTS_N_INSNS (1) + : speed + ? 0 + : (satisfies_constraint_Rcq (plus0) + && satisfies_constraint_O (plus1)) + ? 0 + : 1); + case REG: + return (speed < 1 ? 0 + : (satisfies_constraint_Rcq (plus0) + && satisfies_constraint_Rcq (plus1)) + ? 0 : 1); + case CONST : + case SYMBOL_REF : + case LABEL_REF : + return COSTS_N_INSNS (1); + default: + break; + } + break; + } + default: + break; + } + + return 4; +} + +/* Emit instruction X with the frame related bit set. */ + +static rtx +frame_insn (rtx x) +{ + x = emit_insn (x); + RTX_FRAME_RELATED_P (x) = 1; + return x; +} + +/* Emit a frame insn to move SRC to DST. */ + +static rtx +frame_move (rtx dst, rtx src) +{ + return frame_insn (gen_rtx_SET (VOIDmode, dst, src)); +} + +/* Like frame_move, but add a REG_INC note for REG if ADDR contains an + auto increment address, or is zero. */ + +static rtx +frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr) +{ + rtx insn = frame_move (dst, src); + + if (!addr + || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC + || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY) + add_reg_note (insn, REG_INC, reg); + return insn; +} + +/* Emit a frame insn which adjusts a frame address register REG by OFFSET. */ + +static rtx +frame_add (rtx reg, HOST_WIDE_INT offset) +{ + gcc_assert ((offset & 0x3) == 0); + if (!offset) + return NULL_RTX; + return frame_move (reg, plus_constant (Pmode, reg, offset)); +} + +/* Emit a frame insn which adjusts stack pointer by OFFSET. */ + +static rtx +frame_stack_add (HOST_WIDE_INT offset) +{ + return frame_add (stack_pointer_rtx, offset); +} + +/* Traditionally, we push saved registers first in the prologue, + then we allocate the rest of the frame - and reverse in the epilogue. + This has still its merits for ease of debugging, or saving code size + or even execution time if the stack frame is so large that some accesses + can't be encoded anymore with offsets in the instruction code when using + a different scheme. + Also, it would be a good starting point if we got instructions to help + with register save/restore. + + However, often stack frames are small, and the pushing / popping has + some costs: + - the stack modification prevents a lot of scheduling. 
+ - frame allocation / deallocation needs extra instructions. + - unless we know that we compile ARC700 user code, we need to put + a memory barrier after frame allocation / before deallocation to + prevent interrupts clobbering our data in the frame. + In particular, we don't have any such guarantees for library functions, + which tend to, on the other hand, to have small frames. + + Thus, for small frames, we'd like to use a different scheme: + - The frame is allocated in full with the first prologue instruction, + and deallocated in full with the last epilogue instruction. + Thus, the instructions in-betwen can be freely scheduled. + - If the function has no outgoing arguments on the stack, we can allocate + one register save slot at the top of the stack. This register can then + be saved simultanously with frame allocation, and restored with + frame deallocation. + This register can be picked depending on scheduling considerations, + although same though should go into having some set of registers + to be potentially lingering after a call, and others to be available + immediately - i.e. in the absence of interprocedual optimization, we + can use an ABI-like convention for register allocation to reduce + stalls after function return. */ +/* Function prologue/epilogue handlers. */ + +/* ARCompact stack frames look like: + + Before call After call + high +-----------------------+ +-----------------------+ + mem | reg parm save area | | reg parm save area | + | only created for | | only created for | + | variable arg fns | | variable arg fns | + AP +-----------------------+ +-----------------------+ + | return addr register | | return addr register | + | (if required) | | (if required) | + +-----------------------+ +-----------------------+ + | | | | + | reg save area | | reg save area | + | | | | + +-----------------------+ +-----------------------+ + | frame pointer | | frame pointer | + | (if required) | | (if required) | + FP +-----------------------+ +-----------------------+ + | | | | + | local/temp variables | | local/temp variables | + | | | | + +-----------------------+ +-----------------------+ + | | | | + | arguments on stack | | arguments on stack | + | | | | + SP +-----------------------+ +-----------------------+ + | reg parm save area | + | only created for | + | variable arg fns | + AP +-----------------------+ + | return addr register | + | (if required) | + +-----------------------+ + | | + | reg save area | + | | + +-----------------------+ + | frame pointer | + | (if required) | + FP +-----------------------+ + | | + | local/temp variables | + | | + +-----------------------+ + | | + | arguments on stack | + low | | + mem SP +-----------------------+ + +Notes: +1) The "reg parm save area" does not exist for non variable argument fns. + The "reg parm save area" can be eliminated completely if we created our + own va-arc.h, but that has tradeoffs as well (so it's not done). */ + +/* Structure to be filled in by arc_compute_frame_size with register + save masks, and offsets for the current function. */ +struct GTY (()) arc_frame_info +{ + unsigned int total_size; /* # bytes that the entire frame takes up. */ + unsigned int extra_size; /* # bytes of extra stuff. */ + unsigned int pretend_size; /* # bytes we push and pretend caller did. */ + unsigned int args_size; /* # bytes that outgoing arguments take up. */ + unsigned int reg_size; /* # bytes needed to store regs. */ + unsigned int var_size; /* # bytes that variables take up. 
*/ + unsigned int reg_offset; /* Offset from new sp to store regs. */ + unsigned int gmask; /* Mask of saved gp registers. */ + int initialized; /* Nonzero if frame size already calculated. */ + short millicode_start_reg; + short millicode_end_reg; + bool save_return_addr; +}; + +/* Defining data structures for per-function information. */ + +typedef struct GTY (()) machine_function +{ + enum arc_function_type fn_type; + struct arc_frame_info frame_info; + /* To keep track of unalignment caused by short insns. */ + int unalign; + int force_short_suffix; /* Used when disgorging return delay slot insns. */ + const char *size_reason; + struct arc_ccfsm ccfsm_current; + /* Map from uid to ccfsm state during branch shortening. */ + rtx ccfsm_current_insn; + char arc_reorg_started; + char prescan_initialized; +} machine_function; + +/* Type of function DECL. + + The result is cached. To reset the cache at the end of a function, + call with DECL = NULL_TREE. */ + +enum arc_function_type +arc_compute_function_type (struct function *fun) +{ + tree decl = fun->decl; + tree a; + enum arc_function_type fn_type = fun->machine->fn_type; + + if (fn_type != ARC_FUNCTION_UNKNOWN) + return fn_type; + + /* Assume we have a normal function (not an interrupt handler). */ + fn_type = ARC_FUNCTION_NORMAL; + + /* Now see if this is an interrupt handler. */ + for (a = DECL_ATTRIBUTES (decl); + a; + a = TREE_CHAIN (a)) + { + tree name = TREE_PURPOSE (a), args = TREE_VALUE (a); + + if (name == get_identifier ("interrupt") + && list_length (args) == 1 + && TREE_CODE (TREE_VALUE (args)) == STRING_CST) + { + tree value = TREE_VALUE (args); + + if (!strcmp (TREE_STRING_POINTER (value), "ilink1")) + fn_type = ARC_FUNCTION_ILINK1; + else if (!strcmp (TREE_STRING_POINTER (value), "ilink2")) + fn_type = ARC_FUNCTION_ILINK2; + else + gcc_unreachable (); + break; + } + } + + return fun->machine->fn_type = fn_type; +} + +#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) +#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM)) + +/* Tell prologue and epilogue if register REGNO should be saved / restored. + The return address and frame pointer are treated separately. + Don't consider them here. + Addition for pic: The gp register needs to be saved if the current + function changes it to access gotoff variables. + FIXME: This will not be needed if we used some arbitrary register + instead of r26. +*/ +#define MUST_SAVE_REGISTER(regno, interrupt_p) \ +(((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \ + && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \ + || (flag_pic && crtl->uses_pic_offset_table \ + && regno == PIC_OFFSET_TABLE_REGNUM) ) + +#define MUST_SAVE_RETURN_ADDR \ + (cfun->machine->frame_info.save_return_addr) + +/* Return non-zero if there are registers to be saved or loaded using + millicode thunks. We can only use consecutive sequences starting + with r13, and not going beyond r25. + GMASK is a bitmask of registers to save. This function sets + FRAME->millicod_start_reg .. FRAME->millicode_end_reg to the range + of registers to be saved / restored with a millicode call. */ + +static int +arc_compute_millicode_save_restore_regs (unsigned int gmask, + struct arc_frame_info *frame) +{ + int regno; + + int start_reg = 13, end_reg = 25; + + for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));) + regno++; + end_reg = regno - 1; + /* There is no point in using millicode thunks if we don't save/restore + at least three registers. 
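The loop above collects the run of consecutively saved registers starting at r13 (never past r25); whether a millicode thunk is worthwhile is decided by the threshold test that follows, three registers for a leaf function or two otherwise since blink is restored as well. A standalone sketch of the same computation; millicode_range and its return convention are illustrative only.

#include <stdio.h>

/* Sketch: given a save mask, find the run of consecutively saved
   registers starting at r13 (never past r25).  Returns the last
   register of the run, or 0 if a millicode thunk is not worthwhile.  */
static int
millicode_range (unsigned int gmask, int leaf_p)
{
  int regno = 13;
  while (regno <= 25 && (gmask & (1u << regno)))
    regno++;
  int saved = regno - 13;          /* length of the run */
  if (saved >= (leaf_p ? 3 : 2))   /* non-leaf also restores blink */
    return regno - 1;              /* millicode_end_reg */
  return 0;
}

int
main (void)
{
  /* r13-r16 saved: a run of four, so a thunk covering r13..r16.  */
  printf ("%d\n", millicode_range (0xf << 13, 1));
  return 0;
}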
For non-leaf functions we also have the + blink restore. */ + if (regno - start_reg >= 3 - (crtl->is_leaf == 0)) + { + frame->millicode_start_reg = 13; + frame->millicode_end_reg = regno - 1; + return 1; + } + return 0; +} + +/* Return the bytes needed to compute the frame pointer from the current + stack pointer. + + SIZE is the size needed for local variables. */ + +unsigned int +arc_compute_frame_size (int size) /* size = # of var. bytes allocated. */ +{ + int regno; + unsigned int total_size, var_size, args_size, pretend_size, extra_size; + unsigned int reg_size, reg_offset; + unsigned int gmask; + enum arc_function_type fn_type; + int interrupt_p; + struct arc_frame_info *frame_info = &cfun->machine->frame_info; + + size = ARC_STACK_ALIGN (size); + + /* 1) Size of locals and temporaries */ + var_size = size; + + /* 2) Size of outgoing arguments */ + args_size = crtl->outgoing_args_size; + + /* 3) Calculate space needed for saved registers. + ??? We ignore the extension registers for now. */ + + /* See if this is an interrupt handler. Call used registers must be saved + for them too. */ + + reg_size = 0; + gmask = 0; + fn_type = arc_compute_function_type (cfun); + interrupt_p = ARC_INTERRUPT_P (fn_type); + + for (regno = 0; regno <= 31; regno++) + { + if (MUST_SAVE_REGISTER (regno, interrupt_p)) + { + reg_size += UNITS_PER_WORD; + gmask |= 1 << regno; + } + } + + /* 4) Space for back trace data structure. + (if required) + (if required). */ + frame_info->save_return_addr + = (!crtl->is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM)); + /* Saving blink reg in case of leaf function for millicode thunk calls. */ + if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET) + { + if (arc_compute_millicode_save_restore_regs (gmask, frame_info)) + frame_info->save_return_addr = true; + } + + extra_size = 0; + if (MUST_SAVE_RETURN_ADDR) + extra_size = 4; + if (frame_pointer_needed) + extra_size += 4; + + /* 5) Space for variable arguments passed in registers */ + pretend_size = crtl->args.pretend_args_size; + + /* Ensure everything before the locals is aligned appropriately. */ + { + unsigned int extra_plus_reg_size; + unsigned int extra_plus_reg_size_aligned; + + extra_plus_reg_size = extra_size + reg_size; + extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size); + reg_size = extra_plus_reg_size_aligned - extra_size; + } + + /* Compute total frame size. */ + total_size = var_size + args_size + extra_size + pretend_size + reg_size; + + total_size = ARC_STACK_ALIGN (total_size); + + /* Compute offset of register save area from stack pointer: + A5 Frame: pretend_size reg_size var_size args_size <--sp + */ + reg_offset = (total_size - (pretend_size + reg_size + extra_size) + + (frame_pointer_needed ? 4 : 0)); + + /* Save computed information. */ + frame_info->total_size = total_size; + frame_info->extra_size = extra_size; + frame_info->pretend_size = pretend_size; + frame_info->var_size = var_size; + frame_info->args_size = args_size; + frame_info->reg_size = reg_size; + frame_info->reg_offset = reg_offset; + frame_info->gmask = gmask; + frame_info->initialized = reload_completed; + + /* Ok, we're done. */ + return total_size; +} + +/* Common code to save/restore registers. */ +/* BASE_REG is the base register to use for addressing and to adjust. + GMASK is a bitmask of general purpose registers to save/restore. 
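As a worked illustration of the bookkeeping in arc_compute_frame_size, the sketch below redoes the arithmetic for a hypothetical function with 20 bytes of locals, 8 bytes of outgoing arguments, four saved registers, a saved return address and a frame pointer. The 4-byte word and the ALIGN4 macro are simplifying stand-ins for UNITS_PER_WORD and ARC_STACK_ALIGN, and the example numbers are invented.

#include <stdio.h>

#define WORD 4
#define ALIGN4(x) (((x) + 3) & ~3u)   /* stand-in for ARC_STACK_ALIGN */

int
main (void)
{
  unsigned var_size  = ALIGN4 (20);   /* locals                      */
  unsigned args_size = 8;             /* outgoing arguments          */
  unsigned reg_size  = 4 * WORD;      /* four call-saved registers   */
  unsigned pretend   = 0;             /* no variadic register block  */
  unsigned extra     = 4 + 4;         /* return address + frame ptr  */

  /* Keep everything before the locals aligned.  */
  reg_size = ALIGN4 (extra + reg_size) - extra;

  unsigned total = ALIGN4 (var_size + args_size + extra + pretend + reg_size);
  unsigned reg_offset = total - (pretend + reg_size + extra) + 4 /* fp saved */;

  printf ("total=%u reg_offset=%u\n", total, reg_offset);  /* 52 and 32 */
  return 0;
}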
+ epilogue_p 0: prologue 1:epilogue 2:epilogue, sibling thunk + If *FIRST_OFFSET is non-zero, add it first to BASE_REG - preferably + using a pre-modify for the first memory access. *FIRST_OFFSET is then + zeroed. */ + +static void +arc_save_restore (rtx base_reg, + unsigned int gmask, int epilogue_p, int *first_offset) +{ + unsigned int offset = 0; + int regno; + struct arc_frame_info *frame = &cfun->machine->frame_info; + rtx sibthunk_insn = NULL_RTX; + + if (gmask) + { + /* Millicode thunks implementation: + Generates calls to millicodes for registers starting from r13 to r25 + Present Limitations: + - Only one range supported. The remaining regs will have the ordinary + st and ld instructions for store and loads. Hence a gmask asking + to store r13-14, r16-r25 will only generate calls to store and + load r13 to r14 while store and load insns will be generated for + r16 to r25 in the prologue and epilogue respectively. + + - Presently library only supports register ranges starting from r13. + */ + if (epilogue_p == 2 || frame->millicode_end_reg > 14) + { + int start_call = frame->millicode_start_reg; + int end_call = frame->millicode_end_reg; + int n_regs = end_call - start_call + 1; + int i = 0, r, off = 0; + rtx insn; + rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + + if (*first_offset) + { + /* "reg_size" won't be more than 127 . */ + gcc_assert (epilogue_p || abs (*first_offset <= 127)); + frame_add (base_reg, *first_offset); + *first_offset = 0; + } + insn = gen_rtx_PARALLEL + (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1)); + if (epilogue_p == 2) + i += 2; + else + XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr); + for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++) + { + rtx reg = gen_rtx_REG (SImode, r); + rtx mem + = gen_frame_mem (SImode, plus_constant (Pmode, base_reg, off)); + + if (epilogue_p) + XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, reg, mem); + else + XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, mem, reg); + gmask = gmask & ~(1L << r); + } + if (epilogue_p == 2) + sibthunk_insn = insn; + else + frame_insn (insn); + offset += off; + } + + for (regno = 0; regno <= 31; regno++) + { + if ((gmask & (1L << regno)) != 0) + { + rtx reg = gen_rtx_REG (SImode, regno); + rtx addr, mem; + + if (*first_offset) + { + gcc_assert (!offset); + addr = plus_constant (Pmode, base_reg, *first_offset); + addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr); + *first_offset = 0; + } + else + { + gcc_assert (SMALL_INT (offset)); + addr = plus_constant (Pmode, base_reg, offset); + } + mem = gen_frame_mem (SImode, addr); + if (epilogue_p) + frame_move_inc (reg, mem, base_reg, addr); + else + frame_move_inc (mem, reg, base_reg, addr); + offset += UNITS_PER_WORD; + } /* if */ + } /* for */ + }/* if */ + if (sibthunk_insn) + { + rtx r12 = gen_rtx_REG (Pmode, 12); + + frame_insn (gen_rtx_SET (VOIDmode, r12, GEN_INT (offset))); + XVECEXP (sibthunk_insn, 0, 0) = ret_rtx; + XVECEXP (sibthunk_insn, 0, 1) + = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12)); + sibthunk_insn = emit_jump_insn (sibthunk_insn); + RTX_FRAME_RELATED_P (sibthunk_insn) = 1; + } +} /* arc_save_restore */ + + +int arc_return_address_regs[4] + = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM}; + +/* Set up the stack and frame pointer (if desired) for the function. 
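arc_save_restore walks the bits of GMASK and addresses each slot at an increasing small offset, folding any pending FIRST_OFFSET into the very first access as a pre-modify (.a writeback) of the base register. The sketch below just prints the sequence it would produce for a prologue; the mnemonics are shorthand for illustration, not generated output.

#include <stdio.h>

#define UNITS_PER_WORD 4

/* Sketch: one store per set bit of GMASK, with FIRST_OFFSET folded
   into the first access as a pre-modify of the base register.  */
static void
print_save_sequence (unsigned int gmask, int first_offset)
{
  int offset = 0;
  for (int regno = 0; regno <= 31; regno++)
    {
      if (!(gmask & (1u << regno)))
        continue;
      if (first_offset)
        {
          printf ("st.a r%d,[sp,%d]   ; pre-modify folds the adjustment\n",
                  regno, first_offset);
          first_offset = 0;
        }
      else
        printf ("st r%d,[sp,%d]\n", regno, offset);
      offset += UNITS_PER_WORD;
    }
}

int
main (void)
{
  print_save_sequence ((1u << 13) | (1u << 14) | (1u << 15), -12);
  return 0;
}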
*/ + +void +arc_expand_prologue (void) +{ + int size = get_frame_size (); + unsigned int gmask = cfun->machine->frame_info.gmask; + /* unsigned int frame_pointer_offset;*/ + unsigned int frame_size_to_allocate; + /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13. + Change the stack layout so that we rather store a high register with the + PRE_MODIFY, thus enabling more short insn generation.) */ + int first_offset = 0; + + size = ARC_STACK_ALIGN (size); + + /* Compute/get total frame size. */ + size = (!cfun->machine->frame_info.initialized + ? arc_compute_frame_size (size) + : cfun->machine->frame_info.total_size); + + if (flag_stack_usage_info) + current_function_static_stack_size = size; + + /* Keep track of frame size to be allocated. */ + frame_size_to_allocate = size; + + /* These cases shouldn't happen. Catch them now. */ + gcc_assert (!(size == 0 && gmask)); + + /* Allocate space for register arguments if this is a variadic function. */ + if (cfun->machine->frame_info.pretend_size != 0) + { + /* Ensure pretend_size is maximum of 8 * word_size. */ + gcc_assert (cfun->machine->frame_info.pretend_size <= 32); + + frame_stack_add (-(HOST_WIDE_INT)cfun->machine->frame_info.pretend_size); + frame_size_to_allocate -= cfun->machine->frame_info.pretend_size; + } + + /* The home-grown ABI says link register is saved first. */ + if (MUST_SAVE_RETURN_ADDR) + { + rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM); + rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)); + + frame_move_inc (mem, ra, stack_pointer_rtx, 0); + frame_size_to_allocate -= UNITS_PER_WORD; + + } /* MUST_SAVE_RETURN_ADDR */ + + /* Save any needed call-saved regs (and call-used if this is an + interrupt handler) for ARCompact ISA. */ + if (cfun->machine->frame_info.reg_size) + { + first_offset = -cfun->machine->frame_info.reg_size; + /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */ + arc_save_restore (stack_pointer_rtx, gmask, 0, &first_offset); + frame_size_to_allocate -= cfun->machine->frame_info.reg_size; + } + + + /* Save frame pointer if needed. */ + if (frame_pointer_needed) + { + rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (-UNITS_PER_WORD + first_offset)); + rtx mem = gen_frame_mem (Pmode, gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + addr)); + frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0); + frame_size_to_allocate -= UNITS_PER_WORD; + first_offset = 0; + frame_move (frame_pointer_rtx, stack_pointer_rtx); + } + + /* ??? We don't handle the case where the saved regs are more than 252 + bytes away from sp. This can be handled by decrementing sp once, saving + the regs, and then decrementing it again. The epilogue doesn't have this + problem as the `ld' insn takes reg+limm values (though it would be more + efficient to avoid reg+limm). */ + + frame_size_to_allocate -= first_offset; + /* Allocate the stack frame. */ + if (frame_size_to_allocate > 0) + frame_stack_add ((HOST_WIDE_INT) 0 - frame_size_to_allocate); + + /* Setup the gp register, if needed. */ + if (crtl->uses_pic_offset_table) + arc_finalize_pic (); +} + +/* Do any necessary cleanup after a function to restore stack, frame, + and regs. */ + +void +arc_expand_epilogue (int sibcall_p) +{ + int size = get_frame_size (); + enum arc_function_type fn_type = arc_compute_function_type (cfun); + + size = ARC_STACK_ALIGN (size); + size = (!cfun->machine->frame_info.initialized + ? 
arc_compute_frame_size (size) + : cfun->machine->frame_info.total_size); + + unsigned int pretend_size = cfun->machine->frame_info.pretend_size; + unsigned int frame_size; + unsigned int size_to_deallocate; + int restored; + int can_trust_sp_p = !cfun->calls_alloca; + int first_offset = 0; + int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0; + + size_to_deallocate = size; + + frame_size = size - (pretend_size + + cfun->machine->frame_info.reg_size + + cfun->machine->frame_info.extra_size); + + /* ??? There are lots of optimizations that can be done here. + EG: Use fp to restore regs if it's closer. + Maybe in time we'll do them all. For now, always restore regs from + sp, but don't restore sp if we don't have to. */ + + if (!can_trust_sp_p) + gcc_assert (frame_pointer_needed); + + /* Restore stack pointer to the beginning of saved register area for + ARCompact ISA. */ + if (frame_size) + { + if (frame_pointer_needed) + frame_move (stack_pointer_rtx, frame_pointer_rtx); + else + first_offset = frame_size; + size_to_deallocate -= frame_size; + } + else if (!can_trust_sp_p) + frame_stack_add (-frame_size); + + + /* Restore any saved registers. */ + if (frame_pointer_needed) + { + rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx); + + frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr), + stack_pointer_rtx, 0); + size_to_deallocate -= UNITS_PER_WORD; + } + + /* Load blink after the calls to thunk calls in case of optimize size. */ + if (millicode_p) + { + int sibthunk_p = (!sibcall_p + && fn_type == ARC_FUNCTION_NORMAL + && !cfun->machine->frame_info.pretend_size); + + gcc_assert (!(cfun->machine->frame_info.gmask + & (FRAME_POINTER_MASK | RETURN_ADDR_MASK))); + arc_save_restore (stack_pointer_rtx, + cfun->machine->frame_info.gmask, + 1 + sibthunk_p, &first_offset); + if (sibthunk_p) + goto epilogue_done; + } + /* If we are to restore registers, and first_offset would require + a limm to be encoded in a PRE_MODIFY, yet we can add it with a + fast add to the stack pointer, do this now. */ + if ((!SMALL_INT (first_offset) + && cfun->machine->frame_info.gmask + && ((TARGET_ARC700 && !optimize_size) + ? first_offset <= 0x800 + : satisfies_constraint_C2a (GEN_INT (first_offset)))) + /* Also do this if we have both gprs and return + address to restore, and they both would need a LIMM. */ + || (MUST_SAVE_RETURN_ADDR + && !SMALL_INT ((cfun->machine->frame_info.reg_size + first_offset) >> 2) + && cfun->machine->frame_info.gmask)) + { + frame_stack_add (first_offset); + first_offset = 0; + } + if (MUST_SAVE_RETURN_ADDR) + { + rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + int ra_offs = cfun->machine->frame_info.reg_size + first_offset; + rtx addr = plus_constant (Pmode, stack_pointer_rtx, ra_offs); + + /* If the load of blink would need a LIMM, but we can add + the offset quickly to sp, do the latter. */ + if (!SMALL_INT (ra_offs >> 2) + && !cfun->machine->frame_info.gmask + && ((TARGET_ARC700 && !optimize_size) + ? ra_offs <= 0x800 + : satisfies_constraint_C2a (GEN_INT (ra_offs)))) + { + size_to_deallocate -= ra_offs - first_offset; + first_offset = 0; + frame_stack_add (ra_offs); + ra_offs = 0; + addr = stack_pointer_rtx; + } + /* See if we can combine the load of the return address with the + final stack adjustment. + We need a separate load if there are still registers to + restore. We also want a separate load if the combined insn + would need a limm, but a separate load doesn't. 
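The chain of conditions above boils down to a simple preference: when the pending offset would drag a long immediate into the PRE_MODIFY used for the restores (or for the blink reload), but the same amount can be added to the stack pointer cheaply, adjust sp first and restore from small offsets afterwards. A boolean distillation follows; the range tests are left as parameters because they depend on SMALL_INT, constraint C2a and on -Os versus ARC700 tuning, and the helper name is hypothetical.

#include <stdbool.h>

/* Sketch of the epilogue heuristic described above.  */
static bool
bump_sp_before_restores (bool offset_needs_limm_in_premodify,
                         bool cheap_add_to_sp,
                         bool have_registers_to_restore)
{
  return offset_needs_limm_in_premodify
         && cheap_add_to_sp
         && have_registers_to_restore;
}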
*/ + if (ra_offs + && !cfun->machine->frame_info.gmask + && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2))) + { + addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr); + first_offset = 0; + size_to_deallocate -= cfun->machine->frame_info.reg_size; + } + else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD) + { + addr = gen_rtx_POST_INC (Pmode, addr); + size_to_deallocate = 0; + } + frame_move_inc (ra, gen_frame_mem (Pmode, addr), stack_pointer_rtx, addr); + } + + if (!millicode_p) + { + if (cfun->machine->frame_info.reg_size) + arc_save_restore (stack_pointer_rtx, + /* The zeroing of these two bits is unnecessary, but leave this in for clarity. */ + cfun->machine->frame_info.gmask + & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset); + } + + + /* The rest of this function does the following: + ARCompact : handle epilogue_delay, restore sp (phase-2), return + */ + + /* Keep track of how much of the stack pointer we've restored. + It makes the following a lot more readable. */ + size_to_deallocate += first_offset; + restored = size - size_to_deallocate; + + if (size > restored) + frame_stack_add (size - restored); + /* Emit the return instruction. */ + if (sibcall_p == FALSE) + emit_jump_insn (gen_simple_return ()); + epilogue_done: + if (!TARGET_EPILOGUE_CFI) + { + rtx insn; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + RTX_FRAME_RELATED_P (insn) = 0; + } +} + +/* Return the offset relative to the stack pointer where the return address + is stored, or -1 if it is not stored. */ + +int +arc_return_slot_offset () +{ + struct arc_frame_info *afi = &cfun->machine->frame_info; + + return (afi->save_return_addr + ? afi->total_size - afi->pretend_size - afi->extra_size : -1); +} + +/* PIC */ + +/* Emit special PIC prologues and epilogues. */ +/* If the function has any GOTOFF relocations, then the GOTBASE + register has to be setup in the prologue + The instruction needed at the function start for setting up the + GOTBASE register is + add rdest, pc, + ---------------------------------------------------------- + The rtl to be emitted for this should be: + set (reg basereg) + (plus (reg pc) + (const (unspec (symref _DYNAMIC) 3))) + ---------------------------------------------------------- */ + +static void +arc_finalize_pic (void) +{ + rtx pat; + rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); + + if (crtl->uses_pic_offset_table == 0) + return; + + gcc_assert (flag_pic != 0); + + pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC"); + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pat), ARC_UNSPEC_GOT); + pat = gen_rtx_CONST (Pmode, pat); + + pat = gen_rtx_SET (VOIDmode, baseptr_rtx, pat); + + emit_insn (pat); +} + +/* !TARGET_BARREL_SHIFTER support. */ +/* Emit a shift insn to set OP0 to OP1 shifted by OP2; CODE specifies what + kind of shift. */ + +void +emit_shift (enum rtx_code code, rtx op0, rtx op1, rtx op2) +{ + rtx shift = gen_rtx_fmt_ee (code, SImode, op1, op2); + rtx pat + = ((shift4_operator (shift, SImode) ? gen_shift_si3 : gen_shift_si3_loop) + (op0, op1, op2, shift)); + emit_insn (pat); +} + +/* Output the assembler code for doing a shift. + We go to a bit of trouble to generate efficient code as the ARC601 only has + single bit shifts. This is taken from the h8300 port. We only have one + mode of shifting and can't access individual bytes like the h8300 can, so + this is greatly simplified (at the expense of not generating hyper- + efficient code). + + This function is not used if the variable shift insns are present. 
*/ + +/* FIXME: This probably can be done using a define_split in arc.md. + Alternately, generate rtx rather than output instructions. */ + +const char * +output_shift (rtx *operands) +{ + /* static int loopend_lab;*/ + rtx shift = operands[3]; + enum machine_mode mode = GET_MODE (shift); + enum rtx_code code = GET_CODE (shift); + const char *shift_one; + + gcc_assert (mode == SImode); + + switch (code) + { + case ASHIFT: shift_one = "add %0,%1,%1"; break; + case ASHIFTRT: shift_one = "asr %0,%1"; break; + case LSHIFTRT: shift_one = "lsr %0,%1"; break; + default: gcc_unreachable (); + } + + if (GET_CODE (operands[2]) != CONST_INT) + { + output_asm_insn ("and.f lp_count,%2, 0x1f", operands); + goto shiftloop; + } + else + { + int n; + + n = INTVAL (operands[2]); + + /* Only consider the lower 5 bits of the shift count. */ + n = n & 0x1f; + + /* First see if we can do them inline. */ + /* ??? We could get better scheduling & shorter code (using short insns) + by using splitters. Alas, that'd be even more verbose. */ + if (code == ASHIFT && n <= 9 && n > 2 + && dest_reg_operand (operands[4], SImode)) + { + output_asm_insn ("mov %4,0\n\tadd3 %0,%4,%1", operands); + for (n -=3 ; n >= 3; n -= 3) + output_asm_insn ("add3 %0,%4,%0", operands); + if (n == 2) + output_asm_insn ("add2 %0,%4,%0", operands); + else if (n) + output_asm_insn ("add %0,%0,%0", operands); + } + else if (n <= 4) + { + while (--n >= 0) + { + output_asm_insn (shift_one, operands); + operands[1] = operands[0]; + } + } + /* See if we can use a rotate/and. */ + else if (n == BITS_PER_WORD - 1) + { + switch (code) + { + case ASHIFT : + output_asm_insn ("and %0,%1,1\n\tror %0,%0", operands); + break; + case ASHIFTRT : + /* The ARC doesn't have a rol insn. Use something else. */ + output_asm_insn ("add.f 0,%1,%1\n\tsbc %0,%0,%0", operands); + break; + case LSHIFTRT : + /* The ARC doesn't have a rol insn. Use something else. */ + output_asm_insn ("add.f 0,%1,%1\n\trlc %0,0", operands); + break; + default: + break; + } + } + else if (n == BITS_PER_WORD - 2 && dest_reg_operand (operands[4], SImode)) + { + switch (code) + { + case ASHIFT : + output_asm_insn ("and %0,%1,3\n\tror %0,%0\n\tror %0,%0", operands); + break; + case ASHIFTRT : +#if 1 /* Need some scheduling comparisons. */ + output_asm_insn ("add.f %4,%1,%1\n\tsbc %0,%0,%0\n\t" + "add.f 0,%4,%4\n\trlc %0,%0", operands); +#else + output_asm_insn ("add.f %4,%1,%1\n\tbxor %0,%4,31\n\t" + "sbc.f %0,%0,%4\n\trlc %0,%0", operands); +#endif + break; + case LSHIFTRT : +#if 1 + output_asm_insn ("add.f %4,%1,%1\n\trlc %0,0\n\t" + "add.f 0,%4,%4\n\trlc %0,%0", operands); +#else + output_asm_insn ("add.f %0,%1,%1\n\trlc.f %0,0\n\t" + "and %0,%0,1\n\trlc %0,%0", operands); +#endif + break; + default: + break; + } + } + else if (n == BITS_PER_WORD - 3 && code == ASHIFT) + output_asm_insn ("and %0,%1,7\n\tror %0,%0\n\tror %0,%0\n\tror %0,%0", + operands); + /* Must loop. */ + else + { + operands[2] = GEN_INT (n); + output_asm_insn ("mov.f lp_count, %2", operands); + + shiftloop: + { + output_asm_insn ("lpnz\t2f", operands); + output_asm_insn (shift_one, operands); + output_asm_insn ("nop", operands); + fprintf (asm_out_file, "2:\t%s end single insn loop\n", + ASM_COMMENT_START); + } + } + } + + return ""; +} + +/* Nested function support. */ + +/* Directly store VALUE into memory object BLOCK at OFFSET. 
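The inline expansions in output_shift lean on a few arithmetic identities: adding a value to itself is a left shift by one, and with a zeroed scratch register the emitted add3 dst,scratch,src acts as a shift by three and add2 as a shift by two, so 3 <= n <= 9 decomposes into chunks of three, then two, then one. A standalone check of that decomposition in plain C arithmetic (not the emitted assembly):

#include <assert.h>
#include <stdint.h>

/* Sketch: model the inline expansion of x << n for 3 <= n <= 9 using
   only "shift by 3", "shift by 2" and "add to itself" steps, mirroring
   the add3/add2/add sequence shown above.  */
static uint32_t
shift_left_by_chunks (uint32_t x, int n)
{
  uint32_t t = x << 3;          /* mov %4,0 ; add3 %0,%4,%1    */
  n -= 3;
  while (n >= 3)                /* add3 %0,%4,%0 per iteration */
    {
      t <<= 3;
      n -= 3;
    }
  if (n == 2)                   /* add2 %0,%4,%0               */
    t <<= 2;
  else if (n)                   /* add %0,%0,%0                */
    t += t;
  return t;
}

int
main (void)
{
  for (int n = 3; n <= 9; n++)
    assert (shift_left_by_chunks (0x1234567u, n) == 0x1234567u << n);
  return 0;
}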
*/ + +static void +emit_store_direct (rtx block, int offset, int value) +{ + emit_insn (gen_store_direct (adjust_address (block, SImode, offset), + force_reg (SImode, + gen_int_mode (value, SImode)))); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ +/* With potentially multiple shared objects loaded, and multiple stacks + present for multiple thereds where trampolines might reside, a simple + range check will likely not suffice for the profiler to tell if a callee + is a trampoline. We a speedier check by making the trampoline start at + an address that is not 4-byte aligned. + A trampoline looks like this: + + nop_s 0x78e0 +entry: + ld_s r12,[pcl,12] 0xd403 + ld r11,[pcl,12] 0x170c 700b + j_s [r12] 0x7c00 + nop_s 0x78e0 + + The fastest trampoline to execute for trampolines within +-8KB of CTX + would be: + add2 r11,pcl,s12 + j [limm] 0x20200f80 limm + and that would also be faster to write to the stack by computing the offset + from CTX to TRAMP at compile time. However, it would really be better to + get rid of the high cost of cache invalidation when generating trampolines, + which requires that the code part of trampolines stays constant, and + additionally either + - making sure that no executable code but trampolines is on the stack, + no icache entries linger for the area of the stack from when before the + stack was allocated, and allocating trampolines in trampoline-only + cache lines + or + - allocate trampolines fram a special pool of pre-allocated trampolines. */ + +static void +arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + + emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0); + emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c); + emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00); + emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr); + emit_move_insn (adjust_address (tramp, SImode, 16), cxt); + emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0))); +} + +/* Allow the profiler to easily distinguish trampolines from normal + functions. */ + +static rtx +arc_trampoline_adjust_address (rtx addr) +{ + return plus_constant (Pmode, addr, 2); +} + +/* This is set briefly to 1 when we output a ".as" address modifer, and then + reset when we output the scaled address. */ +static int output_scaled = 0; + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ +/* In final.c:output_asm_insn: + 'l' : label + 'a' : address + 'c' : constant address if CONSTANT_ADDRESS_P + 'n' : negative + Here: + 'Z': log2(x+1)-1 + 'z': log2 + 'M': log2(~x) + '#': condbranch delay slot suffix + '*': jump delay slot suffix + '?' : nonjump-insn suffix for conditional execution or short instruction + '!' : jump / call suffix for conditional execution or short instruction + '`': fold constant inside unary o-perator, re-recognize, and emit. 
+ 'd' + 'D' + 'R': Second word + 'S' + 'B': Branch comparison operand - suppress sda reference + 'H': Most significant word + 'L': Least significant word + 'A': ASCII decimal representation of floating point value + 'U': Load/store update or scaling indicator + 'V': cache bypass indicator for volatile + 'P' + 'F' + '^' + 'O': Operator + 'o': original symbol - no @ prepending. */ + +void +arc_print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case 'Z': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 ); + else + output_operand_lossage ("invalid operand to %%Z code"); + + return; + + case 'z': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(INTVAL (x)) ); + else + output_operand_lossage ("invalid operand to %%z code"); + + return; + + case 'M': + if (GET_CODE (x) == CONST_INT) + fprintf (file, "%d",exact_log2(~INTVAL (x)) ); + else + output_operand_lossage ("invalid operand to %%M code"); + + return; + + case '#' : + /* Conditional branches depending on condition codes. + Note that this is only for branches that were known to depend on + condition codes before delay slot scheduling; + out-of-range brcc / bbit expansions should use '*'. + This distinction is important because of the different + allowable delay slot insns and the output of the delay suffix + for TARGET_AT_DBR_COND_EXEC. */ + case '*' : + /* Unconditional branches / branches not depending on condition codes. + This could also be a CALL_INSN. + Output the appropriate delay slot suffix. */ + if (final_sequence && XVECLEN (final_sequence, 0) != 1) + { + rtx jump = XVECEXP (final_sequence, 0, 0); + rtx delay = XVECEXP (final_sequence, 0, 1); + + /* For TARGET_PAD_RETURN we might have grabbed the delay insn. */ + if (INSN_DELETED_P (delay)) + return; + if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump)) + fputs (INSN_FROM_TARGET_P (delay) ? ".d" + : TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d" + : get_attr_type (jump) == TYPE_RETURN && code == '#' ? "" + : ".nd", + file); + else + fputs (".d", file); + } + return; + case '?' : /* with leading "." */ + case '!' : /* without leading "." */ + /* This insn can be conditionally executed. See if the ccfsm machinery + says it should be conditionalized. + If it shouldn't, we'll check the compact attribute if this insn + has a short variant, which may be used depending on code size and + alignment considerations. */ + if (current_insn_predicate) + arc_ccfsm_current.cc + = get_arc_condition_code (current_insn_predicate); + if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current)) + { + /* Is this insn in a delay slot sequence? */ + if (!final_sequence || XVECLEN (final_sequence, 0) < 2 + || current_insn_predicate + || CALL_P (XVECEXP (final_sequence, 0, 0)) + || simplejump_p (XVECEXP (final_sequence, 0, 0))) + { + /* This insn isn't in a delay slot sequence, or conditionalized + independently of its position in a delay slot. */ + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[arc_ccfsm_current.cc]); + /* If this is a jump, there are still short variants. However, + only beq_s / bne_s have the same offset range as b_s, + and the only short conditional returns are jeq_s and jne_s. */ + if (code == '!' + && (arc_ccfsm_current.cc == ARC_CC_EQ + || arc_ccfsm_current.cc == ARC_CC_NE + || 0 /* FIXME: check if branch in 7 bit range. */)) + output_short_suffix (file); + } + else if (code == '!') /* Jump with delay slot. 
*/ + fputs (arc_condition_codes[arc_ccfsm_current.cc], file); + else /* An Instruction in a delay slot of a jump or call. */ + { + rtx jump = XVECEXP (final_sequence, 0, 0); + rtx insn = XVECEXP (final_sequence, 0, 1); + + /* If the insn is annulled and is from the target path, we need + to inverse the condition test. */ + if (JUMP_P (jump) && INSN_ANNULLED_BRANCH_P (jump)) + { + if (INSN_FROM_TARGET_P (insn)) + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]); + else + fprintf (file, "%s%s", + code == '?' ? "." : "", + arc_condition_codes[arc_ccfsm_current.cc]); + if (arc_ccfsm_current.state == 5) + arc_ccfsm_current.state = 0; + } + else + /* This insn is executed for either path, so don't + conditionalize it at all. */ + output_short_suffix (file); + + } + } + else + output_short_suffix (file); + return; + case'`': + /* FIXME: fold constant inside unary operator, re-recognize, and emit. */ + gcc_unreachable (); + case 'd' : + fputs (arc_condition_codes[get_arc_condition_code (x)], file); + return; + case 'D' : + fputs (arc_condition_codes[ARC_INVERSE_CONDITION_CODE + (get_arc_condition_code (x))], + file); + return; + case 'R' : + /* Write second word of DImode or DFmode reference, + register or memory. */ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)+1], file); + else if (GET_CODE (x) == MEM) + { + fputc ('[', file); + + /* Handle possible auto-increment. For PRE_INC / PRE_DEC / + PRE_MODIFY, we will have handled the first word already; + For POST_INC / POST_DEC / POST_MODIFY, the access to the + first word will be done later. In either case, the access + to the first word will do the modify, and we only have + to add an offset of four here. */ + if (GET_CODE (XEXP (x, 0)) == PRE_INC + || GET_CODE (XEXP (x, 0)) == PRE_DEC + || GET_CODE (XEXP (x, 0)) == PRE_MODIFY + || GET_CODE (XEXP (x, 0)) == POST_INC + || GET_CODE (XEXP (x, 0)) == POST_DEC + || GET_CODE (XEXP (x, 0)) == POST_MODIFY) + output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 4)); + else if (output_scaled) + { + rtx addr = XEXP (x, 0); + int size = GET_MODE_SIZE (GET_MODE (x)); + + output_address (plus_constant (Pmode, XEXP (addr, 0), + ((INTVAL (XEXP (addr, 1)) + 4) + >> (size == 2 ? 1 : 2)))); + output_scaled = 0; + } + else + output_address (plus_constant (Pmode, XEXP (x, 0), 4)); + fputc (']', file); + } + else + output_operand_lossage ("invalid operand to %%R code"); + return; + case 'S' : + /* FIXME: remove %S option. */ + break; + case 'B' /* Branch or other LIMM ref - must not use sda references. */ : + if (CONSTANT_P (x)) + { + output_addr_const (file, x); + return; + } + break; + case 'H' : + case 'L' : + if (GET_CODE (x) == REG) + { + /* L = least significant word, H = most significant word. */ + if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L')) + fputs (reg_names[REGNO (x)], file); + else + fputs (reg_names[REGNO (x)+1], file); + } + else if (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE) + { + rtx first, second; + + split_double (x, &first, &second); + + if((WORDS_BIG_ENDIAN) == 0) + fprintf (file, "0x%08lx", + code == 'L' ? INTVAL (first) : INTVAL (second)); + else + fprintf (file, "0x%08lx", + code == 'L' ? 
INTVAL (second) : INTVAL (first)); + + + } + else + output_operand_lossage ("invalid operand to %%H/%%L code"); + return; + case 'A' : + { + char str[30]; + + gcc_assert (GET_CODE (x) == CONST_DOUBLE + && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT); + + real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), sizeof (str), 0, 1); + fprintf (file, "%s", str); + return; + } + case 'U' : + /* Output a load/store with update indicator if appropriate. */ + if (GET_CODE (x) == MEM) + { + rtx addr = XEXP (x, 0); + switch (GET_CODE (addr)) + { + case PRE_INC: case PRE_DEC: case PRE_MODIFY: + fputs (".a", file); break; + case POST_INC: case POST_DEC: case POST_MODIFY: + fputs (".ab", file); break; + case PLUS: + /* Are we using a scaled index? */ + if (GET_CODE (XEXP (addr, 0)) == MULT) + fputs (".as", file); + /* Can we use a scaled offset? */ + else if (CONST_INT_P (XEXP (addr, 1)) + && GET_MODE_SIZE (GET_MODE (x)) > 1 + && (!(INTVAL (XEXP (addr, 1)) + & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3)) + /* Does it make a difference? */ + && !SMALL_INT_RANGE(INTVAL (XEXP (addr, 1)), + GET_MODE_SIZE (GET_MODE (x)) - 2, 0)) + { + fputs (".as", file); + output_scaled = 1; + } + break; + case REG: + break; + default: + gcc_assert (CONSTANT_P (addr)); break; + } + } + else + output_operand_lossage ("invalid operand to %%U code"); + return; + case 'V' : + /* Output cache bypass indicator for a load/store insn. Volatile memory + refs are defined to use the cache bypass mechanism. */ + if (GET_CODE (x) == MEM) + { + if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET ) + fputs (".di", file); + } + else + output_operand_lossage ("invalid operand to %%V code"); + return; + /* plt code. */ + case 'P': + case 0 : + /* Do nothing special. */ + break; + case 'F': + fputs (reg_names[REGNO (x)]+1, file); + return; + case '^': + /* This punctuation character is needed because label references are + printed in the output template using %l. This is a front end + character, and when we want to emit a '@' before it, we have to use + this '^'. */ + + fputc('@',file); + return; + case 'O': + /* Output an operator. */ + switch (GET_CODE (x)) + { + case PLUS: fputs ("add", file); return; + case SS_PLUS: fputs ("adds", file); return; + case AND: fputs ("and", file); return; + case IOR: fputs ("or", file); return; + case XOR: fputs ("xor", file); return; + case MINUS: fputs ("sub", file); return; + case SS_MINUS: fputs ("subs", file); return; + case ASHIFT: fputs ("asl", file); return; + case ASHIFTRT: fputs ("asr", file); return; + case LSHIFTRT: fputs ("lsr", file); return; + case ROTATERT: fputs ("ror", file); return; + case MULT: fputs ("mpy", file); return; + case ABS: fputs ("abs", file); return; /* Unconditional. */ + case NEG: fputs ("neg", file); return; + case SS_NEG: fputs ("negs", file); return; + case NOT: fputs ("not", file); return; /* Unconditional. */ + case ZERO_EXTEND: + fputs ("ext", file); /* bmsk allows predication. */ + goto size_suffix; + case SIGN_EXTEND: /* Unconditional. 
*/ + fputs ("sex", file); + size_suffix: + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: fputs ("b", file); return; + case HImode: fputs ("w", file); return; + default: break; + } + break; + case SS_TRUNCATE: + if (GET_MODE (x) != HImode) + break; + fputs ("sat16", file); + default: break; + } + output_operand_lossage ("invalid operand to %%O code"); return; + case 'o': + if (GET_CODE (x) == SYMBOL_REF) + { + assemble_name (file, XSTR (x, 0)); + return; + } + break; + case '&': + if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason) + fprintf (file, "; unalign: %d", cfun->machine->unalign); + return; + default : + /* Unknown flag. */ + output_operand_lossage ("invalid operand output code"); + } + + switch (GET_CODE (x)) + { + case REG : + fputs (reg_names[REGNO (x)], file); + break; + case MEM : + { + rtx addr = XEXP (x, 0); + int size = GET_MODE_SIZE (GET_MODE (x)); + + fputc ('[', file); + + switch (GET_CODE (addr)) + { + case PRE_INC: case POST_INC: + output_address (plus_constant (Pmode, XEXP (addr, 0), size)); break; + case PRE_DEC: case POST_DEC: + output_address (plus_constant (Pmode, XEXP (addr, 0), -size)); + break; + case PRE_MODIFY: case POST_MODIFY: + output_address (XEXP (addr, 1)); break; + case PLUS: + if (output_scaled) + { + output_address (plus_constant (Pmode, XEXP (addr, 0), + (INTVAL (XEXP (addr, 1)) + >> (size == 2 ? 1 : 2)))); + output_scaled = 0; + } + else + output_address (addr); + break; + default: + if (flag_pic && CONSTANT_ADDRESS_P (addr)) + arc_output_pic_addr_const (file, addr, code); + else + output_address (addr); + break; + } + fputc (']', file); + break; + } + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "0x%08lx", l); + break; + } + /* Fall through. Let output_addr_const deal with it. */ + default : + if (flag_pic) + arc_output_pic_addr_const (file, x, code); + else + { + /* FIXME: Dirty way to handle @var@sda+const. Shd be handled + with asm_output_symbol_ref */ + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + { + x = XEXP (x, 0); + output_addr_const (file, XEXP (x, 0)); + if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0))) + fprintf (file, "@sda"); + + if (GET_CODE (XEXP (x, 1)) != CONST_INT + || INTVAL (XEXP (x, 1)) >= 0) + fprintf (file, "+"); + output_addr_const (file, XEXP (x, 1)); + } + else + output_addr_const (file, x); + } + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x)) + fprintf (file, "@sda"); + break; + } +} + +/* Print a memory address as an operand to reference that memory location. 
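When the 'U' code has chosen a scaled (.as) address, the printed displacement is the byte offset divided by the access size, which is what the right shift in the MEM case above does. A one-line helper makes the relation explicit; scaled_displacement is a hypothetical name used only for illustration.

/* Sketch: an ".as" (scaled) address encodes the displacement in units
   of the access size, so a byte offset is shifted right by 1 for
   2-byte and by 2 for 4-byte accesses before printing.  For example,
   a 4-byte access 32 bytes past the base prints as offset 8 together
   with the .as suffix.  */
static long
scaled_displacement (long byte_offset, int access_size)
{
  return byte_offset >> (access_size == 2 ? 1 : 2);
}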
*/ + +void +arc_print_operand_address (FILE *file , rtx addr) +{ + register rtx base, index = 0; + + switch (GET_CODE (addr)) + { + case REG : + fputs (reg_names[REGNO (addr)], file); + break; + case SYMBOL_REF : + output_addr_const (file, addr); + if (SYMBOL_REF_SMALL_P (addr)) + fprintf (file, "@sda"); + break; + case PLUS : + if (GET_CODE (XEXP (addr, 0)) == MULT) + index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1); + else if (CONST_INT_P (XEXP (addr, 0))) + index = XEXP (addr, 0), base = XEXP (addr, 1); + else + base = XEXP (addr, 0), index = XEXP (addr, 1); + + gcc_assert (OBJECT_P (base)); + arc_print_operand_address (file, base); + if (CONSTANT_P (base) && CONST_INT_P (index)) + fputc ('+', file); + else + fputc (',', file); + gcc_assert (OBJECT_P (index)); + arc_print_operand_address (file, index); + break; + case CONST: + { + rtx c = XEXP (addr, 0); + + gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF); + gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT); + + output_address(XEXP(addr,0)); + + break; + } + case PRE_INC : + case PRE_DEC : + /* We shouldn't get here as we've lost the mode of the memory object + (which says how much to inc/dec by. */ + gcc_unreachable (); + break; + default : + if (flag_pic) + arc_output_pic_addr_const (file, addr, 0); + else + output_addr_const (file, addr); + break; + } +} + +/* Called via walk_stores. DATA points to a hash table we can use to + establish a unique SYMBOL_REF for each counter, which corresponds to + a caller-callee pair. + X is a store which we want to examine for an UNSPEC_PROF, which + would be an address loaded into a register, or directly used in a MEM. + If we found an UNSPEC_PROF, if we encounter a new counter the first time, + write out a description and a data allocation for a 32 bit counter. + Also, fill in the appropriate symbol_ref into each UNSPEC_PROF instance. */ + +static void +write_profile_sections (rtx dest ATTRIBUTE_UNUSED, rtx x, void *data) +{ + rtx *srcp, src; + htab_t htab = (htab_t) data; + rtx *slot; + + if (GET_CODE (x) != SET) + return; + srcp = &SET_SRC (x); + if (MEM_P (*srcp)) + srcp = &XEXP (*srcp, 0); + else if (MEM_P (SET_DEST (x))) + srcp = &XEXP (SET_DEST (x), 0); + src = *srcp; + if (GET_CODE (src) != CONST) + return; + src = XEXP (src, 0); + if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_PROF) + return; + + gcc_assert (XVECLEN (src, 0) == 3); + if (!htab_elements (htab)) + { + output_asm_insn (".section .__arc_profile_desc, \"a\"\n" + "\t.long %0 + 1\n", + &XVECEXP (src, 0, 0)); + } + slot = (rtx *) htab_find_slot (htab, src, INSERT); + if (*slot == HTAB_EMPTY_ENTRY) + { + static int count_nr; + char buf[24]; + rtx count; + + *slot = src; + sprintf (buf, "__prof_count%d", count_nr++); + count = gen_rtx_SYMBOL_REF (Pmode, xstrdup (buf)); + XVECEXP (src, 0, 2) = count; + output_asm_insn (".section\t.__arc_profile_desc, \"a\"\n" + "\t.long\t%1\n" + "\t.section\t.__arc_profile_counters, \"aw\"\n" + "\t.type\t%o2, @object\n" + "\t.size\t%o2, 4\n" + "%o2:\t.zero 4", + &XVECEXP (src, 0, 0)); + *srcp = count; + } + else + *srcp = XVECEXP (*slot, 0, 2); +} + +/* Hash function for UNSPEC_PROF htab. Use both the caller's name and + the callee's name (if known). */ + +static hashval_t +unspec_prof_hash (const void *x) +{ + const_rtx u = (const_rtx) x; + const_rtx s1 = XVECEXP (u, 0, 1); + + return (htab_hash_string (XSTR (XVECEXP (u, 0, 0), 0)) + ^ (s1->code == SYMBOL_REF ? htab_hash_string (XSTR (s1, 0)) : 0)); +} + +/* Equality function for UNSPEC_PROF htab. 
Two pieces of UNSPEC_PROF rtl + shall refer to the same counter if both caller name and callee rtl + are identical. */ + +static int +unspec_prof_htab_eq (const void *x, const void *y) +{ + const_rtx u0 = (const_rtx) x; + const_rtx u1 = (const_rtx) y; + const_rtx s01 = XVECEXP (u0, 0, 1); + const_rtx s11 = XVECEXP (u1, 0, 1); + + return (!strcmp (XSTR (XVECEXP (u0, 0, 0), 0), + XSTR (XVECEXP (u1, 0, 0), 0)) + && rtx_equal_p (s01, s11)); +} + +/* Conditional execution support. + + This is based on the ARM port but for now is much simpler. + + A finite state machine takes care of noticing whether or not instructions + can be conditionally executed, and thus decrease execution time and code + size by deleting branch instructions. The fsm is controlled by + arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the + actions of PRINT_OPERAND. The patterns in the .md file for the branch + insns also have a hand in this. */ +/* The way we leave dealing with non-anulled or annull-false delay slot + insns to the consumer is awkward. */ + +/* The state of the fsm controlling condition codes are: + 0: normal, do nothing special + 1: don't output this insn + 2: don't output this insn + 3: make insns conditional + 4: make insns conditional + 5: make insn conditional (only for outputting anulled delay slot insns) + + special value for cfun->machine->uid_ccfsm_state: + 6: return with but one insn before it since function start / call + + State transitions (state->state by whom, under what condition): + 0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over + some instructions. + 0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed + by zero or more non-jump insns and an unconditional branch with + the same target label as the condbranch. + 1 -> 3 branch patterns, after having not output the conditional branch + 2 -> 4 branch patterns, after having not output the conditional branch + 0 -> 5 branch patterns, for anulled delay slot insn. + 3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached + (the target label has CODE_LABEL_NUMBER equal to + arc_ccfsm_target_label). + 4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached + 3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns. + 5 -> 0 when outputting the delay slot insn + + If the jump clobbers the conditions then we use states 2 and 4. + + A similar thing can be done with conditional return insns. + + We also handle separating branches from sets of the condition code. + This is done here because knowledge of the ccfsm state is required, + we may not be outputting the branch. */ + +/* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current, + before letting final output INSN. */ + +static void +arc_ccfsm_advance (rtx insn, struct arc_ccfsm *state) +{ + /* BODY will hold the body of INSN. */ + register rtx body; + + /* This will be 1 if trying to repeat the trick (ie: do the `else' part of + an if/then/else), and things need to be reversed. */ + int reverse = 0; + + /* If we start with a return insn, we only succeed if we find another one. */ + int seeking_return = 0; + + /* START_INSN will hold the insn from where we start looking. This is the + first insn after the following code_label if REVERSE is true. */ + rtx start_insn = insn; + + /* Type of the jump_insn. Brcc insns don't affect ccfsm changes, + since they don't rely on a cmp preceding the. 
*/ + enum attr_type jump_insn_type; + + /* Allow -mdebug-ccfsm to turn this off so we can see how well it does. + We can't do this in macro FINAL_PRESCAN_INSN because its called from + final_scan_insn which has `optimize' as a local. */ + if (optimize < 2 || TARGET_NO_COND_EXEC) + return; + + /* Ignore notes and labels. */ + if (!INSN_P (insn)) + return; + body = PATTERN (insn); + /* If in state 4, check if the target branch is reached, in order to + change back to state 0. */ + if (state->state == 4) + { + if (insn == state->target_insn) + { + state->target_insn = NULL; + state->state = 0; + } + return; + } + + /* If in state 3, it is possible to repeat the trick, if this insn is an + unconditional branch to a label, and immediately following this branch + is the previous target label which is only used once, and the label this + branch jumps to is not too far off. Or in other words "we've done the + `then' part, see if we can do the `else' part." */ + if (state->state == 3) + { + if (simplejump_p (insn)) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + { + /* ??? Isn't this always a barrier? */ + start_insn = next_nonnote_insn (start_insn); + } + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == state->target_label + && LABEL_NUSES (start_insn) == 1) + reverse = TRUE; + else + return; + } + else if (GET_CODE (body) == SIMPLE_RETURN) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == CODE_LABEL + && CODE_LABEL_NUMBER (start_insn) == state->target_label + && LABEL_NUSES (start_insn) == 1) + { + reverse = TRUE; + seeking_return = 1; + } + else + return; + } + else + return; + } + + if (GET_CODE (insn) != JUMP_INSN + || GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + return; + + /* We can't predicate BRCC or loop ends. + Also, when generating PIC code, and considering a medium range call, + we can't predicate the call. */ + jump_insn_type = get_attr_type (insn); + if (jump_insn_type == TYPE_BRCC + || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT + || jump_insn_type == TYPE_LOOP_END + || (jump_insn_type == TYPE_CALL && !get_attr_predicable (insn))) + return; + + /* This jump might be paralleled with a clobber of the condition codes, + the jump should always come first. */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) + body = XVECEXP (body, 0, 0); + + if (reverse + || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) + { + int insns_skipped = 0, fail = FALSE, succeed = FALSE; + /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ + int then_not_else = TRUE; + /* Nonzero if next insn must be the target label. */ + int next_must_be_target_label_p; + rtx this_insn = start_insn, label = 0; + + /* Register the insn jumped to. 
*/ + if (reverse) + { + if (!seeking_return) + label = XEXP (SET_SRC (body), 0); + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) + label = XEXP (XEXP (SET_SRC (body), 1), 0); + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) + { + label = XEXP (XEXP (SET_SRC (body), 2), 0); + then_not_else = FALSE; + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == SIMPLE_RETURN) + seeking_return = 1; + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == SIMPLE_RETURN) + { + seeking_return = 1; + then_not_else = FALSE; + } + else + gcc_unreachable (); + + /* If this is a non-annulled branch with a delay slot, there is + no need to conditionalize the delay slot. */ + if (NEXT_INSN (PREV_INSN (insn)) != insn + && state->state == 0 && !INSN_ANNULLED_BRANCH_P (insn)) + { + this_insn = NEXT_INSN (this_insn); + gcc_assert (NEXT_INSN (NEXT_INSN (PREV_INSN (start_insn))) + == NEXT_INSN (this_insn)); + } + /* See how many insns this branch skips, and what kind of insns. If all + insns are okay, and the label or unconditional branch to the same + label is not too far away, succeed. */ + for (insns_skipped = 0, next_must_be_target_label_p = FALSE; + !fail && !succeed && insns_skipped < MAX_INSNS_SKIPPED; + insns_skipped++) + { + rtx scanbody; + + this_insn = next_nonnote_insn (this_insn); + if (!this_insn) + break; + + if (next_must_be_target_label_p) + { + if (GET_CODE (this_insn) == BARRIER) + continue; + if (GET_CODE (this_insn) == CODE_LABEL + && this_insn == label) + { + state->state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + } + + scanbody = PATTERN (this_insn); + + switch (GET_CODE (this_insn)) + { + case CODE_LABEL: + /* Succeed if it is the target label, otherwise fail since + control falls in from somewhere else. */ + if (this_insn == label) + { + state->state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case BARRIER: + /* Succeed if the following insn is the target label. + Otherwise fail. + If return insns are used then the last insn in a function + will be a barrier. */ + next_must_be_target_label_p = TRUE; + break; + + case CALL_INSN: + /* Can handle a call insn if there are no insns after it. + IE: The next "insn" is the target label. We don't have to + worry about delay slots as such insns are SEQUENCE's inside + INSN's. ??? It is possible to handle such insns though. */ + if (get_attr_cond (this_insn) == COND_CANUSE) + next_must_be_target_label_p = TRUE; + else + fail = TRUE; + break; + + case JUMP_INSN: + /* If this is an unconditional branch to the same label, succeed. + If it is to another label, do nothing. If it is conditional, + fail. */ + /* ??? Probably, the test for the SET and the PC are + unnecessary. */ + + if (GET_CODE (scanbody) == SET + && GET_CODE (SET_DEST (scanbody)) == PC) + { + if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF + && XEXP (SET_SRC (scanbody), 0) == label && !reverse) + { + state->state = 2; + succeed = TRUE; + } + else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) + fail = TRUE; + else if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + else if (GET_CODE (scanbody) == SIMPLE_RETURN + && seeking_return) + { + state->state = 2; + succeed = TRUE; + } + else if (GET_CODE (scanbody) == PARALLEL) + { + if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + break; + + case INSN: + /* We can only do this with insns that can use the condition + codes (and don't set them). 
*/ + if (GET_CODE (scanbody) == SET + || GET_CODE (scanbody) == PARALLEL) + { + if (get_attr_cond (this_insn) != COND_CANUSE) + fail = TRUE; + } + /* We can't handle other insns like sequences. */ + else + fail = TRUE; + break; + + default: + break; + } + } + + if (succeed) + { + if ((!seeking_return) && (state->state == 1 || reverse)) + state->target_label = CODE_LABEL_NUMBER (label); + else if (seeking_return || state->state == 2) + { + while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) + { + this_insn = next_nonnote_insn (this_insn); + + gcc_assert (!this_insn || + (GET_CODE (this_insn) != BARRIER + && GET_CODE (this_insn) != CODE_LABEL)); + } + if (!this_insn) + { + /* Oh dear! we ran off the end, give up. */ + extract_insn_cached (insn); + state->state = 0; + state->target_insn = NULL; + return; + } + state->target_insn = this_insn; + } + else + gcc_unreachable (); + + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from + what it was. */ + if (!reverse) + { + state->cond = XEXP (SET_SRC (body), 0); + state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0)); + } + + if (reverse || then_not_else) + state->cc = ARC_INVERSE_CONDITION_CODE (state->cc); + } + + /* Restore recog_operand. Getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call; since the insn has been recognized already we + call insn_extract direct. */ + extract_insn_cached (insn); + } +} + +/* Record that we are currently outputting label NUM with prefix PREFIX. + It it's the label we're looking for, reset the ccfsm machinery. + + Called from ASM_OUTPUT_INTERNAL_LABEL. */ + +static void +arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state) +{ + if (state->state == 3 && state->target_label == num + && !strcmp (prefix, "L")) + { + state->state = 0; + state->target_insn = NULL_RTX; + } +} + +/* We are considering a conditional branch with the condition COND. + Check if we want to conditionalize a delay slot insn, and if so modify + the ccfsm state accordingly. + REVERSE says branch will branch when the condition is false. */ +void +arc_ccfsm_record_condition (rtx cond, bool reverse, rtx jump, + struct arc_ccfsm *state) +{ + rtx seq_insn = NEXT_INSN (PREV_INSN (jump)); + if (!state) + state = &arc_ccfsm_current; + + gcc_assert (state->state == 0); + if (seq_insn != jump) + { + rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1); + + if (!INSN_DELETED_P (insn) + && INSN_ANNULLED_BRANCH_P (jump) + && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn))) + { + state->cond = cond; + state->cc = get_arc_condition_code (cond); + if (!reverse) + arc_ccfsm_current.cc + = ARC_INVERSE_CONDITION_CODE (state->cc); + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == COND_EXEC) + gcc_assert ((INSN_FROM_TARGET_P (insn) + ? ARC_INVERSE_CONDITION_CODE (state->cc) : state->cc) + == get_arc_condition_code (XEXP (pat, 0))); + else + state->state = 5; + } + } +} + +/* Update *STATE as we would when we emit INSN. */ + +static void +arc_ccfsm_post_advance (rtx insn, struct arc_ccfsm *state) +{ + enum attr_type type; + + if (LABEL_P (insn)) + arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state); + else if (JUMP_P (insn) + && GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC + && ((type = get_attr_type (insn)) == TYPE_BRANCH + || (type == TYPE_UNCOND_BRANCH + /* ??? Maybe should also handle TYPE_RETURN here, + but we don't have a testcase for that. 
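Stripped of the RTL details, the scan in arc_ccfsm_advance above asks two questions about the instructions a conditional branch would skip: are there few enough of them, and can every one of them be predicated, that is, execute under the condition codes without clobbering them? A toy model of that test; the MAX_SKIP bound and the helper are illustrative only, not the port's actual MAX_INSNS_SKIPPED or insn representation.

#include <stdbool.h>

#define MAX_SKIP 3   /* stand-in bound, not the real MAX_INSNS_SKIPPED */

/* Sketch: a conditional branch over N_SKIPPED instructions can be
   deleted in favour of predication only if the skipped run is short
   enough and every instruction in it can execute under the condition
   codes without clobbering them.  */
static bool
branch_can_be_predicated (const bool predicable[], int n_skipped)
{
  if (n_skipped > MAX_SKIP)
    return false;
  for (int i = 0; i < n_skipped; i++)
    if (!predicable[i])
      return false;
  return true;
}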
*/ + && ARC_CCFSM_BRANCH_DELETED_P (state)))) + { + if (ARC_CCFSM_BRANCH_DELETED_P (state)) + ARC_CCFSM_RECORD_BRANCH_DELETED (state); + else + { + rtx src = SET_SRC (PATTERN (insn)); + arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx, + insn, state); + } + } + else if (arc_ccfsm_current.state == 5) + arc_ccfsm_current.state = 0; +} + +/* Return true if the current insn, which is a conditional branch, is to be + deleted. */ + +bool +arc_ccfsm_branch_deleted_p (void) +{ + return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current); +} + +/* Record a branch isn't output because subsequent insns can be + conditionalized. */ + +void +arc_ccfsm_record_branch_deleted (void) +{ + ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current); +} + +/* During insn output, indicate if the current insn is predicated. */ + +bool +arc_ccfsm_cond_exec_p (void) +{ + return (cfun->machine->prescan_initialized + && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current)); +} + +/* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC, + and look inside SEQUENCEs. */ + +static rtx +arc_next_active_insn (rtx insn, struct arc_ccfsm *statep) +{ + rtx pat; + + do + { + if (statep) + arc_ccfsm_post_advance (insn, statep); + insn = NEXT_INSN (insn); + if (!insn || BARRIER_P (insn)) + return NULL_RTX; + if (statep) + arc_ccfsm_advance (insn, statep); + } + while (NOTE_P (insn) + || (cfun->machine->arc_reorg_started + && LABEL_P (insn) && !label_to_alignment (insn)) + || (NONJUMP_INSN_P (insn) + && (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER))); + if (!LABEL_P (insn)) + { + gcc_assert (INSN_P (insn)); + pat = PATTERN (insn); + if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC) + return NULL_RTX; + if (GET_CODE (pat) == SEQUENCE) + return XVECEXP (pat, 0, 0); + } + return insn; +} + +/* When deciding if an insn should be output short, we want to know something + about the following insns: + - if another insn follows which we know we can output as a short insn + before an alignment-sensitive point, we can output this insn short: + the decision about the eventual alignment can be postponed. + - if a to-be-aligned label comes next, we should output this insn such + as to get / preserve 4-byte alignment. + - if a likely branch without delay slot insn, or a call with an immediately + following short insn comes next, we should out output this insn such as to + get / preserve 2 mod 4 unalignment. + - do the same for a not completely unlikely branch with a short insn + following before any other branch / label. + - in order to decide if we are actually looking at a branch, we need to + call arc_ccfsm_advance. + - in order to decide if we are looking at a short insn, we should know + if it is conditionalized. To a first order of approximation this is + the case if the state from arc_ccfsm_advance from before this insn + indicates the insn is conditionalized. However, a further refinement + could be to not conditionalize an insn if the destination register(s) + is/are dead in the non-executed case. */ +/* Return non-zero if INSN should be output as a short insn. UNALIGN is + zero if the current insn is aligned to a 4-byte-boundary, two otherwise. + If CHECK_ATTR is greater than 0, check the iscompact attribute first. 
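The 2-mod-4 bookkeeping described above can be modelled in isolation: emitting a 16-bit ("_s") instruction flips the unalignment, a 32-bit one preserves it, which is exactly the unalign ^= 2 toggle that output_short_suffix performs below. A stand-alone sketch (the real decision also consults the iscompact attribute and force_short_suffix, both omitted here):

#include <stdio.h>

/* Track the 2-mod-4 unalignment of the instruction stream: 0 means the
   next insn starts on a 4-byte boundary, 2 means it is offset by 2.  */
static int unalign = 0;

static void
emit_insn_model (int short_p)
{
  if (short_p)
    {
      printf ("insn_s\t; 2 bytes, unalign %d -> %d\n", unalign, unalign ^ 2);
      unalign ^= 2;     /* the toggle output_short_suffix applies */
    }
  else
    printf ("insn\t; 4 bytes, unalign stays %d\n", unalign);
}

int
main (void)
{
  emit_insn_model (1);   /* _s: 0 -> 2        */
  emit_insn_model (0);   /* long insn keeps 2 */
  emit_insn_model (1);   /* _s: 2 -> 0        */
  return 0;
}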
*/ + +int +arc_verify_short (rtx insn, int, int check_attr) +{ + enum attr_iscompact iscompact; + struct machine_function *machine; + + if (check_attr > 0) + { + iscompact = get_attr_iscompact (insn); + if (iscompact == ISCOMPACT_FALSE) + return 0; + } + machine = cfun->machine; + + if (machine->force_short_suffix >= 0) + return machine->force_short_suffix; + + return (get_attr_length (insn) & 2) != 0; +} + +/* When outputting an instruction (alternative) that can potentially be short, + output the short suffix if the insn is in fact short, and update + cfun->machine->unalign accordingly. */ + +static void +output_short_suffix (FILE *file) +{ + rtx insn = current_output_insn; + + if (arc_verify_short (insn, cfun->machine->unalign, 1)) + { + fprintf (file, "_s"); + cfun->machine->unalign ^= 2; + } + /* Restore recog_operand. */ + extract_insn_cached (insn); +} + +/* Implement FINAL_PRESCAN_INSN. */ + +void +arc_final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + if (TARGET_DUMPISIZE) + fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); + + /* Output a nop if necessary to prevent a hazard. + Don't do this for delay slots: inserting a nop would + alter semantics, and the only time we would find a hazard is for a + call function result - and in that case, the hazard is spurious to + start with. */ + if (PREV_INSN (insn) + && PREV_INSN (NEXT_INSN (insn)) == insn + && arc_hazard (prev_real_insn (insn), insn)) + { + current_output_insn = + emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn))); + final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL); + current_output_insn = insn; + } + /* Restore extraction data which might have been clobbered by arc_hazard. */ + extract_constrain_insn_cached (insn); + + if (!cfun->machine->prescan_initialized) + { + /* Clear lingering state from branch shortening. */ + memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current); + cfun->machine->prescan_initialized = 1; + } + arc_ccfsm_advance (insn, &arc_ccfsm_current); + + cfun->machine->size_reason = 0; +} + +/* Given FROM and TO register numbers, say whether this elimination is allowed. + Frame pointer elimination is automatically handled. + + All eliminations are permissible. If we need a frame + pointer, we must eliminate ARG_POINTER_REGNUM into + FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */ + +static bool +arc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return to == FRAME_POINTER_REGNUM || !arc_frame_pointer_required (); +} + +/* Define the offset between two registers, one to be eliminated, and + the other its replacement, at the start of a routine. */ + +int +arc_initial_elimination_offset (int from, int to) +{ + if (! 
cfun->machine->frame_info.initialized) + arc_compute_frame_size (get_frame_size ()); + + if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM) + { + return (cfun->machine->frame_info.extra_size + + cfun->machine->frame_info.reg_size); + } + + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + { + return (cfun->machine->frame_info.total_size + - cfun->machine->frame_info.pretend_size); + } + + if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM)) + { + return (cfun->machine->frame_info.total_size + - (cfun->machine->frame_info.pretend_size + + cfun->machine->frame_info.extra_size + + cfun->machine->frame_info.reg_size)); + } + + gcc_unreachable (); +} + +static bool +arc_frame_pointer_required (void) +{ + return cfun->calls_alloca; +} + + +/* Return the destination address of a branch. */ + +int +branch_dest (rtx branch) +{ + rtx pat = PATTERN (branch); + rtx dest = (GET_CODE (pat) == PARALLEL + ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat)); + int dest_uid; + + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1); + + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + + return INSN_ADDRESSES (dest_uid); +} + + +/* Implement TARGET_ENCODE_SECTION_INFO hook. */ + +static void +arc_encode_section_info (tree decl, rtx rtl, int first) +{ + /* For sdata, SYMBOL_FLAG_LOCAL and SYMBOL_FLAG_FUNCTION. + This clears machine specific flags, so has to come first. */ + default_encode_section_info (decl, rtl, first); + + /* Check if it is a function, and whether it has the + [long/medium/short]_call attribute specified. */ + if (TREE_CODE (decl) == FUNCTION_DECL) + { + rtx symbol = XEXP (rtl, 0); + int flags = SYMBOL_REF_FLAGS (symbol); + + tree attr = (TREE_TYPE (decl) != error_mark_node + ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE); + tree long_call_attr = lookup_attribute ("long_call", attr); + tree medium_call_attr = lookup_attribute ("medium_call", attr); + tree short_call_attr = lookup_attribute ("short_call", attr); + + if (long_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_LONG_CALL; + else if (medium_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_MEDIUM_CALL; + else if (short_call_attr != NULL_TREE) + flags |= SYMBOL_FLAG_SHORT_CALL; + + SYMBOL_REF_FLAGS (symbol) = flags; + } +} + +/* This is how to output a definition of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. */ + +static void arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno) +{ + if (cfun) + arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current); + default_internal_label (stream, prefix, labelno); +} + +/* Set the cpu type and print out other fancy things, + at the top of the file. */ + +static void arc_file_start (void) +{ + default_file_start (); + fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string); +} + +/* Cost functions. */ + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +arc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, + int *total, bool speed) +{ + switch (code) + { + /* Small integers are as cheap as registers. */ + case CONST_INT: + { + bool nolimm = false; /* Can we do without long immediate? */ + bool fast = false; /* Is the result available immediately? */ + bool condexec = false; /* Does this allow conditiobnal execution? 
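The three register eliminations handled by arc_initial_elimination_offset above reduce to sums over the frame_info fields. A stand-alone sketch with invented sizes (the real numbers come from arc_compute_frame_size; the struct below is only a stand-in for cfun->machine->frame_info):

#include <stdio.h>

struct frame_info_model
{
  int total_size;    /* whole frame                         */
  int extra_size;    /* return address / fp save area       */
  int pretend_size;  /* anonymous args pushed by the caller */
  int reg_size;      /* callee-saved register save area     */
};

int
main (void)
{
  struct frame_info_model f = { 64, 8, 0, 16 };

  int ap_to_fp = f.extra_size + f.reg_size;
  int ap_to_sp = f.total_size - f.pretend_size;
  int fp_to_sp = f.total_size
                 - (f.pretend_size + f.extra_size + f.reg_size);

  printf ("ARG_POINTER   -> FRAME_POINTER: %d\n", ap_to_fp);  /* 24 */
  printf ("ARG_POINTER   -> STACK_POINTER: %d\n", ap_to_sp);  /* 64 */
  printf ("FRAME_POINTER -> STACK_POINTER: %d\n", fp_to_sp);  /* 40 */
  return 0;
}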
*/ + bool compact = false; /* Is a 16 bit opcode available? */ + /* CONDEXEC also implies that we can have an unconditional + 3-address operation. */ + + nolimm = compact = condexec = false; + if (UNSIGNED_INT6 (INTVAL (x))) + nolimm = condexec = compact = true; + else + { + if (SMALL_INT (INTVAL (x))) + nolimm = fast = true; + switch (outer_code) + { + case AND: /* bclr, bmsk, ext[bw] */ + if (satisfies_constraint_Ccp (x) /* bclr */ + || satisfies_constraint_C1p (x) /* bmsk */) + nolimm = fast = condexec = compact = true; + break; + case IOR: /* bset */ + if (satisfies_constraint_C0p (x)) /* bset */ + nolimm = fast = condexec = compact = true; + break; + case XOR: + if (satisfies_constraint_C0p (x)) /* bxor */ + nolimm = fast = condexec = true; + break; + case SET: + if (satisfies_constraint_Crr (x)) /* ror b,u6 */ + nolimm = true; + default: + break; + } + } + /* FIXME: Add target options to attach a small cost if + condexec / compact is not true. */ + if (nolimm) + { + *total = 0; + return true; + } + } + /* FALLTHRU */ + + /* 4 byte values can be fetched as immediate constants - + let's give that the cost of an extra insn. */ + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (1); + return true; + + case CONST_DOUBLE: + { + rtx high, low; + + if (TARGET_DPFP) + { + *total = COSTS_N_INSNS (1); + return true; + } + /* FIXME: correct the order of high,low */ + split_double (x, &high, &low); + *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high)) + + !SMALL_INT (INTVAL (low))); + return true; + } + + /* Encourage synth_mult to find a synthetic multiply when reasonable. + If we need more than 12 insns to do a multiply, then go out-of-line, + since the call overhead will be < 10% of the cost of the multiply. */ + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (TARGET_BARREL_SHIFTER) + { + /* If we want to shift a constant, we need a LIMM. */ + /* ??? when the optimizers want to know if a constant should be + hoisted, they ask for the cost of the constant. OUTER_CODE is + insufficient context for shifts since we don't know which operand + we are looking at. */ + if (CONSTANT_P (XEXP (x, 0))) + { + *total += (COSTS_N_INSNS (2) + + rtx_cost (XEXP (x, 1), (enum rtx_code) code, 0, speed)); + return true; + } + *total = COSTS_N_INSNS (1); + } + else if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total = COSTS_N_INSNS (16); + else + { + *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1))); + /* ??? want_to_gcse_p can throw negative shift counts at us, + and then panics when it gets a negative cost as result. + Seen for gcc.c-torture/compile/20020710-1.c -Os . */ + if (*total < 0) + *total = 0; + } + return false; + + case DIV: + case UDIV: + if (speed) + *total = COSTS_N_INSNS(30); + else + *total = COSTS_N_INSNS(1); + return false; + + case MULT: + if ((TARGET_DPFP && GET_MODE (x) == DFmode)) + *total = COSTS_N_INSNS (1); + else if (speed) + *total= arc_multcost; + /* We do not want synth_mult sequences when optimizing + for size. 
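The CONST_INT case at the top of arc_rtx_costs is essentially a classification by encoding: u6 immediates cost nothing anywhere, s12 immediates avoid a long immediate word, and everything else needs a limm. A stand-alone sketch assuming those usual ARC ranges (the bclr/bmsk/bset constraint refinements are left out):

#include <stdio.h>

/* Rough model of the CONST_INT cost logic: u6 immediates fit everywhere
   (including conditional and 16-bit encodings), s12 immediates avoid a
   long immediate, anything else needs a 32-bit limm word.  */
static const char *
classify_imm (long v)
{
  if (v >= 0 && v <= 63)
    return "u6: free, condexec and 16-bit forms available";
  if (v >= -2048 && v <= 2047)
    return "s12: free, but no compact/conditional encoding";
  return "needs limm: costs roughly one extra insn";
}

int
main (void)
{
  long tests[] = { 5, 63, 64, -1, -2048, 4096 };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    printf ("%6ld -> %s\n", tests[i], classify_imm (tests[i]));
  return 0;
}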
*/ + else if (TARGET_MUL64_SET || (TARGET_ARC700 && !TARGET_NOMPY_SET)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (2); + return false; + case PLUS: + if (GET_CODE (XEXP (x, 0)) == MULT + && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + { + *total += (rtx_cost (XEXP (x, 1), PLUS, 0, speed) + + rtx_cost (XEXP (XEXP (x, 0), 0), PLUS, 1, speed)); + return true; + } + return false; + case MINUS: + if (GET_CODE (XEXP (x, 1)) == MULT + && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode)) + { + *total += (rtx_cost (XEXP (x, 0), PLUS, 0, speed) + + rtx_cost (XEXP (XEXP (x, 1), 0), PLUS, 1, speed)); + return true; + } + return false; + case COMPARE: + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + + if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx + && XEXP (op0, 1) == const1_rtx) + { + /* btst / bbit0 / bbit1: + Small integers and registers are free; everything else can + be put in a register. */ + *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed) + + rtx_cost (XEXP (op0, 2), SET, 1, speed)); + return true; + } + if (GET_CODE (op0) == AND && op1 == const0_rtx + && satisfies_constraint_C1p (XEXP (op0, 1))) + { + /* bmsk.f */ + *total = rtx_cost (XEXP (op0, 0), SET, 1, speed); + return true; + } + /* add.f */ + if (GET_CODE (op1) == NEG) + { + /* op0 might be constant, the inside of op1 is rather + unlikely to be so. So swapping the operands might lower + the cost. */ + *total = (rtx_cost (op0, PLUS, 1, speed) + + rtx_cost (XEXP (op1, 0), PLUS, 0, speed)); + } + return false; + } + case EQ: case NE: + if (outer_code == IF_THEN_ELSE + && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT + && XEXP (x, 1) == const0_rtx + && XEXP (XEXP (x, 0), 1) == const1_rtx) + { + /* btst / bbit0 / bbit1: + Small integers and registers are free; everything else can + be put in a register. */ + rtx op0 = XEXP (x, 0); + + *total = (rtx_cost (XEXP (op0, 0), SET, 1, speed) + + rtx_cost (XEXP (op0, 2), SET, 1, speed)); + return true; + } + /* Fall through. */ + /* scc_insn expands into two insns. */ + case GTU: case GEU: case LEU: + if (GET_MODE (x) == SImode) + *total += COSTS_N_INSNS (1); + return false; + case LTU: /* might use adc. */ + if (GET_MODE (x) == SImode) + *total += COSTS_N_INSNS (1) - 1; + return false; + default: + return false; + } +} + +/* Return true if ADDR is an address that needs to be expressed as an + explicit sum of pcl + offset. */ + +bool +arc_legitimate_pc_offset_p (rtx addr) +{ + if (GET_CODE (addr) != CONST) + return false; + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 1)) != CONST_INT) + return false; + addr = XEXP (addr, 0); + } + return (GET_CODE (addr) == UNSPEC + && XVECLEN (addr, 0) == 1 + && XINT (addr, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (addr, 0, 0)) == SYMBOL_REF); +} + +/* Return true if ADDR is a valid pic address. + A valid pic address on arc should look like + const (unspec (SYMBOL_REF/LABEL) (ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT)) */ + +bool +arc_legitimate_pic_addr_p (rtx addr) +{ + if (GET_CODE (addr) == LABEL_REF) + return true; + if (GET_CODE (addr) != CONST) + return false; + + addr = XEXP (addr, 0); + + + if (GET_CODE (addr) == PLUS) + { + if (GET_CODE (XEXP (addr, 1)) != CONST_INT) + return false; + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) != UNSPEC + || XVECLEN (addr, 0) != 1) + return false; + + /* Must be @GOT or @GOTOFF. 
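The ZERO_EXTRACT cases in arc_rtx_costs above treat single-bit tests as cheap because they can usually map onto btst / bbit0 / bbit1. A source-level sketch of code that tends to produce that comparison shape (whether a bbit is really used also depends on branch range and the surrounding code):

/* Single-bit tests: the shift-and-mask and the masked compare below are
   the kind of patterns the cost function rates as nearly free.  */
int
bit_is_set (unsigned word, unsigned bit)
{
  return (word >> bit) & 1;      /* variable bit number: btst candidate */
}

int
branch_on_bit (unsigned word)
{
  if (word & (1u << 3))          /* fixed bit number: bbit1 candidate */
    return 1;
  return 0;
}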
*/ + if (XINT (addr, 1) != ARC_UNSPEC_GOT + && XINT (addr, 1) != ARC_UNSPEC_GOTOFF) + return false; + + if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF + && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF) + return false; + + return true; +} + + + +/* Return true if OP contains a symbol reference. */ + +static bool +symbolic_reference_mentioned_p (rtx op) +{ + register const char *fmt; + register int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return true; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return true; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return true; + } + + return false; +} + +/* Return true if OP contains a SYMBOL_REF that is not wrapped in an unspec. + If SKIP_LOCAL is true, skip symbols that bind locally. + This is used further down in this file, and, without SKIP_LOCAL, + in the addsi3 / subsi3 expanders when generating PIC code. */ + +bool +arc_raw_symbolic_reference_mentioned_p (rtx op, bool skip_local) +{ + register const char *fmt; + register int i; + + if (GET_CODE(op) == UNSPEC) + return false; + + if (GET_CODE (op) == SYMBOL_REF) + { + tree decl = SYMBOL_REF_DECL (op); + return !skip_local || !decl || !default_binds_local_p (decl); + } + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j), + skip_local)) + return true; + } + + else if (fmt[i] == 'e' + && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i), + skip_local)) + return true; + } + + return false; +} + +/* Legitimize a pic address reference in ORIG. + The return value is the legitimated address. + If OLDX is non-zero, it is the target to assign the address to first. */ + +rtx +arc_legitimize_pic_address (rtx orig, rtx oldx) +{ + rtx addr = orig; + rtx pat = orig; + rtx base; + + if (oldx == orig) + oldx = NULL; + + if (GET_CODE (addr) == LABEL_REF) + ; /* Do nothing. */ + else if (GET_CODE (addr) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (addr) + || SYMBOL_REF_LOCAL_P (addr))) + { + /* This symbol may be referenced via a displacement from the PIC + base address (@GOTOFF). */ + + /* FIXME: if we had a way to emit pc-relative adds that don't + create a GOT entry, we could do without the use of the gp register. */ + crtl->uses_pic_offset_table = 1; + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat); + + if (oldx == NULL) + oldx = gen_reg_rtx (Pmode); + + if (oldx != 0) + { + emit_move_insn (oldx, pat); + pat = oldx; + } + + } + else if (GET_CODE (addr) == SYMBOL_REF) + { + /* This symbol must be referenced via a load from the + Global Offset Table (@GOTPC). */ + + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_const_mem (Pmode, pat); + + if (oldx == 0) + oldx = gen_reg_rtx (Pmode); + + emit_move_insn (oldx, pat); + pat = oldx; + } + else + { + if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + if (GET_CODE (addr) == UNSPEC) + { + /* Check that the unspec is one of the ones we generate? 
*/ + } + else + gcc_assert (GET_CODE (addr) == PLUS); + } + + if (GET_CODE (addr) == PLUS) + { + rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); + + /* Check first to see if this is a constant offset from a @GOTOFF + symbol reference. */ + if ((GET_CODE (op0) == LABEL_REF + || (GET_CODE (op0) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (op0) + || SYMBOL_REF_LOCAL_P (op0)))) + && GET_CODE (op1) == CONST_INT) + { + /* FIXME: like above, could do without gp reference. */ + crtl->uses_pic_offset_table = 1; + pat + = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ARC_UNSPEC_GOTOFF); + pat = gen_rtx_PLUS (Pmode, pat, op1); + pat = gen_rtx_CONST (Pmode, pat); + pat = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, pat); + + if (oldx != 0) + { + emit_move_insn (oldx, pat); + pat = oldx; + } + } + else + { + base = arc_legitimize_pic_address (XEXP (addr, 0), oldx); + pat = arc_legitimize_pic_address (XEXP (addr, 1), + base == oldx ? NULL_RTX : oldx); + + if (GET_CODE (pat) == CONST_INT) + pat = plus_constant (Pmode, base, INTVAL (pat)); + else + { + if (GET_CODE (pat) == PLUS && CONSTANT_P (XEXP (pat, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (pat, 0)); + pat = XEXP (pat, 1); + } + pat = gen_rtx_PLUS (Pmode, base, pat); + } + } + } + } + + return pat; +} + +/* Output address constant X to FILE, taking PIC into account. */ + +void +arc_output_pic_addr_const (FILE * file, rtx x, int code) +{ + char buf[256]; + + restart: + switch (GET_CODE (x)) + { + case PC: + if (flag_pic) + putc ('.', file); + else + gcc_unreachable (); + break; + + case SYMBOL_REF: + output_addr_const (file, x); + + /* Local functions do not get references through the PLT. */ + if (code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) + fputs ("@plt", file); + break; + + case LABEL_REF: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0))); + assemble_name (file, buf); + break; + + case CODE_LABEL: + ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); + assemble_name (file, buf); + break; + + case CONST_INT: + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case CONST: + arc_output_pic_addr_const (file, XEXP (x, 0), code); + break; + + case CONST_DOUBLE: + if (GET_MODE (x) == VOIDmode) + { + /* We can use %d if the number is one word and positive. */ + if (CONST_DOUBLE_HIGH (x)) + fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX, + CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x)); + else if (CONST_DOUBLE_LOW (x) < 0) + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x)); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); + } + else + /* We can't handle floating point constants; + PRINT_OPERAND must handle them. */ + output_operand_lossage ("floating constant misused"); + break; + + case PLUS: + /* FIXME: Not needed here. */ + /* Some assemblers need integer constants to appear last (eg masm). */ + if (GET_CODE (XEXP (x, 0)) == CONST_INT) + { + arc_output_pic_addr_const (file, XEXP (x, 1), code); + fprintf (file, "+"); + arc_output_pic_addr_const (file, XEXP (x, 0), code); + } + else if (GET_CODE (XEXP (x, 1)) == CONST_INT) + { + arc_output_pic_addr_const (file, XEXP (x, 0), code); + if (INTVAL (XEXP (x, 1)) >= 0) + fprintf (file, "+"); + arc_output_pic_addr_const (file, XEXP (x, 1), code); + } + else + gcc_unreachable(); + break; + + case MINUS: + /* Avoid outputting things like x-x or x+5-x, + since some assemblers can't handle that. 
*/ + x = simplify_subtraction (x); + if (GET_CODE (x) != MINUS) + goto restart; + + arc_output_pic_addr_const (file, XEXP (x, 0), code); + fprintf (file, "-"); + if (GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) < 0) + { + fprintf (file, "("); + arc_output_pic_addr_const (file, XEXP (x, 1), code); + fprintf (file, ")"); + } + else + arc_output_pic_addr_const (file, XEXP (x, 1), code); + break; + + case ZERO_EXTEND: + case SIGN_EXTEND: + arc_output_pic_addr_const (file, XEXP (x, 0), code); + break; + + + case UNSPEC: + gcc_assert (XVECLEN (x, 0) == 1); + if (XINT (x, 1) == ARC_UNSPEC_GOT) + fputs ("pcl,", file); + arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code); + switch (XINT (x, 1)) + { + case ARC_UNSPEC_GOT: + fputs ("@gotpc", file); + break; + case ARC_UNSPEC_GOTOFF: + fputs ("@gotoff", file); + break; + case ARC_UNSPEC_PLT: + fputs ("@plt", file); + break; + default: + output_operand_lossage ("invalid UNSPEC as operand: %d", XINT (x,1)); + break; + } + break; + + default: + output_operand_lossage ("invalid expression as operand"); + } +} + +#define SYMBOLIC_CONST(X) \ +(GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) + +/* Emit insns to move operands[1] into operands[0]. */ + +void +emit_pic_move (rtx *operands, enum machine_mode) +{ + rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode); + + if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1])) + operands[1] = force_reg (Pmode, operands[1]); + else + operands[1] = arc_legitimize_pic_address (operands[1], temp); +} + + +/* The function returning the number of words, at the beginning of an + argument, must be put in registers. The returned value must be + zero for arguments that are passed entirely in registers or that + are entirely pushed on the stack. + + On some machines, certain arguments must be passed partially in + registers and partially in memory. On these machines, typically + the first N words of arguments are passed in registers, and the + rest on the stack. If a multi-word argument (a `double' or a + structure) crosses that boundary, its first few words must be + passed in registers and the rest must be pushed. This function + tells the compiler when this occurs, and how many of the words + should go in registers. + + `FUNCTION_ARG' for these arguments should return the first register + to be used by the caller for this argument; likewise + `FUNCTION_INCOMING_ARG', for the called function. + + The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS. */ + +/* If REGNO is the least arg reg available then what is the total number of arg + regs available. */ +#define GPR_REST_ARG_REGS(REGNO) \ + ((REGNO) <= MAX_ARC_PARM_REGS ? MAX_ARC_PARM_REGS - (REGNO) : 0 ) + +/* Since arc parm regs are contiguous. */ +#define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 ) + +/* Implement TARGET_ARG_PARTIAL_BYTES. */ + +static int +arc_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = (mode == BLKmode + ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode)); + int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + int arg_num = *cum; + int ret; + + arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type); + ret = GPR_REST_ARG_REGS (arg_num); + + /* ICEd at function.c:2361, and ret is copied to data->partial */ + ret = (ret >= words ? 
0 : ret * UNITS_PER_WORD); + + return ret; +} + + + +/* This function is used to control a function argument is passed in a + register, and which register. + + The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes + (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE) + all of the previous arguments so far passed in registers; MODE, the + machine mode of the argument; TYPE, the data type of the argument + as a tree node or 0 if that is not known (which happens for C + support library functions); and NAMED, which is 1 for an ordinary + argument and 0 for nameless arguments that correspond to `...' in + the called function's prototype. + + The returned value should either be a `reg' RTX for the hard + register in which to pass the argument, or zero to pass the + argument on the stack. + + For machines like the Vax and 68000, where normally all arguments + are pushed, zero suffices as a definition. + + The usual way to make the ANSI library `stdarg.h' work on a machine + where some arguments are usually passed in registers, is to cause + nameless arguments to be passed on the stack instead. This is done + by making the function return 0 whenever NAMED is 0. + + You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the + definition of this function to determine if this argument is of a + type that must be passed in the stack. If `REG_PARM_STACK_SPACE' + is not defined and the function returns non-zero for such an + argument, the compiler will abort. If `REG_PARM_STACK_SPACE' is + defined, the argument will be computed in the stack and then loaded + into a register. + + The function is used to implement macro FUNCTION_ARG. */ +/* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers + and the rest are pushed. */ + +static rtx +arc_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int arg_num = *cum; + rtx ret; + const char *debstr ATTRIBUTE_UNUSED; + + arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type); + /* Return a marker for use in the call instruction. */ + if (mode == VOIDmode) + { + ret = const0_rtx; + debstr = "<0>"; + } + else if (GPR_REST_ARG_REGS (arg_num) > 0) + { + ret = gen_rtx_REG (mode, arg_num); + debstr = reg_names [arg_num]; + } + else + { + ret = NULL_RTX; + debstr = "memory"; + } + return ret; +} + +/* The function to update the summarizer variable *CUM to advance past + an argument in the argument list. The values MODE, TYPE and NAMED + describe that argument. Once this is done, the variable *CUM is + suitable for analyzing the *following* argument with + `FUNCTION_ARG', etc. + + This function need not do anything if the argument in question was + passed on the stack. The compiler knows how to track the amount of + stack space used for arguments without any special help. + + The function is used to implement macro FUNCTION_ARG_ADVANCE. */ +/* For the ARC: the cum set here is passed on to function_arg where we + look at its value and say which reg to use. Strategy: advance the + regnumber here till we run out of arg regs, then set *cum to last + reg. In function_arg, since *cum > last arg reg we would return 0 + and thus the arg will end up on the stack. For straddling args of + course function_arg_partial_nregs will come into play. 
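The straddling case mentioned above is easiest to see with concrete numbers. A stand-alone sketch assuming the usual eight ARC argument registers (r0-r7) and 4-byte words, mirroring GPR_REST_ARG_REGS and arc_arg_partial_bytes:

#include <stdio.h>

#define MAX_PARM_REGS 8   /* r0..r7, assumed */
#define WORD_BYTES    4

/* How many bytes of an argument that starts in register FIRST_REG and
   occupies BYTES go into registers when the rest spills to the stack.  */
static int
partial_bytes (int first_reg, int bytes)
{
  int words = (bytes + WORD_BYTES - 1) / WORD_BYTES;
  int regs_left
    = first_reg <= MAX_PARM_REGS ? MAX_PARM_REGS - first_reg : 0;
  return regs_left >= words ? 0 : regs_left * WORD_BYTES;
}

int
main (void)
{
  /* 8-byte argument starting in r7: 4 bytes in r7, 4 on the stack.  */
  printf ("%d\n", partial_bytes (7, 8));   /* 4 */
  /* Same argument starting in r6: fits entirely in r6/r7.  */
  printf ("%d\n", partial_bytes (6, 8));   /* 0 */
  /* Starting past the last register: entirely on the stack.  */
  printf ("%d\n", partial_bytes (8, 8));   /* 0 */
  return 0;
}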
*/ + +static void +arc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = (mode == BLKmode + ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode)); + int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + int i; + + if (words) + *cum = ROUND_ADVANCE_CUM (*cum, mode, type); + for (i = 0; i < words; i++) + *cum = ARC_NEXT_ARG_REG (*cum); + +} + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FN_DECL_OR_TYPE is its + FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type. */ + +static rtx +arc_function_value (const_tree valtype, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode = TYPE_MODE (valtype); + int unsignedp ATTRIBUTE_UNUSED; + + unsignedp = TYPE_UNSIGNED (valtype); + if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE) + PROMOTE_MODE (mode, unsignedp, valtype); + return gen_rtx_REG (mode, 0); +} + +/* Returns the return address that is used by builtin_return_address. */ + +rtx +arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode , RETURN_ADDR_REGNUM); +} + +/* Nonzero if the constant value X is a legitimate general operand + when generating PIC code. It is given that flag_pic is on and + that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ + +bool +arc_legitimate_pic_operand_p (rtx x) +{ + return !arc_raw_symbolic_reference_mentioned_p (x, true); +} + +/* Determine if a given RTX is a valid constant. We already know this + satisfies CONSTANT_P. */ + +bool +arc_legitimate_constant_p (enum machine_mode, rtx x) +{ + if (!flag_pic) + return true; + + switch (GET_CODE (x)) + { + case CONST: + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + { + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return false; + x = XEXP (x, 0); + } + + /* Only some unspecs are valid as "constants". */ + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + case ARC_UNSPEC_PLT: + case ARC_UNSPEC_GOTOFF: + case ARC_UNSPEC_GOT: + case UNSPEC_PROF: + return true; + + default: + gcc_unreachable (); + } + + /* We must have drilled down to a symbol. */ + if (arc_raw_symbolic_reference_mentioned_p (x, false)) + return false; + + /* Return true. */ + break; + + case LABEL_REF: + case SYMBOL_REF: + return false; + + default: + break; + } + + /* Otherwise we handle everything else in the move patterns. */ + return true; +} + +static bool +arc_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + if (RTX_OK_FOR_BASE_P (x, strict)) + return true; + if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x, TARGET_INDEXED_LOADS, strict)) + return true; + if (LEGITIMATE_SCALED_ADDRESS_P (mode, x, strict)) + return true; + if (LEGITIMATE_SMALL_DATA_ADDRESS_P (x)) + return true; + if (GET_CODE (x) == CONST_INT && LARGE_INT (INTVAL (x))) + return true; + if ((GET_MODE_SIZE (mode) != 16) + && (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF + || GET_CODE (x) == CONST)) + { + if (!flag_pic || arc_legitimate_pic_addr_p (x)) + return true; + } + if ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC + || GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC) + && RTX_OK_FOR_BASE_P (XEXP (x, 0), strict)) + return true; + /* We're restricted here by the `st' insn. 
*/ + if ((GET_CODE (x) == PRE_MODIFY || GET_CODE (x) == POST_MODIFY) + && GET_CODE (XEXP ((x), 1)) == PLUS + && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP (x, 1), 0)) + && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP (x, 1), + TARGET_AUTO_MODIFY_REG, strict)) + return true; + return false; +} + +/* Return true iff ADDR (a legitimate address expression) + has an effect that depends on the machine mode it is used for. */ + +static bool +arc_mode_dependent_address_p (const_rtx addr, addr_space_t) +{ + /* SYMBOL_REF is not mode dependent: it is either a small data reference, + which is valid for loads and stores, or a limm offset, which is valid for + loads. */ + /* Scaled indices are scaled by the access mode; likewise for scaled + offsets, which are needed for maximum offset stores. */ + if (GET_CODE (addr) == PLUS + && (GET_CODE (XEXP ((addr), 0)) == MULT + || (CONST_INT_P (XEXP ((addr), 1)) + && !SMALL_INT (INTVAL (XEXP ((addr), 1)))))) + return true; + return false; +} + +/* Determine if it's legal to put X into the constant pool. */ + +static bool +arc_cannot_force_const_mem (enum machine_mode mode, rtx x) +{ + return !arc_legitimate_constant_p (mode, x); +} + + +/* Generic function to define a builtin. */ +#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ + do \ + { \ + if (MASK) \ + add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, NULL_TREE); \ + } \ + while (0) + + +static void +arc_init_builtins (void) +{ + tree endlink = void_list_node; + + tree void_ftype_void + = build_function_type (void_type_node, + endlink); + + tree int_ftype_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink)); + + tree pcvoid_type_node + = build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST)); + tree int_ftype_pcvoid_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, pcvoid_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree int_ftype_short_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, short_integer_type_node, endlink)); + + tree void_ftype_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + tree void_ftype_usint_usint + = build_function_type (void_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink))); + + tree int_ftype_int_int + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + + tree usint_ftype_usint + = build_function_type (long_unsigned_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink)); + + tree void_ftype_usint + = build_function_type (void_type_node, + tree_cons (NULL_TREE, long_unsigned_type_node, endlink)); + + /* Add the builtins. 
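The list that follows registers the ARC-specific builtins; from user code they look like ordinary calls. A minimal usage sketch — it only compiles with an ARC-targeted gcc, and which builtins exist depends on the core and option flags exactly as the def_mbuiltin conditions below state:

/* Plain host compilers do not know these builtins; flag requirements
   follow the registration table below.  */
int
builtin_demo (int x)
{
  __builtin_arc_nop ();                  /* always registered             */
  int n = __builtin_arc_norm (x);        /* requires -mnorm (TARGET_NORM) */
  int s = __builtin_arc_swap (x);        /* requires -mswap (TARGET_SWAP) */
  return n + s;
}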
*/ + def_mbuiltin (1,"__builtin_arc_nop", void_ftype_void, ARC_BUILTIN_NOP); + def_mbuiltin (TARGET_NORM, "__builtin_arc_norm", int_ftype_int, ARC_BUILTIN_NORM); + def_mbuiltin (TARGET_NORM, "__builtin_arc_normw", int_ftype_short_int, ARC_BUILTIN_NORMW); + def_mbuiltin (TARGET_SWAP, "__builtin_arc_swap", int_ftype_int, ARC_BUILTIN_SWAP); + def_mbuiltin (TARGET_MUL64_SET,"__builtin_arc_mul64", void_ftype_int_int, ARC_BUILTIN_MUL64); + def_mbuiltin (TARGET_MUL64_SET,"__builtin_arc_mulu64", void_ftype_usint_usint, ARC_BUILTIN_MULU64); + def_mbuiltin (1,"__builtin_arc_rtie", void_ftype_void, ARC_BUILTIN_RTIE); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_sync", void_ftype_void, ARC_BUILTIN_SYNC); + def_mbuiltin ((TARGET_EA_SET),"__builtin_arc_divaw", int_ftype_int_int, ARC_BUILTIN_DIVAW); + def_mbuiltin (1,"__builtin_arc_brk", void_ftype_void, ARC_BUILTIN_BRK); + def_mbuiltin (1,"__builtin_arc_flag", void_ftype_usint, ARC_BUILTIN_FLAG); + def_mbuiltin (1,"__builtin_arc_sleep", void_ftype_usint, ARC_BUILTIN_SLEEP); + def_mbuiltin (1,"__builtin_arc_swi", void_ftype_void, ARC_BUILTIN_SWI); + def_mbuiltin (1,"__builtin_arc_core_read", usint_ftype_usint, ARC_BUILTIN_CORE_READ); + def_mbuiltin (1,"__builtin_arc_core_write", void_ftype_usint_usint, ARC_BUILTIN_CORE_WRITE); + def_mbuiltin (1,"__builtin_arc_lr", usint_ftype_usint, ARC_BUILTIN_LR); + def_mbuiltin (1,"__builtin_arc_sr", void_ftype_usint_usint, ARC_BUILTIN_SR); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_trap_s", void_ftype_usint, ARC_BUILTIN_TRAP_S); + def_mbuiltin (TARGET_ARC700,"__builtin_arc_unimp_s", void_ftype_void, ARC_BUILTIN_UNIMP_S); + def_mbuiltin (1,"__builtin_arc_aligned", int_ftype_pcvoid_int, ARC_BUILTIN_ALIGNED); + + if (TARGET_SIMD_SET) + arc_init_simd_builtins (); +} + +static rtx arc_expand_simd_builtin (tree, rtx, rtx, enum machine_mode, int); + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +arc_expand_builtin (tree exp, + rtx target, + rtx subtarget, + enum machine_mode mode, + int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + rtx op0; + rtx op1; + int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + enum machine_mode mode0; + enum machine_mode mode1; + + if (fcode > ARC_SIMD_BUILTIN_BEGIN && fcode < ARC_SIMD_BUILTIN_END) + return arc_expand_simd_builtin (exp, target, subtarget, mode, ignore); + + switch (fcode) + { + case ARC_BUILTIN_NOP: + emit_insn (gen_nop ()); + return NULL_RTX; + + case ARC_BUILTIN_NORM: + icode = CODE_FOR_clrsbsi2; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_clrsbsi2 (target, op0)); + return target; + + case ARC_BUILTIN_NORMW: + + /* FIXME : This should all be HImode, not SImode. */ + icode = CODE_FOR_normw; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, convert_to_mode (mode0, op0,0)); + + emit_insn (gen_normw (target, op0)); + return target; + + case ARC_BUILTIN_MUL64: + icode = CODE_FOR_mul64; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_mul64 (op0,op1)); + return NULL_RTX; + + case ARC_BUILTIN_MULU64: + icode = CODE_FOR_mulu64; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[0].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_mulu64 (op0,op1)); + return NULL_RTX; + + case ARC_BUILTIN_RTIE: + icode = CODE_FOR_rtie; + emit_insn (gen_rtie (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SYNC: + icode = CODE_FOR_sync; + emit_insn (gen_sync (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SWAP: + icode = CODE_FOR_swap; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + target = gen_reg_rtx (SImode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_swap (target, op0)); + return target; + + case ARC_BUILTIN_DIVAW: + icode = CODE_FOR_divaw; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + target = gen_reg_rtx (SImode); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + emit_insn (gen_divaw (target, op0, op1)); + return target; + + case ARC_BUILTIN_BRK: + icode = CODE_FOR_brk; + emit_insn (gen_brk (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_SLEEP: + icode = CODE_FOR_sleep; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + emit_insn (gen_sleep (op0)); + return NULL_RTX; + + case ARC_BUILTIN_SWI: + icode = CODE_FOR_swi; + emit_insn (gen_swi (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_FLAG: + icode = CODE_FOR_flag; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + if (! 
(*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + emit_insn (gen_flag (op0)); + return NULL_RTX; + + case ARC_BUILTIN_CORE_READ: + icode = CODE_FOR_core_read; + arg0 = CALL_EXPR_ARG (exp, 0); + target = gen_reg_rtx (SImode); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + emit_insn (gen_core_read (target, op0)); + return target; + + case ARC_BUILTIN_CORE_WRITE: + icode = CODE_FOR_core_write; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + fold (arg1); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + emit_insn (gen_core_write (op0, op1)); + return NULL_RTX; + + case ARC_BUILTIN_LR: + icode = CODE_FOR_lr; + arg0 = CALL_EXPR_ARG (exp, 0); + target = gen_reg_rtx (SImode); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + emit_insn (gen_lr (target, op0)); + return target; + + case ARC_BUILTIN_SR: + icode = CODE_FOR_sr; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + fold (arg1); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + emit_insn (gen_sr (op0, op1)); + return NULL_RTX; + + case ARC_BUILTIN_TRAP_S: + icode = CODE_FOR_trap_s; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[1].mode; + + /* We don't give an error for non-cost values here because + we still want to allow things to be fixed up by later inlining / + constant folding / dead code elimination. */ + if (CONST_INT_P (op0) && !satisfies_constraint_L (op0)) + { + /* Keep this message in sync with the one in arc.md:trap_s, + because *.md files don't get scanned by exgettext. */ + error ("operand to trap_s should be an unsigned 6-bit value"); + } + emit_insn (gen_trap_s (op0)); + return NULL_RTX; + + case ARC_BUILTIN_UNIMP_S: + icode = CODE_FOR_unimp_s; + emit_insn (gen_unimp_s (const1_rtx)); + return NULL_RTX; + + case ARC_BUILTIN_ALIGNED: + /* __builtin_arc_aligned (void* val, int alignval) */ + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + fold (arg1); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + target = gen_reg_rtx (SImode); + + if (!CONST_INT_P (op1)) + { + /* If we can't fold the alignment to a constant integer + whilst optimizing, this is probably a user error. */ + if (optimize) + warning (0, "__builtin_arc_aligned with non-constant alignment"); + } + else + { + HOST_WIDE_INT alignTest = INTVAL (op1); + /* Check alignTest is positive, and a power of two. 
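The check immediately below relies on the x & -x idiom: a value is a power of two exactly when it is positive and equal to its own lowest set bit. A stand-alone version with a few sanity checks:

#include <assert.h>

/* alignTest is a power of two exactly when it is positive and equals
   (alignTest & -alignTest); equivalently, (x & (x - 1)) == 0 for x > 0.  */
static int
power_of_two_p (long x)
{
  return x > 0 && x == (x & -x);
}

int
main (void)
{
  assert (power_of_two_p (1));
  assert (power_of_two_p (4096));
  assert (!power_of_two_p (0));
  assert (!power_of_two_p (12));
  assert (!power_of_two_p (-8));
  return 0;
}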
*/ + if (alignTest <= 0 || alignTest != (alignTest & -alignTest)) + { + error ("invalid alignment value for __builtin_arc_aligned"); + return NULL_RTX; + } + + if (CONST_INT_P (op0)) + { + HOST_WIDE_INT pnt = INTVAL (op0); + + if ((pnt & (alignTest - 1)) == 0) + return const1_rtx; + } + else + { + unsigned align = get_pointer_alignment (arg0); + unsigned numBits = alignTest * BITS_PER_UNIT; + + if (align && align >= numBits) + return const1_rtx; + /* Another attempt to ascertain alignment. Check the type + we are pointing to. */ + if (POINTER_TYPE_P (TREE_TYPE (arg0)) + && TYPE_ALIGN (TREE_TYPE (TREE_TYPE (arg0))) >= numBits) + return const1_rtx; + } + } + + /* Default to false. */ + return const0_rtx; + + default: + break; + } + + /* @@@ Should really do something sensible here. */ + return NULL_RTX; +} + +/* Returns true if the operands[opno] is a valid compile-time constant to be + used as register number in the code for builtins. Else it flags an error + and returns false. */ + +bool +check_if_valid_regno_const (rtx *operands, int opno) +{ + + switch (GET_CODE (operands[opno])) + { + case SYMBOL_REF : + case CONST : + case CONST_INT : + return true; + default: + error ("register number must be a compile-time constant. Try giving higher optimization levels"); + break; + } + return false; +} + +/* Check that after all the constant folding, whether the operand to + __builtin_arc_sleep is an unsigned int of 6 bits. If not, flag an error. */ + +bool +check_if_valid_sleep_operand (rtx *operands, int opno) +{ + switch (GET_CODE (operands[opno])) + { + case CONST : + case CONST_INT : + if( UNSIGNED_INT6 (INTVAL (operands[opno]))) + return true; + default: + fatal_error("operand for sleep instruction must be an unsigned 6 bit compile-time constant"); + break; + } + return false; +} + +/* Return true if it is ok to make a tail-call to DECL. */ + +static bool +arc_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, + tree exp ATTRIBUTE_UNUSED) +{ + /* Never tailcall from an ISR routine - it needs a special exit sequence. */ + if (ARC_INTERRUPT_P (arc_compute_function_type (cfun))) + return false; + + /* Everything else is ok. */ + return true; +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ + +static void +arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function) +{ + int mi_delta = delta; + const char *const mi_op = mi_delta < 0 ? "sub" : "add"; + int shift = 0; + int this_regno + = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0; + rtx fnaddr; + + if (mi_delta < 0) + mi_delta = - mi_delta; + + /* Add DELTA. When possible use a plain add, otherwise load it into + a register first. */ + + while (mi_delta != 0) + { + if ((mi_delta & (3 << shift)) == 0) + shift += 2; + else + { + asm_fprintf (file, "\t%s\t%s, %s, %d\n", + mi_op, reg_names[this_regno], reg_names[this_regno], + mi_delta & (0xff << shift)); + mi_delta &= ~(0xff << shift); + shift += 8; + } + } + + /* If needed, add *(*THIS + VCALL_OFFSET) to THIS. 
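The add/sub loop in arc_output_mi_thunk above splits an arbitrary DELTA into 8-bit chunks positioned at even shifts, skipping zero 2-bit groups. A stand-alone trace of that decomposition, assuming a positive delta (the sign only chooses add versus sub):

#include <stdio.h>

/* Mirror of the thunk's delta loop: skip zero 2-bit groups, otherwise
   peel off an 8-bit chunk starting at the current shift position.  */
static void
decompose_delta (unsigned long delta)
{
  int shift = 0;
  while (delta != 0)
    {
      if ((delta & (3UL << shift)) == 0)
        shift += 2;
      else
        {
          unsigned long chunk = delta & (0xffUL << shift);
          printf ("add this, this, %#lx\n", chunk);
          delta &= ~(0xffUL << shift);
          shift += 8;
        }
    }
}

int
main (void)
{
  decompose_delta (0x1234);   /* -> add 0x234 ; add 0x1000 */
  return 0;
}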
*/ + if (vcall_offset != 0) + { + /* ld r12,[this] --> temp = *this + add r12,r12,vcall_offset --> temp = *(*this + vcall_offset) + ld r12,[r12] + add this,this,r12 --> this+ = *(*this + vcall_offset) */ + asm_fprintf (file, "\tld\t%s, [%s]\n", + ARC_TEMP_SCRATCH_REG, reg_names[this_regno]); + asm_fprintf (file, "\tadd\t%s, %s, %ld\n", + ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset); + asm_fprintf (file, "\tld\t%s, [%s]\n", + ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG); + asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno], + reg_names[this_regno], ARC_TEMP_SCRATCH_REG); + } + + fnaddr = XEXP (DECL_RTL (function), 0); + + if (arc_is_longcall_p (fnaddr)) + fputs ("\tj\t", file); + else + fputs ("\tb\t", file); + assemble_name (file, XSTR (fnaddr, 0)); + fputc ('\n', file); +} + +/* Return true if a 32 bit "long_call" should be generated for + this calling SYM_REF. We generate a long_call if the function: + + a. has an __attribute__((long call)) + or b. the -mlong-calls command line switch has been specified + + However we do not generate a long call if the function has an + __attribute__ ((short_call)) or __attribute__ ((medium_call)) + + This function will be called by C fragments contained in the machine + description file. */ + +bool +arc_is_longcall_p (rtx sym_ref) +{ + if (GET_CODE (sym_ref) != SYMBOL_REF) + return false; + + return (SYMBOL_REF_LONG_CALL_P (sym_ref) + || (TARGET_LONG_CALLS_SET + && !SYMBOL_REF_SHORT_CALL_P (sym_ref) + && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref))); + +} + +/* Likewise for short calls. */ + +bool +arc_is_shortcall_p (rtx sym_ref) +{ + if (GET_CODE (sym_ref) != SYMBOL_REF) + return false; + + return (SYMBOL_REF_SHORT_CALL_P (sym_ref) + || (!TARGET_LONG_CALLS_SET && !TARGET_MEDIUM_CALLS + && !SYMBOL_REF_LONG_CALL_P (sym_ref) + && !SYMBOL_REF_MEDIUM_CALL_P (sym_ref))); + +} + +/* Emit profiling code for calling CALLEE. Return true if a special + call pattern needs to be generated. */ + +bool +arc_profile_call (rtx callee) +{ + rtx from = XEXP (DECL_RTL (current_function_decl), 0); + + if (TARGET_UCB_MCOUNT) + /* Profiling is done by instrumenting the callee. */ + return false; + + if (CONSTANT_P (callee)) + { + rtx count_ptr + = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, from, callee, + CONST0_RTX (Pmode)), + UNSPEC_PROF)); + rtx counter = gen_rtx_MEM (SImode, count_ptr); + /* ??? The increment would better be done atomically, but as there is + no proper hardware support, that would be too expensive. */ + emit_move_insn (counter, + force_reg (SImode, plus_constant (SImode, counter, 1))); + return false; + } + else + { + rtx count_list_ptr + = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, from, CONST0_RTX (Pmode), + CONST0_RTX (Pmode)), + UNSPEC_PROF)); + emit_move_insn (gen_rtx_REG (Pmode, 8), count_list_ptr); + emit_move_insn (gen_rtx_REG (Pmode, 9), callee); + return true; + } +} + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +static bool +arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type)) + return true; + else + { + HOST_WIDE_INT size = int_size_in_bytes (type); + return (size == -1 || size > 8); + } +} + + +/* This was in rtlanal.c, and can go in there when we decide we want + to submit the change for inclusion in the GCC tree. 
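The long/short call classification above is driven by per-function attributes and by -mlong-calls / -mmedium-calls. A declaration-level sketch using the attribute spellings looked up in arc_encode_section_info (the callees are placeholders with no definitions here):

/* Per-function overrides of the call model; the command-line default
   (-mlong-calls, -mmedium-calls or neither) applies to everything else.  */
void far_away (void)   __attribute__ ((long_call));   /* force 32-bit call    */
void nearby (void)     __attribute__ ((short_call));  /* force short-range call */
void in_between (void) __attribute__ ((medium_call));

void
caller (void)
{
  far_away ();
  nearby ();
  in_between ();
}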
*/ +/* Like note_stores, but allow the callback to have side effects on the rtl + (like the note_stores of yore): + Call FUN on each register or MEM that is stored into or clobbered by X. + (X would be the pattern of an insn). DATA is an arbitrary pointer, + ignored by note_stores, but passed to FUN. + FUN may alter parts of the RTL. + + FUN receives three arguments: + 1. the REG, MEM, CC0 or PC being stored in or clobbered, + 2. the SET or CLOBBER rtx that does the store, + 3. the pointer DATA provided to note_stores. + + If the item being stored in or clobbered is a SUBREG of a hard register, + the SUBREG will be passed. */ + +/* For now. */ static +void +walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data) +{ + int i; + + if (GET_CODE (x) == COND_EXEC) + x = COND_EXEC_CODE (x); + + if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER) + { + rtx dest = SET_DEST (x); + + while ((GET_CODE (dest) == SUBREG + && (!REG_P (SUBREG_REG (dest)) + || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER)) + || GET_CODE (dest) == ZERO_EXTRACT + || GET_CODE (dest) == STRICT_LOW_PART) + dest = XEXP (dest, 0); + + /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions, + each of whose first operand is a register. */ + if (GET_CODE (dest) == PARALLEL) + { + for (i = XVECLEN (dest, 0) - 1; i >= 0; i--) + if (XEXP (XVECEXP (dest, 0, i), 0) != 0) + (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data); + } + else + (*fun) (dest, x, data); + } + + else if (GET_CODE (x) == PARALLEL) + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + walk_stores (XVECEXP (x, 0, i), fun, data); +} + +static bool +arc_pass_by_reference (cumulative_args_t ca_v ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + return (type != 0 + && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST + || TREE_ADDRESSABLE (type))); +} + +/* Implement TARGET_CAN_USE_DOLOOP_P. */ + +static bool +arc_can_use_doloop_p (double_int iterations, double_int, + unsigned int loop_depth, bool entered_at_top) +{ + if (loop_depth > 1) + return false; + /* Setting up the loop with two sr instructions costs 6 cycles. */ + if (TARGET_ARC700 + && !entered_at_top + && iterations.high == 0 + && iterations.low > 0 + && iterations.low <= (flag_pic ? 6 : 3)) + return false; + return true; +} + +/* NULL if INSN insn is valid within a low-overhead loop. + Otherwise return why doloop cannot be applied. */ + +static const char * +arc_invalid_within_doloop (const_rtx insn) +{ + if (CALL_P (insn)) + return "Function call in the loop."; + return NULL; +} + +static int arc_reorg_in_progress = 0; + +/* ARC's machince specific reorg function. */ + +static void +arc_reorg (void) +{ + rtx insn, pattern; + rtx pc_target; + long offset; + int changed; + + cfun->machine->arc_reorg_started = 1; + arc_reorg_in_progress = 1; + + /* Emit special sections for profiling. */ + if (crtl->profile) + { + section *save_text_section; + rtx insn; + int size = get_max_uid () >> 4; + htab_t htab = htab_create (size, unspec_prof_hash, unspec_prof_htab_eq, + NULL); + + save_text_section = in_section; + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (NONJUMP_INSN_P (insn)) + walk_stores (PATTERN (insn), write_profile_sections, htab); + if (htab_elements (htab)) + in_section = 0; + switch_to_section (save_text_section); + htab_delete (htab); + } + + /* Link up loop ends with their loop start. 
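arc_can_use_doloop_p and arc_invalid_within_doloop above decide which counted loops may become zero-overhead lp/LP_COUNT loops, which the pass below then links up via doloop_begin_i / doloop_end_i. A source-level sketch of a loop that qualifies and one that does not (consume is just a placeholder external call):

extern int consume (int);   /* any call in the body disqualifies the loop */

/* Innermost, call-free, counted loop: a candidate for the zero-overhead
   form set up by doloop_begin_i / doloop_end_i.  */
int
sum_array (const int *a, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += a[i];
  return s;
}

/* The call makes arc_invalid_within_doloop reject this one, so it keeps
   an ordinary decrement-and-branch loop.  */
int
sum_mapped (const int *a, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += consume (a[i]);
  return s;
}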
*/ + { + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (GET_CODE (insn) == JUMP_INSN + && recog_memoized (insn) == CODE_FOR_doloop_end_i) + { + rtx top_label + = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0); + rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label)); + rtx lp, prev = prev_nonnote_insn (top_label); + rtx lp_simple = NULL_RTX; + rtx next = NULL_RTX; + rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0); + HOST_WIDE_INT loop_end_id + = -INTVAL (XEXP (XVECEXP (PATTERN (insn), 0, 4), 0)); + int seen_label = 0; + + for (lp = prev; + (lp && NONJUMP_INSN_P (lp) + && recog_memoized (lp) != CODE_FOR_doloop_begin_i); + lp = prev_nonnote_insn (lp)) + ; + if (!lp || !NONJUMP_INSN_P (lp) + || dead_or_set_regno_p (lp, LP_COUNT)) + { + for (prev = next = insn, lp = NULL_RTX ; prev || next;) + { + if (prev) + { + if (NONJUMP_INSN_P (prev) + && recog_memoized (prev) == CODE_FOR_doloop_begin_i + && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0)) + == loop_end_id)) + { + lp = prev; + break; + } + else if (LABEL_P (prev)) + seen_label = 1; + prev = prev_nonnote_insn (prev); + } + if (next) + { + if (NONJUMP_INSN_P (next) + && recog_memoized (next) == CODE_FOR_doloop_begin_i + && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0)) + == loop_end_id)) + { + lp = next; + break; + } + next = next_nonnote_insn (next); + } + } + prev = NULL_RTX; + } + else + lp_simple = lp; + if (lp && !dead_or_set_regno_p (lp, LP_COUNT)) + { + rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0); + if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0))) + /* The loop end insn has been duplicated. That can happen + when there is a conditional block at the very end of + the loop. */ + goto failure; + /* If Register allocation failed to allocate to the right + register, There is no point into teaching reload to + fix this up with reloads, as that would cost more + than using an ordinary core register with the + doloop_fallback pattern. */ + if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt)) + /* Likewise, if the loop setup is evidently inside the loop, + we loose. */ + || (!lp_simple && lp != next && !seen_label)) + { + remove_insn (lp); + goto failure; + } + /* It is common that the optimizers copy the loop count from + another register, and doloop_begin_i is stuck with the + source of the move. Making doloop_begin_i only accept "l" + is nonsentical, as this then makes reload evict the pseudo + used for the loop end. The underlying cause is that the + optimizers don't understand that the register allocation for + doloop_begin_i should be treated as part of the loop. + Try to work around this problem by verifying the previous + move exists. 
*/ + if (true_regnum (begin_cnt) != LP_COUNT) + { + rtx mov, set, note; + + for (mov = prev_nonnote_insn (lp); mov; + mov = prev_nonnote_insn (mov)) + { + if (!NONJUMP_INSN_P (mov)) + mov = 0; + else if ((set = single_set (mov)) + && rtx_equal_p (SET_SRC (set), begin_cnt) + && rtx_equal_p (SET_DEST (set), op0)) + break; + } + if (mov) + { + XEXP (XVECEXP (PATTERN (lp), 0 ,3), 0) = op0; + note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt)); + if (note) + remove_note (lp, note); + } + else + { + remove_insn (lp); + goto failure; + } + } + XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num; + XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num; + if (next == lp) + XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx; + else if (!lp_simple) + XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx; + else if (prev != lp) + { + remove_insn (lp); + add_insn_after (lp, prev, NULL); + } + if (!lp_simple) + { + XEXP (XVECEXP (PATTERN (lp), 0, 7), 0) + = gen_rtx_LABEL_REF (Pmode, top_label); + add_reg_note (lp, REG_LABEL_OPERAND, top_label); + LABEL_NUSES (top_label)++; + } + /* We can avoid tedious loop start / end setting for empty loops + be merely setting the loop count to its final value. */ + if (next_active_insn (top_label) == insn) + { + rtx lc_set + = gen_rtx_SET (VOIDmode, + XEXP (XVECEXP (PATTERN (lp), 0, 3), 0), + const0_rtx); + + lc_set = emit_insn_before (lc_set, insn); + delete_insn (lp); + delete_insn (insn); + insn = lc_set; + } + /* If the loop is non-empty with zero length, we can't make it + a zero-overhead loop. That can happen for empty asms. */ + else + { + rtx scan; + + for (scan = top_label; + (scan && scan != insn + && (!NONJUMP_INSN_P (scan) || !get_attr_length (scan))); + scan = NEXT_INSN (scan)); + if (scan == insn) + { + remove_insn (lp); + goto failure; + } + } + } + else + { + /* Sometimes the loop optimizer makes a complete hash of the + loop. If it were only that the loop is not entered at the + top, we could fix this up by setting LP_START with SR . + However, if we can't find the loop begin were it should be, + chances are that it does not even dominate the loop, but is + inside the loop instead. Using SR there would kill + performance. + We use the doloop_fallback pattern here, which executes + in two cycles on the ARC700 when predicted correctly. */ + failure: + if (!REG_P (op0)) + { + rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0); + + emit_insn_before (gen_move_insn (op3, op0), insn); + PATTERN (insn) + = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0); + } + else + XVEC (PATTERN (insn), 0) + = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0), + XVECEXP (PATTERN (insn), 0, 1)); + INSN_CODE (insn) = -1; + } + } + } + +/* FIXME: should anticipate ccfsm action, generate special patterns for + to-be-deleted branches that have no delay slot and have at least the + length of the size increase forced on other insns that are conditionalized. + This can also have an insn_list inside that enumerates insns which are + not actually conditionalized because the destinations are dead in the + not-execute case. + Could also tag branches that we want to be unaligned if they get no delay + slot, or even ones that we don't want to do delay slot sheduling for + because we can unalign them. + + However, there are cases when conditional execution is only possible after + delay slot scheduling: + + - If a delay slot is filled with a nocond/set insn from above, the previous + basic block can become elegible for conditional execution. 
+ - If a delay slot is filled with a nocond insn from the fall-through path, + the branch with that delay slot can become eligble for conditional + execution (however, with the same sort of data flow analysis that dbr + does, we could have figured out before that we don't need to + conditionalize this insn.) + - If a delay slot insn is filled with an insn from the target, the + target label gets its uses decremented (even deleted if falling to zero), + thus possibly creating more condexec opportunities there. + Therefore, we should still be prepared to apply condexec optimization on + non-prepared branches if the size increase of conditionalized insns is no + more than the size saved from eliminating the branch. An invocation option + could also be used to reserve a bit of extra size for condbranches so that + this'll work more often (could also test in arc_reorg if the block is + 'close enough' to be eligible for condexec to make this likely, and + estimate required size increase). */ + /* Generate BRcc insns, by combining cmp and Bcc insns wherever possible. */ + if (TARGET_NO_BRCC_SET) + return; + + do + { + init_insn_lengths(); + changed = 0; + + if (optimize > 1 && !TARGET_NO_COND_EXEC) + { + arc_ifcvt (); + unsigned int flags = pass_data_arc_ifcvt.todo_flags_finish; + df_finish_pass ((flags & TODO_df_verify) != 0); + } + + /* Call shorten_branches to calculate the insn lengths. */ + shorten_branches (get_insns()); + cfun->machine->ccfsm_current_insn = NULL_RTX; + + if (!INSN_ADDRESSES_SET_P()) + fatal_error ("Insn addresses not set after shorten_branches"); + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx label; + enum attr_type insn_type; + + /* If a non-jump insn (or a casesi jump table), continue. */ + if (GET_CODE (insn) != JUMP_INSN || + GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + continue; + + /* If we already have a brcc, note if it is suitable for brcc_s. + Be a bit generous with the brcc_s range so that we can take + advantage of any code shortening from delay slot scheduling. */ + if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch) + { + rtx pat = PATTERN (insn); + rtx op = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0); + rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0); + + offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); + if ((offset >= -140 && offset < 140) + && rtx_equal_p (XEXP (op, 1), const0_rtx) + && compact_register_operand (XEXP (op, 0), VOIDmode) + && equality_comparison_operator (op, VOIDmode)) + PUT_MODE (*ccp, CC_Zmode); + else if (GET_MODE (*ccp) == CC_Zmode) + PUT_MODE (*ccp, CC_ZNmode); + continue; + } + if ((insn_type = get_attr_type (insn)) == TYPE_BRCC + || insn_type == TYPE_BRCC_NO_DELAY_SLOT) + continue; + + /* OK. so we have a jump insn. */ + /* We need to check that it is a bcc. */ + /* Bcc => set (pc) (if_then_else ) */ + pattern = PATTERN (insn); + if (GET_CODE (pattern) != SET + || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE + || ANY_RETURN_P (XEXP (SET_SRC (pattern), 1))) + continue; + + /* Now check if the jump is beyond the s9 range. */ + if (find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)) + continue; + offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); + + if(offset > 253 || offset < -254) + continue; + + pc_target = SET_SRC (pattern); + + /* Now go back and search for the set cc insn. 
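The displacement windows tested above can be summarised in one helper; this is purely illustrative (the name and return convention are mine). As the code shows, the short brcc_s form additionally requires an equality compare of a compact register against zero, which this sketch does not model.

    /* Classify a branch displacement the way the BRcc-generation loop
       does: beyond the s9 range no BRcc is emitted at all, inside a
       generous +/-140 window the 16-bit brcc_s encoding is considered,
       otherwise only the 32-bit BRcc form is a candidate.  */
    static int
    brcc_range_class (long offset)
    {
      if (offset > 253 || offset < -254)
        return 0;                      /* out of range for BRcc        */
      if (offset >= -140 && offset < 140)
        return 2;                      /* brcc_s may also be possible  */
      return 1;                        /* 32-bit BRcc only             */
    }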
*/ + + label = XEXP (pc_target, 1); + + { + rtx pat, scan, link_insn = NULL; + + for (scan = PREV_INSN (insn); + scan && GET_CODE (scan) != CODE_LABEL; + scan = PREV_INSN (scan)) + { + if (! INSN_P (scan)) + continue; + pat = PATTERN (scan); + if (GET_CODE (pat) == SET + && cc_register (SET_DEST (pat), VOIDmode)) + { + link_insn = scan; + break; + } + } + if (! link_insn) + continue; + else + /* Check if this is a data dependency. */ + { + rtx op, cc_clob_rtx, op0, op1, brcc_insn, note; + rtx cmp0, cmp1; + + /* Ok this is the set cc. copy args here. */ + op = XEXP (pc_target, 0); + + op0 = cmp0 = XEXP (SET_SRC (pat), 0); + op1 = cmp1 = XEXP (SET_SRC (pat), 1); + if (GET_CODE (op0) == ZERO_EXTRACT + && XEXP (op0, 1) == const1_rtx + && (GET_CODE (op) == EQ + || GET_CODE (op) == NE)) + { + /* btst / b{eq,ne} -> bbit{0,1} */ + op0 = XEXP (cmp0, 0); + op1 = XEXP (cmp0, 2); + } + else if (!register_operand (op0, VOIDmode) + || !general_operand (op1, VOIDmode)) + continue; + /* Be careful not to break what cmpsfpx_raw is + trying to create for checking equality of + single-precision floats. */ + else if (TARGET_SPFP + && GET_MODE (op0) == SFmode + && GET_MODE (op1) == SFmode) + continue; + + /* None of the two cmp operands should be set between the + cmp and the branch. */ + if (reg_set_between_p (op0, link_insn, insn)) + continue; + + if (reg_set_between_p (op1, link_insn, insn)) + continue; + + /* Since the MODE check does not work, check that this is + CC reg's last set location before insn, and also no + instruction between the cmp and branch uses the + condition codes. */ + if ((reg_set_between_p (SET_DEST (pat), link_insn, insn)) + || (reg_used_between_p (SET_DEST (pat), link_insn, insn))) + continue; + + /* CC reg should be dead after insn. */ + if (!find_regno_note (insn, REG_DEAD, CC_REG)) + continue; + + op = gen_rtx_fmt_ee (GET_CODE (op), + GET_MODE (op), cmp0, cmp1); + /* If we create a LIMM where there was none before, + we only benefit if we can avoid a scheduling bubble + for the ARC600. Otherwise, we'd only forgo chances + at short insn generation, and risk out-of-range + branches. */ + if (!brcc_nolimm_operator (op, VOIDmode) + && !long_immediate_operand (op1, VOIDmode) + && (TARGET_ARC700 + || next_active_insn (link_insn) != insn)) + continue; + + /* Emit bbit / brcc (or brcc_s if possible). + CC_Zmode indicates that brcc_s is possible. 
*/ + + if (op0 != cmp0) + cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG); + else if ((offset >= -140 && offset < 140) + && rtx_equal_p (op1, const0_rtx) + && compact_register_operand (op0, VOIDmode) + && (GET_CODE (op) == EQ + || GET_CODE (op) == NE)) + cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG); + else + cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG); + + brcc_insn + = gen_rtx_IF_THEN_ELSE (VOIDmode, op, label, pc_rtx); + brcc_insn = gen_rtx_SET (VOIDmode, pc_rtx, brcc_insn); + cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx); + brcc_insn + = gen_rtx_PARALLEL + (VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx)); + brcc_insn = emit_jump_insn_before (brcc_insn, insn); + + JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn); + note = find_reg_note (insn, REG_BR_PROB, 0); + if (note) + { + XEXP (note, 1) = REG_NOTES (brcc_insn); + REG_NOTES (brcc_insn) = note; + } + note = find_reg_note (link_insn, REG_DEAD, op0); + if (note) + { + remove_note (link_insn, note); + XEXP (note, 1) = REG_NOTES (brcc_insn); + REG_NOTES (brcc_insn) = note; + } + note = find_reg_note (link_insn, REG_DEAD, op1); + if (note) + { + XEXP (note, 1) = REG_NOTES (brcc_insn); + REG_NOTES (brcc_insn) = note; + } + + changed = 1; + + /* Delete the bcc insn. */ + set_insn_deleted (insn); + + /* Delete the cmp insn. */ + set_insn_deleted (link_insn); + + } + } + } + /* Clear out insn_addresses. */ + INSN_ADDRESSES_FREE (); + + } while (changed); + + if (INSN_ADDRESSES_SET_P()) + fatal_error ("insn addresses not freed"); + + arc_reorg_in_progress = 0; +} + + /* Check if the operands are valid for BRcc.d generation + Valid Brcc.d patterns are + Brcc.d b, c, s9 + Brcc.d b, u6, s9 + + For cc={GT, LE, GTU, LEU}, u6=63 can not be allowed, + since they are encoded by the assembler as {GE, LT, HS, LS} 64, which + does not have a delay slot + + Assumed precondition: Second operand is either a register or a u6 value. */ + +bool +valid_brcc_with_delay_p (rtx *operands) +{ + if (optimize_size && GET_MODE (operands[4]) == CC_Zmode) + return false; + return brcc_nolimm_operator (operands[0], VOIDmode); +} + +/* ??? Hack. This should no really be here. See PR32143. */ +static bool +arc_decl_anon_ns_mem_p (const_tree decl) +{ + while (1) + { + if (decl == NULL_TREE || decl == error_mark_node) + return false; + if (TREE_CODE (decl) == NAMESPACE_DECL + && DECL_NAME (decl) == NULL_TREE) + return true; + /* Classes and namespaces inside anonymous namespaces have + TREE_PUBLIC == 0, so we can shortcut the search. */ + else if (TYPE_P (decl)) + return (TREE_PUBLIC (TYPE_NAME (decl)) == 0); + else if (TREE_CODE (decl) == NAMESPACE_DECL) + return (TREE_PUBLIC (decl) == 0); + else + decl = DECL_CONTEXT (decl); + } +} + +/* Implement TARGET_IN_SMALL_DATA_P. Return true if it would be safe to + access DECL using %gp_rel(...)($gp). */ + +static bool +arc_in_small_data_p (const_tree decl) +{ + HOST_WIDE_INT size; + + if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL) + return false; + + + /* We don't yet generate small-data references for -mabicalls. See related + -G handling in override_options. */ + if (TARGET_NO_SDATA_SET) + return false; + + if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0) + { + const char *name; + + /* Reject anything that isn't in a known small-data section. 
*/ + name = TREE_STRING_POINTER (DECL_SECTION_NAME (decl)); + if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0) + return false; + + /* If a symbol is defined externally, the assembler will use the + usual -G rules when deciding how to implement macros. */ + if (!DECL_EXTERNAL (decl)) + return true; + } + /* Only global variables go into sdata section for now. */ + else if (1) + { + /* Don't put constants into the small data section: we want them + to be in ROM rather than RAM. */ + if (TREE_CODE (decl) != VAR_DECL) + return false; + + if (TREE_READONLY (decl) + && !TREE_SIDE_EFFECTS (decl) + && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl)))) + return false; + + /* TREE_PUBLIC might change after the first call, because of the patch + for PR19238. */ + if (default_binds_local_p_1 (decl, 1) + || arc_decl_anon_ns_mem_p (decl)) + return false; + + /* To ensure -mvolatile-cache works + ld.di does not have a gp-relative variant. */ + if (TREE_THIS_VOLATILE (decl)) + return false; + } + + /* Disable sdata references to weak variables. */ + if (DECL_WEAK (decl)) + return false; + + size = int_size_in_bytes (TREE_TYPE (decl)); + +/* if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */ +/* return false; */ + + /* Allow only <=4B long data types into sdata. */ + return (size > 0 && size <= 4); +} + +/* Return true if X is a small data address that can be rewritten + as a gp+symref. */ + +static bool +arc_rewrite_small_data_p (rtx x) +{ + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + + if (GET_CODE (x) == PLUS) + { + if (GET_CODE (XEXP (x, 1)) == CONST_INT) + x = XEXP (x, 0); + } + + return (GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_SMALL_P(x)); +} + +/* A for_each_rtx callback, used by arc_rewrite_small_data. */ + +static int +arc_rewrite_small_data_1 (rtx *loc, void *data) +{ + if (arc_rewrite_small_data_p (*loc)) + { + rtx top; + + gcc_assert (SDATA_BASE_REGNUM == PIC_OFFSET_TABLE_REGNUM); + *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc); + if (loc == data) + return -1; + top = *(rtx*) data; + if (GET_CODE (top) == MEM && &XEXP (top, 0) == loc) + ; /* OK. */ + else if (GET_CODE (top) == MEM + && GET_CODE (XEXP (top, 0)) == PLUS + && GET_CODE (XEXP (XEXP (top, 0), 0)) == MULT) + *loc = force_reg (Pmode, *loc); + else + gcc_unreachable (); + return -1; + } + + if (GET_CODE (*loc) == PLUS + && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx)) + return -1; + + return 0; +} + +/* If possible, rewrite OP so that it refers to small data using + explicit relocations. */ + +rtx +arc_rewrite_small_data (rtx op) +{ + op = copy_insn (op); + for_each_rtx (&op, arc_rewrite_small_data_1, &op); + return op; +} + +/* A for_each_rtx callback for small_data_pattern. */ + +static int +small_data_pattern_1 (rtx *loc, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*loc) == PLUS + && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx)) + return -1; + + return arc_rewrite_small_data_p (*loc); +} + +/* Return true if OP refers to small data symbols directly, not through + a PLUS. */ + +bool +small_data_pattern (rtx op, enum machine_mode) +{ + return (GET_CODE (op) != SEQUENCE + && for_each_rtx (&op, small_data_pattern_1, 0)); +} + +/* Return true if OP is an acceptable memory operand for ARCompact + 16-bit gp-relative load instructions. + op shd look like : [r26, symref@sda] + i.e. (mem (plus (reg 26) (symref with smalldata flag set)) + */ +/* volatile cache option still to be handled. 
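Taken together, the checks above admit only small, writable, non-weak globals. A hypothetical translation unit, for illustration only and assuming the small-data support is enabled (TARGET_NO_SDATA_SET not in effect):

    int counter;                          /* 4 bytes, writable: small-data candidate.    */
    long long big_counter;                /* 8 bytes: larger than the 4-byte cap.        */
    const int table[4] = { 1, 2, 3, 4 };  /* read-only: kept out so it stays in ROM.     */
    volatile int status;                  /* volatile: ld.di has no gp-relative variant. */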
*/ + +bool +compact_sda_memory_operand (rtx op, enum machine_mode mode) +{ + rtx addr; + int size; + + /* Eliminate non-memory operations. */ + if (GET_CODE (op) != MEM) + return false; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + size = GET_MODE_SIZE (mode); + + /* dword operations really put out 2 instructions, so eliminate them. */ + if (size > UNITS_PER_WORD) + return false; + + /* Decode the address now. */ + addr = XEXP (op, 0); + + return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr); +} + +/* Implement ASM_OUTPUT_ALIGNED_DECL_LOCAL. */ + +void +arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name, + unsigned HOST_WIDE_INT size, + unsigned HOST_WIDE_INT align, + unsigned HOST_WIDE_INT globalize_p) +{ + int in_small_data = arc_in_small_data_p (decl); + + if (in_small_data) + switch_to_section (get_named_section (NULL, ".sbss", 0)); + /* named_section (0,".sbss",0); */ + else + switch_to_section (bss_section); + + if (globalize_p) + (*targetm.asm_out.globalize_label) (stream, name); + + ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT)); + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object"); + ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size); + ASM_OUTPUT_LABEL (stream, name); + + if (size != 0) + ASM_OUTPUT_SKIP (stream, size); +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +/* SIMD builtins support. */ +enum simd_insn_args_type { + Va_Vb_Vc, + Va_Vb_rlimm, + Va_Vb_Ic, + Va_Vb_u6, + Va_Vb_u8, + Va_rlimm_u8, + + Va_Vb, + + void_rlimm, + void_u6, + + Da_u3_rlimm, + Da_rlimm_rlimm, + + Va_Ib_u8, + void_Va_Ib_u8, + + Va_Vb_Ic_u8, + void_Va_u3_Ib_u8 +}; + +struct builtin_description +{ + enum simd_insn_args_type args_type; + const enum insn_code icode; + const char * const name; + const enum arc_builtins code; +}; + +static const struct builtin_description arc_simd_builtin_desc_list[] = +{ + /* VVV builtins go first. 
*/ +#define SIMD_BUILTIN(type, code, string, builtin) \ + { type,CODE_FOR_##code, "__builtin_arc_" string, \ + ARC_SIMD_BUILTIN_##builtin }, + + SIMD_BUILTIN (Va_Vb_Vc, vaddaw_insn, "vaddaw", VADDAW) + SIMD_BUILTIN (Va_Vb_Vc, vaddw_insn, "vaddw", VADDW) + SIMD_BUILTIN (Va_Vb_Vc, vavb_insn, "vavb", VAVB) + SIMD_BUILTIN (Va_Vb_Vc, vavrb_insn, "vavrb", VAVRB) + SIMD_BUILTIN (Va_Vb_Vc, vdifaw_insn, "vdifaw", VDIFAW) + SIMD_BUILTIN (Va_Vb_Vc, vdifw_insn, "vdifw", VDIFW) + SIMD_BUILTIN (Va_Vb_Vc, vmaxaw_insn, "vmaxaw", VMAXAW) + SIMD_BUILTIN (Va_Vb_Vc, vmaxw_insn, "vmaxw", VMAXW) + SIMD_BUILTIN (Va_Vb_Vc, vminaw_insn, "vminaw", VMINAW) + SIMD_BUILTIN (Va_Vb_Vc, vminw_insn, "vminw", VMINW) + SIMD_BUILTIN (Va_Vb_Vc, vmulaw_insn, "vmulaw", VMULAW) + SIMD_BUILTIN (Va_Vb_Vc, vmulfaw_insn, "vmulfaw", VMULFAW) + SIMD_BUILTIN (Va_Vb_Vc, vmulfw_insn, "vmulfw", VMULFW) + SIMD_BUILTIN (Va_Vb_Vc, vmulw_insn, "vmulw", VMULW) + SIMD_BUILTIN (Va_Vb_Vc, vsubaw_insn, "vsubaw", VSUBAW) + SIMD_BUILTIN (Va_Vb_Vc, vsubw_insn, "vsubw", VSUBW) + SIMD_BUILTIN (Va_Vb_Vc, vsummw_insn, "vsummw", VSUMMW) + SIMD_BUILTIN (Va_Vb_Vc, vand_insn, "vand", VAND) + SIMD_BUILTIN (Va_Vb_Vc, vandaw_insn, "vandaw", VANDAW) + SIMD_BUILTIN (Va_Vb_Vc, vbic_insn, "vbic", VBIC) + SIMD_BUILTIN (Va_Vb_Vc, vbicaw_insn, "vbicaw", VBICAW) + SIMD_BUILTIN (Va_Vb_Vc, vor_insn, "vor", VOR) + SIMD_BUILTIN (Va_Vb_Vc, vxor_insn, "vxor", VXOR) + SIMD_BUILTIN (Va_Vb_Vc, vxoraw_insn, "vxoraw", VXORAW) + SIMD_BUILTIN (Va_Vb_Vc, veqw_insn, "veqw", VEQW) + SIMD_BUILTIN (Va_Vb_Vc, vlew_insn, "vlew", VLEW) + SIMD_BUILTIN (Va_Vb_Vc, vltw_insn, "vltw", VLTW) + SIMD_BUILTIN (Va_Vb_Vc, vnew_insn, "vnew", VNEW) + SIMD_BUILTIN (Va_Vb_Vc, vmr1aw_insn, "vmr1aw", VMR1AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr1w_insn, "vmr1w", VMR1W) + SIMD_BUILTIN (Va_Vb_Vc, vmr2aw_insn, "vmr2aw", VMR2AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr2w_insn, "vmr2w", VMR2W) + SIMD_BUILTIN (Va_Vb_Vc, vmr3aw_insn, "vmr3aw", VMR3AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr3w_insn, "vmr3w", VMR3W) + SIMD_BUILTIN (Va_Vb_Vc, vmr4aw_insn, "vmr4aw", VMR4AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr4w_insn, "vmr4w", VMR4W) + SIMD_BUILTIN (Va_Vb_Vc, vmr5aw_insn, "vmr5aw", VMR5AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr5w_insn, "vmr5w", VMR5W) + SIMD_BUILTIN (Va_Vb_Vc, vmr6aw_insn, "vmr6aw", VMR6AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr6w_insn, "vmr6w", VMR6W) + SIMD_BUILTIN (Va_Vb_Vc, vmr7aw_insn, "vmr7aw", VMR7AW) + SIMD_BUILTIN (Va_Vb_Vc, vmr7w_insn, "vmr7w", VMR7W) + SIMD_BUILTIN (Va_Vb_Vc, vmrb_insn, "vmrb", VMRB) + SIMD_BUILTIN (Va_Vb_Vc, vh264f_insn, "vh264f", VH264F) + SIMD_BUILTIN (Va_Vb_Vc, vh264ft_insn, "vh264ft", VH264FT) + SIMD_BUILTIN (Va_Vb_Vc, vh264fw_insn, "vh264fw", VH264FW) + SIMD_BUILTIN (Va_Vb_Vc, vvc1f_insn, "vvc1f", VVC1F) + SIMD_BUILTIN (Va_Vb_Vc, vvc1ft_insn, "vvc1ft", VVC1FT) + + SIMD_BUILTIN (Va_Vb_rlimm, vbaddw_insn, "vbaddw", VBADDW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmaxw_insn, "vbmaxw", VBMAXW) + SIMD_BUILTIN (Va_Vb_rlimm, vbminw_insn, "vbminw", VBMINW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulaw_insn, "vbmulaw", VBMULAW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulfw_insn, "vbmulfw", VBMULFW) + SIMD_BUILTIN (Va_Vb_rlimm, vbmulw_insn, "vbmulw", VBMULW) + SIMD_BUILTIN (Va_Vb_rlimm, vbrsubw_insn, "vbrsubw", VBRSUBW) + SIMD_BUILTIN (Va_Vb_rlimm, vbsubw_insn, "vbsubw", VBSUBW) + + /* Va, Vb, Ic instructions. */ + SIMD_BUILTIN (Va_Vb_Ic, vasrw_insn, "vasrw", VASRW) + SIMD_BUILTIN (Va_Vb_Ic, vsr8_insn, "vsr8", VSR8) + SIMD_BUILTIN (Va_Vb_Ic, vsr8aw_insn, "vsr8aw", VSR8AW) + + /* Va, Vb, u6 instructions. 
*/ + SIMD_BUILTIN (Va_Vb_u6, vasrrwi_insn, "vasrrwi", VASRRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrsrwi_insn, "vasrsrwi", VASRSRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrwi_insn, "vasrwi", VASRWi) + SIMD_BUILTIN (Va_Vb_u6, vasrpwbi_insn, "vasrpwbi", VASRPWBi) + SIMD_BUILTIN (Va_Vb_u6, vasrrpwbi_insn,"vasrrpwbi", VASRRPWBi) + SIMD_BUILTIN (Va_Vb_u6, vsr8awi_insn, "vsr8awi", VSR8AWi) + SIMD_BUILTIN (Va_Vb_u6, vsr8i_insn, "vsr8i", VSR8i) + + /* Va, Vb, u8 (simm) instructions. */ + SIMD_BUILTIN (Va_Vb_u8, vmvaw_insn, "vmvaw", VMVAW) + SIMD_BUILTIN (Va_Vb_u8, vmvw_insn, "vmvw", VMVW) + SIMD_BUILTIN (Va_Vb_u8, vmvzw_insn, "vmvzw", VMVZW) + SIMD_BUILTIN (Va_Vb_u8, vd6tapf_insn, "vd6tapf", VD6TAPF) + + /* Va, rlimm, u8 (simm) instructions. */ + SIMD_BUILTIN (Va_rlimm_u8, vmovaw_insn, "vmovaw", VMOVAW) + SIMD_BUILTIN (Va_rlimm_u8, vmovw_insn, "vmovw", VMOVW) + SIMD_BUILTIN (Va_rlimm_u8, vmovzw_insn, "vmovzw", VMOVZW) + + /* Va, Vb instructions. */ + SIMD_BUILTIN (Va_Vb, vabsaw_insn, "vabsaw", VABSAW) + SIMD_BUILTIN (Va_Vb, vabsw_insn, "vabsw", VABSW) + SIMD_BUILTIN (Va_Vb, vaddsuw_insn, "vaddsuw", VADDSUW) + SIMD_BUILTIN (Va_Vb, vsignw_insn, "vsignw", VSIGNW) + SIMD_BUILTIN (Va_Vb, vexch1_insn, "vexch1", VEXCH1) + SIMD_BUILTIN (Va_Vb, vexch2_insn, "vexch2", VEXCH2) + SIMD_BUILTIN (Va_Vb, vexch4_insn, "vexch4", VEXCH4) + SIMD_BUILTIN (Va_Vb, vupbaw_insn, "vupbaw", VUPBAW) + SIMD_BUILTIN (Va_Vb, vupbw_insn, "vupbw", VUPBW) + SIMD_BUILTIN (Va_Vb, vupsbaw_insn, "vupsbaw", VUPSBAW) + SIMD_BUILTIN (Va_Vb, vupsbw_insn, "vupsbw", VUPSBW) + + /* DIb, rlimm, rlimm instructions. */ + SIMD_BUILTIN (Da_rlimm_rlimm, vdirun_insn, "vdirun", VDIRUN) + SIMD_BUILTIN (Da_rlimm_rlimm, vdorun_insn, "vdorun", VDORUN) + + /* DIb, limm, rlimm instructions. */ + SIMD_BUILTIN (Da_u3_rlimm, vdiwr_insn, "vdiwr", VDIWR) + SIMD_BUILTIN (Da_u3_rlimm, vdowr_insn, "vdowr", VDOWR) + + /* rlimm instructions. */ + SIMD_BUILTIN (void_rlimm, vrec_insn, "vrec", VREC) + SIMD_BUILTIN (void_rlimm, vrun_insn, "vrun", VRUN) + SIMD_BUILTIN (void_rlimm, vrecrun_insn, "vrecrun", VRECRUN) + SIMD_BUILTIN (void_rlimm, vendrec_insn, "vendrec", VENDREC) + + /* Va, [Ib,u8] instructions. */ + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32wh_insn, "vld32wh", VLD32WH) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32wl_insn, "vld32wl", VLD32WL) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld64_insn, "vld64", VLD64) + SIMD_BUILTIN (Va_Vb_Ic_u8, vld32_insn, "vld32", VLD32) + + SIMD_BUILTIN (Va_Ib_u8, vld64w_insn, "vld64w", VLD64W) + SIMD_BUILTIN (Va_Ib_u8, vld128_insn, "vld128", VLD128) + SIMD_BUILTIN (void_Va_Ib_u8, vst128_insn, "vst128", VST128) + SIMD_BUILTIN (void_Va_Ib_u8, vst64_insn, "vst64", VST64) + + /* Va, [Ib, u8] instructions. 
*/ + SIMD_BUILTIN (void_Va_u3_Ib_u8, vst16_n_insn, "vst16_n", VST16_N) + SIMD_BUILTIN (void_Va_u3_Ib_u8, vst32_n_insn, "vst32_n", VST32_N) + + SIMD_BUILTIN (void_u6, vinti_insn, "vinti", VINTI) +}; + +static void +arc_init_simd_builtins (void) +{ + int i; + tree endlink = void_list_node; + tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); + + tree v8hi_ftype_v8hi_v8hi + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + endlink))); + tree v8hi_ftype_v8hi_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree v8hi_ftype_v8hi_int_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + + tree void_ftype_v8hi_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))); + + tree void_ftype_v8hi_int_int_int + = (build_function_type + (void_type_node, + tree_cons (NULL_TREE, V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, + integer_type_node, + endlink)))))); + + tree v8hi_ftype_int_int + = build_function_type (V8HI_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree void_ftype_int_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink))); + + tree void_ftype_int + = build_function_type (void_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink)); + + tree v8hi_ftype_v8hi + = build_function_type (V8HI_type_node, tree_cons (NULL_TREE, V8HI_type_node, + endlink)); + + /* These asserts have been introduced to ensure that the order of builtins + does not get messed up, else the initialization goes wrong. 
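For reference, this is roughly how one of the Va_Vb_Vc builtins registered from the table above could be used from C. The v8hi typedef is an assumption matching the V8HImode vector type the initializer builds, and the example only compiles with an ARC toolchain that has the SIMD extension enabled.

    /* Eight 16-bit lanes, matching V8HImode (16 bytes).  */
    typedef short v8hi __attribute__ ((vector_size (16)));

    v8hi
    vaddw_demo (v8hi a, v8hi b)
    {
      /* Expands via ARC_SIMD_BUILTIN_VADDW to the vaddw_insn pattern.  */
      return __builtin_arc_vaddw (a, b);
    }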
*/ + gcc_assert (arc_simd_builtin_desc_list [0].args_type == Va_Vb_Vc); + for (i=0; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Vc; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_v8hi, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, + void_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_rlimm); + for (; arc_simd_builtin_desc_list [i].args_type == void_rlimm; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + v8hi_ftype_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, + void_ftype_v8hi_int_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert 
(arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8); + for (; arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_v8hi_int_int_int, + arc_simd_builtin_desc_list[i].code); + + gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_u6); + for (; arc_simd_builtin_desc_list [i].args_type == void_u6; i++) + def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list[i].name, + void_ftype_int, arc_simd_builtin_desc_list[i].code); + + gcc_assert(i == ARRAY_SIZE (arc_simd_builtin_desc_list)); +} + +/* Helper function of arc_expand_builtin; has the same parameters, + except that EXP is now known to be a call to a simd builtin. */ + +static rtx +arc_expand_simd_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + tree arg2; + tree arg3; + rtx op0; + rtx op1; + rtx op2; + rtx op3; + rtx op4; + rtx pat; + unsigned int i; + int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + enum machine_mode mode0; + enum machine_mode mode1; + enum machine_mode mode2; + enum machine_mode mode3; + enum machine_mode mode4; + const struct builtin_description * d; + + for (i = 0, d = arc_simd_builtin_desc_list; + i < ARRAY_SIZE (arc_simd_builtin_desc_list); i++, d++) + if (d->code == (const enum arc_builtins) fcode) + break; + + /* We must get an entry here. */ + gcc_assert (i < ARRAY_SIZE (arc_simd_builtin_desc_list)); + + switch (d->args_type) + { + case Va_Vb_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_u6: + case Va_Vb_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1) + || (d->args_type == Va_Vb_u6 && !UNSIGNED_INT6 (INTVAL (op1))) + || (d->args_type == Va_Vb_u8 && !UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned %d-bit value", + d->name, + (d->args_type == Va_Vb_u6)? 6: 8); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + + emit_insn (pat); + return target; + + case Va_rlimm_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_Ic: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb_Vc: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, V8HImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Va_Vb: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + + target = gen_reg_rtx (V8HImode); + mode0 = insn_data[icode].operand[1].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case Da_rlimm_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + + if (icode == CODE_FOR_vdirun_insn) + target = gen_rtx_REG (SImode, 131); + else if (icode == CODE_FOR_vdorun_insn) + target = gen_rtx_REG (SImode, 139); + else + gcc_unreachable (); + + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Da_u3_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); + + + if (! (GET_CODE (op0) == CONST_INT) + || !(UNSIGNED_INT3 (INTVAL (op0)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (DR0-DR7)", + d->name); + + mode1 = insn_data[icode].operand[1].mode; + + if (icode == CODE_FOR_vdiwr_insn) + target = gen_rtx_REG (SImode, + ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0)); + else if (icode == CODE_FOR_vdowr_insn) + target = gen_rtx_REG (SImode, + ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0)); + else + gcc_unreachable (); + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op1); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case void_u6: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + /* op0 should be u6. */ + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0) + || !(UNSIGNED_INT6 (INTVAL (op0)))) + error ("operand of %s instruction should be an unsigned 6-bit value", + d->name); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case void_rlimm: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); + + fold (arg0); + + op0 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); + mode0 = insn_data[icode].operand[0].mode; + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Va_Vb_Ic_u8: + { + rtx src_vreg; + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */ + + src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); + op0 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* [I]0-7 */ + op1 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + + /* target <- src vreg */ + emit_insn (gen_move_insn (target, src_vreg)); + + /* target <- vec_concat: target, mem(Ib, u8) */ + mode0 = insn_data[icode].operand[3].mode; + mode1 = insn_data[icode].operand[1].mode; + + if ( (!(*insn_data[icode].operand[3].predicate) (op0, mode0)) + || !(UNSIGNED_INT3 (INTVAL (op0)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1)) + || !(UNSIGNED_INT8 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op1, op2, op0); + if (! 
pat) + return 0; + + emit_insn (pat); + return target; + } + + case void_Va_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */ + + op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + op1 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* I[0-7] */ + op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL); /* Vdest */ + + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + + if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2)) + || !(UNSIGNED_INT8 (INTVAL (op2)))) + error ("operand 3 of %s instruction should be an unsigned 8-bit value", + d->name); + + if (!(*insn_data[icode].operand[3].predicate) (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + + pat = GEN_FCN (icode) (op0, op1, op2, op3); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + case Va_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */ + + op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */ + op1 = expand_expr (arg0, NULL_RTX, SImode, EXPAND_NORMAL); /* I[0-7] */ + op2 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + + /* target <- src vreg */ + target = gen_reg_rtx (V8HImode); + + /* target <- vec_concat: target, mem(Ib, u8) */ + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + || !(UNSIGNED_INT3 (INTVAL (op1)))) + error ("operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if ( (!(*insn_data[icode].operand[3].predicate) (op2, mode2)) + || !(UNSIGNED_INT8 (INTVAL (op2)))) + error ("operand 2 of %s instruction should be an unsigned 8-bit value", + d->name); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + + emit_insn (pat); + return target; + + case void_Va_u3_Ib_u8: + icode = d->icode; + arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */ + arg1 = CALL_EXPR_ARG (exp, 1); /* u3 */ + arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7 */ + arg3 = CALL_EXPR_ARG (exp, 3); /* u8 */ + + op0 = expand_expr (arg3, NULL_RTX, SImode, EXPAND_NORMAL); /* u8 */ + op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR */ + op2 = expand_expr (arg2, NULL_RTX, SImode, EXPAND_NORMAL); /* [I]0-7 */ + op3 = expand_expr (arg0, NULL_RTX, V8HImode, EXPAND_NORMAL);/* vreg to be stored */ + op4 = expand_expr (arg1, NULL_RTX, SImode, EXPAND_NORMAL); /* vreg 0-7 subreg no. */ + + mode0 = insn_data[icode].operand[0].mode; + mode2 = insn_data[icode].operand[2].mode; + mode3 = insn_data[icode].operand[3].mode; + mode4 = insn_data[icode].operand[4].mode; + + /* Do some correctness checks for the operands. 
*/ + if ( (!(*insn_data[icode].operand[0].predicate) (op0, mode0)) + || !(UNSIGNED_INT8 (INTVAL (op0)))) + error ("operand 4 of %s instruction should be an unsigned 8-bit value (0-255)", + d->name); + + if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2)) + || !(UNSIGNED_INT3 (INTVAL (op2)))) + error ("operand 3 of %s instruction should be an unsigned 3-bit value (I0-I7)", + d->name); + + if (!(*insn_data[icode].operand[3].predicate) (op3, mode3)) + op3 = copy_to_mode_reg (mode3, op3); + + if ( (!(*insn_data[icode].operand[4].predicate) (op4, mode4)) + || !(UNSIGNED_INT3 (INTVAL (op4)))) + error ("operand 2 of %s instruction should be an unsigned 3-bit value (subreg 0-7)", + d->name); + else if (icode == CODE_FOR_vst32_n_insn + && ((INTVAL(op4) % 2 ) != 0)) + error ("operand 2 of %s instruction should be an even 3-bit value (subreg 0,2,4,6)", + d->name); + + pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); + if (! pat) + return 0; + + emit_insn (pat); + return NULL_RTX; + + default: + gcc_unreachable (); + } + return NULL_RTX; +} + +static bool +arc_preserve_reload_p (rtx in) +{ + return (GET_CODE (in) == PLUS + && RTX_OK_FOR_BASE_P (XEXP (in, 0), true) + && CONST_INT_P (XEXP (in, 1)) + && !((INTVAL (XEXP (in, 1)) & 511))); +} + +int +arc_register_move_cost (enum machine_mode, + enum reg_class from_class, enum reg_class to_class) +{ + /* The ARC600 has no bypass for extension registers, hence a nop might be + needed to be inserted after a write so that reads are safe. */ + if (TARGET_ARC600) + { + if (to_class == MPY_WRITABLE_CORE_REGS) + return 3; + /* Instructions modifying LP_COUNT need 4 additional cycles before + the register will actually contain the value. */ + else if (to_class == LPCOUNT_REG) + return 6; + else if (to_class == WRITABLE_CORE_REGS) + return 6; + } + + /* The ARC700 stalls for 3 cycles when *reading* from lp_count. */ + if (TARGET_ARC700 + && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS + || from_class == WRITABLE_CORE_REGS)) + return 8; + + /* Force an attempt to 'mov Dy,Dx' to spill. */ + if (TARGET_ARC700 && TARGET_DPFP + && from_class == DOUBLE_REGS && to_class == DOUBLE_REGS) + return 100; + + return 2; +} + +/* Emit code for an addsi3 instruction with OPERANDS. + COND_P indicates if this will use conditional execution. + Return the length of the instruction. + If OUTPUT_P is false, don't actually output the instruction, just return + its length. */ +int +arc_output_addsi (rtx *operands, bool cond_p, bool output_p) +{ + char format[32]; + + int match = operands_match_p (operands[0], operands[1]); + int match2 = operands_match_p (operands[0], operands[2]); + int intval = (REG_P (operands[2]) ? 1 + : CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057); + int neg_intval = -intval; + int short_0 = satisfies_constraint_Rcq (operands[0]); + int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1])); + int ret = 0; + +#define ADDSI_OUTPUT1(FORMAT) do {\ + if (output_p) \ + output_asm_insn (FORMAT, operands);\ + return ret; \ +} while (0) +#define ADDSI_OUTPUT(LIST) do {\ + if (output_p) \ + sprintf LIST;\ + ADDSI_OUTPUT1 (format);\ + return ret; \ +} while (0) + + /* First try to emit a 16 bit insn. */ + ret = 2; + if (!cond_p + /* If we are actually about to output this insn, don't try a 16 bit + variant if we already decided that we don't want that + (I.e. we upsized this insn to align some following insn.) + E.g. add_s r0,sp,70 is 16 bit, but add r0,sp,70 requires a LIMM - + but add1 r0,sp,35 doesn't. 
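The add1/add2/add3 trick mentioned at the end of the comment above relies on the constant's lowest set bit; here is a standalone restatement of the scaling computation the 32-bit path below uses. The helper name and the worked example are illustrative, and intval is assumed not to be INT_MIN.

    #include <assert.h>

    /* Mirror of the shift selection in arc_output_addsi: range_factor
       isolates the lowest set bit of the constant, and the scaled
       immediate intval >> shift must then fit the base add/sub range.
       E.g. 140 has lowest set bit 4, so shift = 2 and the insn can be
       "add2 dst,src,35" (35 << 2 == 140).  */
    static int
    addsi_scale (int intval)
    {
      int neg_intval = -intval;
      int range_factor = neg_intval & intval;  /* lowest set bit */
      int shift = range_factor >= 8 ? 3 : (range_factor >> 1);

      assert ((((1 << shift) - 1) & intval) == 0);
      return shift;
    }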
*/ + && (!output_p || (get_attr_length (current_output_insn) & 2))) + { + if (short_p + && (REG_P (operands[2]) + ? (match || satisfies_constraint_Rcq (operands[2])) + : (unsigned) intval <= (match ? 127 : 7))) + ADDSI_OUTPUT1 ("add%? %0,%1,%2"); + if (short_0 && REG_P (operands[1]) && match2) + ADDSI_OUTPUT1 ("add%? %0,%2,%1"); + if ((short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM) + && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124)) + ADDSI_OUTPUT1 ("add%? %0,%1,%2"); + + if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7)) + || (REGNO (operands[0]) == STACK_POINTER_REGNUM + && match && !(neg_intval & ~124))) + ADDSI_OUTPUT1 ("sub%? %0,%1,%n2"); + } + + /* Now try to emit a 32 bit insn without long immediate. */ + ret = 4; + if (!match && match2 && REG_P (operands[1])) + ADDSI_OUTPUT1 ("add%? %0,%2,%1"); + if (match || !cond_p) + { + int limit = (match && !cond_p) ? 0x7ff : 0x3f; + int range_factor = neg_intval & intval; + int shift; + + if (intval == -1 << 31) + ADDSI_OUTPUT1 ("bxor%? %0,%1,31"); + + /* If we can use a straight add / sub instead of a {add,sub}[123] of + same size, do, so - the insn latency is lower. */ + /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but + 0x800 is not. */ + if ((intval >= 0 && intval <= limit) + || (intval == -0x800 && limit == 0x7ff)) + ADDSI_OUTPUT1 ("add%? %0,%1,%2"); + else if ((intval < 0 && neg_intval <= limit) + || (intval == 0x800 && limit == 0x7ff)) + ADDSI_OUTPUT1 ("sub%? %0,%1,%n2"); + shift = range_factor >= 8 ? 3 : (range_factor >> 1); + gcc_assert (shift == 0 || shift == 1 || shift == 2 || shift == 3); + gcc_assert ((((1 << shift) - 1) & intval) == 0); + if (((intval < 0 && intval != -0x4000) + /* sub[123] is slower than add_s / sub, only use it if it + avoids a long immediate. */ + && neg_intval <= limit << shift) + || (intval == 0x4000 && limit == 0x7ff)) + ADDSI_OUTPUT ((format, "sub%d%%? %%0,%%1,%d", + shift, neg_intval >> shift)); + else if ((intval >= 0 && intval <= limit << shift) + || (intval == -0x4000 && limit == 0x7ff)) + ADDSI_OUTPUT ((format, "add%d%%? %%0,%%1,%d", shift, intval >> shift)); + } + /* Try to emit a 16 bit opcode with long immediate. */ + ret = 6; + if (short_p && match) + ADDSI_OUTPUT1 ("add%? %0,%1,%S2"); + + /* We have to use a 32 bit opcode, and with a long immediate. */ + ret = 8; + ADDSI_OUTPUT1 (intval < 0 ? "sub%? %0,%1,%n2" : "add%? %0,%1,%S2"); +} + +/* Emit code for an commutative_cond_exec instruction with OPERANDS. + Return the length of the instruction. + If OUTPUT_P is false, don't actually output the instruction, just return + its length. */ +int +arc_output_commutative_cond_exec (rtx *operands, bool output_p) +{ + enum rtx_code commutative_op = GET_CODE (operands[3]); + const char *pat = NULL; + + /* Canonical rtl should not have a constant in the first operand position. */ + gcc_assert (!CONSTANT_P (operands[1])); + + switch (commutative_op) + { + case AND: + if (satisfies_constraint_C1p (operands[2])) + pat = "bmsk%? %0,%1,%Z2"; + else if (satisfies_constraint_Ccp (operands[2])) + pat = "bclr%? %0,%1,%M2"; + else if (satisfies_constraint_CnL (operands[2])) + pat = "bic%? %0,%1,%n2-1"; + break; + case IOR: + if (satisfies_constraint_C0p (operands[2])) + pat = "bset%? %0,%1,%z2"; + break; + case XOR: + if (satisfies_constraint_C0p (operands[2])) + pat = "bxor%? %0,%1,%z2"; + break; + case PLUS: + return arc_output_addsi (operands, true, output_p); + default: break; + } + if (output_p) + output_asm_insn (pat ? 
pat : "%O3.%d5 %0,%1,%2", operands); + if (pat || REG_P (operands[2]) || satisfies_constraint_L (operands[2])) + return 4; + return 8; +} + +/* Helper function of arc_expand_movmem. ADDR points to a chunk of memory. + Emit code and return an potentially modified address such that offsets + up to SIZE are can be added to yield a legitimate address. + if REUSE is set, ADDR is a register that may be modified. */ + +static rtx +force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse) +{ + rtx base = addr; + rtx offs = const0_rtx; + + if (GET_CODE (base) == PLUS) + { + offs = XEXP (base, 1); + base = XEXP (base, 0); + } + if (!REG_P (base) + || (REGNO (base) != STACK_POINTER_REGNUM + && REGNO_PTR_FRAME_P (REGNO (addr))) + || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs)) + || !SMALL_INT (INTVAL (offs) + size)) + { + if (reuse) + emit_insn (gen_add2_insn (addr, offs)); + else + addr = copy_to_mode_reg (Pmode, addr); + } + return addr; +} + +/* Like move_by_pieces, but take account of load latency, + and actual offset ranges. + Return true on success. */ + +bool +arc_expand_movmem (rtx *operands) +{ + rtx dst = operands[0]; + rtx src = operands[1]; + rtx dst_addr, src_addr; + HOST_WIDE_INT size; + int align = INTVAL (operands[3]); + unsigned n_pieces; + int piece = align; + rtx store[2]; + rtx tmpx[2]; + int i; + + if (!CONST_INT_P (operands[2])) + return false; + size = INTVAL (operands[2]); + /* move_by_pieces_ninsns is static, so we can't use it. */ + if (align >= 4) + n_pieces = (size + 2) / 4U + (size & 1); + else if (align == 2) + n_pieces = (size + 1) / 2U; + else + n_pieces = size; + if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15)) + return false; + if (piece > 4) + piece = 4; + dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0); + src_addr = force_offsettable (XEXP (operands[1], 0), size, 0); + store[0] = store[1] = NULL_RTX; + tmpx[0] = tmpx[1] = NULL_RTX; + for (i = 0; size > 0; i ^= 1, size -= piece) + { + rtx tmp; + enum machine_mode mode; + + if (piece > size) + piece = size & -size; + mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT); + /* If we don't re-use temporaries, the scheduler gets carried away, + and the register pressure gets unnecessarily high. */ + if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode) + tmp = tmpx[i]; + else + tmpx[i] = tmp = gen_reg_rtx (mode); + dst_addr = force_offsettable (dst_addr, piece, 1); + src_addr = force_offsettable (src_addr, piece, 1); + if (store[i]) + emit_insn (store[i]); + emit_move_insn (tmp, change_address (src, mode, src_addr)); + store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp); + dst_addr = plus_constant (Pmode, dst_addr, piece); + src_addr = plus_constant (Pmode, src_addr, piece); + } + if (store[i]) + emit_insn (store[i]); + if (store[i^1]) + emit_insn (store[i^1]); + return true; +} + +/* Prepare operands for move in MODE. Return true iff the move has + been emitted. */ + +bool +prepare_move_operands (rtx *operands, enum machine_mode mode) +{ + /* We used to do this only for MODE_INT Modes, but addresses to floating + point variables may well be in the small data section. */ + if (1) + { + if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode)) + operands[0] = arc_rewrite_small_data (operands[0]); + else if (mode == SImode && flag_pic && SYMBOLIC_CONST (operands[1])) + { + emit_pic_move (operands, SImode); + + /* Disable any REG_EQUALs associated with the symref + otherwise the optimization pass undoes the work done + here and references the variable directly. 
*/ + } + else if (GET_CODE (operands[0]) != MEM + && !TARGET_NO_SDATA_SET + && small_data_pattern (operands[1], Pmode)) + { + /* This is to take care of address calculations involving sdata + variables. */ + operands[1] = arc_rewrite_small_data (operands[1]); + + emit_insn (gen_rtx_SET (mode, operands[0],operands[1])); + /* ??? This note is useless, since it only restates the set itself. + We should rather use the original SYMBOL_REF. However, there is + the problem that we are lying to the compiler about these + SYMBOL_REFs to start with. symbol@sda should be encoded specially + so that we can tell it apart from an actual symbol. */ + set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]); + + /* Take care of the REG_EQUAL note that will be attached to mark the + output reg equal to the initial symbol_ref after this code is + executed. */ + emit_move_insn (operands[0], operands[0]); + return true; + } + } + + if (MEM_P (operands[0]) + && !(reload_in_progress || reload_completed)) + { + operands[1] = force_reg (mode, operands[1]); + if (!move_dest_operand (operands[0], mode)) + { + rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + /* This is like change_address_1 (operands[0], mode, 0, 1) , + except that we can't use that function because it is static. */ + rtx pat = change_address (operands[0], mode, addr); + MEM_COPY_ATTRIBUTES (pat, operands[0]); + operands[0] = pat; + } + if (!cse_not_expected) + { + rtx pat = XEXP (operands[0], 0); + + pat = arc_legitimize_address_0 (pat, pat, mode); + if (pat) + { + pat = change_address (operands[0], mode, pat); + MEM_COPY_ATTRIBUTES (pat, operands[0]); + operands[0] = pat; + } + } + } + + if (MEM_P (operands[1]) && !cse_not_expected) + { + rtx pat = XEXP (operands[1], 0); + + pat = arc_legitimize_address_0 (pat, pat, mode); + if (pat) + { + pat = change_address (operands[1], mode, pat); + MEM_COPY_ATTRIBUTES (pat, operands[1]); + operands[1] = pat; + } + } + + return false; +} + +/* Prepare OPERANDS for an extension using CODE to OMODE. + Return true iff the move has been emitted. */ + +bool +prepare_extend_operands (rtx *operands, enum rtx_code code, + enum machine_mode omode) +{ + if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode)) + { + /* This is to take care of address calculations involving sdata + variables. */ + operands[1] + = gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1])); + emit_insn (gen_rtx_SET (omode, operands[0], operands[1])); + set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]); + + /* Take care of the REG_EQUAL note that will be attached to mark the + output reg equal to the initial extension after this code is + executed. */ + emit_move_insn (operands[0], operands[0]); + return true; + } + return false; +} + +/* Output a library call to a function called FNAME that has been arranged + to be local to any dso. */ + +const char * +arc_output_libcall (const char *fname) +{ + unsigned len = strlen (fname); + static char buf[64]; + + gcc_assert (len < sizeof buf - 35); + if (TARGET_LONG_CALLS_SET + || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ())) + { + if (flag_pic) + sprintf (buf, "add r12,pcl,@%s-(.&-4)\n\tjl%%!%%* [r12]", fname); + else + sprintf (buf, "jl%%! @%s", fname); + } + else + sprintf (buf, "bl%%!%%* @%s", fname); + return buf; +} + +/* Return the SImode highpart of the DImode value IN. */ + +rtx +disi_highpart (rtx in) +{ + return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 
0 : 4); +} + +/* Called by arc600_corereg_hazard via for_each_rtx. + If a hazard is found, return a conservative estimate of the required + length adjustment to accomodate a nop. */ + +static int +arc600_corereg_hazard_1 (rtx *xp, void *data) +{ + rtx x = *xp; + rtx dest; + rtx pat = (rtx) data; + + switch (GET_CODE (x)) + { + case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: + break; + default: + /* This is also fine for PRE/POST_MODIFY, because they contain a SET. */ + return 0; + } + dest = XEXP (x, 0); + /* Check if this sets a an extension register. N.B. we use 61 for the + condition codes, which is definitely not an extension register. */ + if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 + /* Check if the same register is used by the PAT. */ + && (refers_to_regno_p + (REGNO (dest), + REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, pat, 0))) + return 4; + + return 0; +} + +/* Return length adjustment for INSN. + For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. */ + +static int +arc600_corereg_hazard (rtx pred, rtx succ) +{ + if (!TARGET_ARC600) + return 0; + /* If SUCC is a doloop_end_i with a preceding label, we must output a nop + in front of SUCC anyway, so there will be separation between PRED and + SUCC. */ + if (recog_memoized (succ) == CODE_FOR_doloop_end_i + && LABEL_P (prev_nonnote_insn (succ))) + return 0; + if (recog_memoized (succ) == CODE_FOR_doloop_begin_i) + return 0; + if (GET_CODE (PATTERN (pred)) == SEQUENCE) + pred = XVECEXP (PATTERN (pred), 0, 1); + if (GET_CODE (PATTERN (succ)) == SEQUENCE) + succ = XVECEXP (PATTERN (succ), 0, 0); + if (recog_memoized (pred) == CODE_FOR_mulsi_600 + || recog_memoized (pred) == CODE_FOR_umul_600 + || recog_memoized (pred) == CODE_FOR_mac_600 + || recog_memoized (pred) == CODE_FOR_mul64_600 + || recog_memoized (pred) == CODE_FOR_mac64_600 + || recog_memoized (pred) == CODE_FOR_umul64_600 + || recog_memoized (pred) == CODE_FOR_umac64_600) + return 0; + return for_each_rtx (&PATTERN (pred), arc600_corereg_hazard_1, + PATTERN (succ)); +} + +/* For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. */ + +int +arc_hazard (rtx pred, rtx succ) +{ + if (!TARGET_ARC600) + return 0; + if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) + return 0; + /* We might have a CALL to a non-returning function before a loop end. + ??? Although the manual says that's OK (the target is outside the loop, + and the loop counter unused there), the assembler barfs on this, so we + must instert a nop before such a call too. */ + if (recog_memoized (succ) == CODE_FOR_doloop_end_i + && (JUMP_P (pred) || CALL_P (pred) + || GET_CODE (PATTERN (pred)) == SEQUENCE)) + return 4; + return arc600_corereg_hazard (pred, succ); +} + +/* Return length adjustment for INSN. */ + +int +arc_adjust_insn_length (rtx insn, int len, bool) +{ + if (!INSN_P (insn)) + return len; + /* We already handle sequences by ignoring the delay sequence flag. */ + if (GET_CODE (PATTERN (insn)) == SEQUENCE) + return len; + + /* It is impossible to jump to the very end of a Zero-Overhead Loop, as + the ZOL mechanism only triggers when advancing to the end address, + so if there's a label at the end of a ZOL, we need to insert a nop. 
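A small restatement of the register test driving the ARC600 hazard check above (the helper name is mine):

    #include <stdbool.h>

    /* ARC600 extension core registers are r32..r60; register 61 holds
       the condition codes, which is why the range stops below it.  A
       write to one of these followed immediately by a read needs an
       intervening nop, hence the 4-byte length adjustment above.  */
    static bool
    arc600_ext_core_reg_p (unsigned int regno)
    {
      return regno >= 32 && regno < 61;
    }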
+ The ARC600 ZOL also has extra restrictions on jumps at the end of a + loop. */ + if (recog_memoized (insn) == CODE_FOR_doloop_end_i) + { + rtx prev = prev_nonnote_insn (insn); + + return ((LABEL_P (prev) + || (TARGET_ARC600 + && (JUMP_P (prev) + || CALL_P (prev) /* Could be a noreturn call. */ + || (NONJUMP_INSN_P (prev) + && GET_CODE (PATTERN (prev)) == SEQUENCE)))) + ? len + 4 : len); + } + + /* Check for return with but one preceding insn since function + start / call. */ + if (TARGET_PAD_RETURN + && JUMP_P (insn) + && GET_CODE (PATTERN (insn)) != ADDR_VEC + && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC + && get_attr_type (insn) == TYPE_RETURN) + { + rtx prev = prev_active_insn (insn); + + if (!prev || !(prev = prev_active_insn (prev)) + || ((NONJUMP_INSN_P (prev) + && GET_CODE (PATTERN (prev)) == SEQUENCE) + ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL) + : CALL_ATTR (prev, NON_SIBCALL))) + return len + 4; + } + if (TARGET_ARC600) + { + rtx succ = next_real_insn (insn); + + /* One the ARC600, a write to an extension register must be separated + from a read. */ + if (succ && INSN_P (succ)) + len += arc600_corereg_hazard (insn, succ); + } + + /* Restore extracted operands - otherwise splitters like the addsi3_mixed one + can go awry. */ + extract_constrain_insn_cached (insn); + + return len; +} + +/* Values for length_sensitive. */ +enum +{ + ARC_LS_NONE,// Jcc + ARC_LS_25, // 25 bit offset, B + ARC_LS_21, // 21 bit offset, Bcc + ARC_LS_U13,// 13 bit unsigned offset, LP + ARC_LS_10, // 10 bit offset, B_s, Beq_s, Bne_s + ARC_LS_9, // 9 bit offset, BRcc + ARC_LS_8, // 8 bit offset, BRcc_s + ARC_LS_U7, // 7 bit unsigned offset, LPcc + ARC_LS_7 // 7 bit offset, Bcc_s +}; + +/* While the infrastructure patch is waiting for review, duplicate the + struct definitions, to allow this file to compile. */ +#if 1 +typedef struct +{ + unsigned align_set; + /* Cost as a branch / call target or call return address. */ + int target_cost; + int fallthrough_cost; + int branch_cost; + int length; + /* 0 for not length sensitive, 1 for largest offset range, + * 2 for next smaller etc. */ + unsigned length_sensitive : 8; + bool enabled; +} insn_length_variant_t; + +typedef struct insn_length_parameters_s +{ + int align_unit_log; + int align_base_log; + int max_variants; + int (*get_variants) (rtx, int, bool, bool, insn_length_variant_t *); +} insn_length_parameters_t; + +static void +arc_insn_length_parameters (insn_length_parameters_t *ilp) ATTRIBUTE_UNUSED; +#endif + +static int +arc_get_insn_variants (rtx insn, int len, bool, bool target_p, + insn_length_variant_t *ilv) +{ + if (!NONDEBUG_INSN_P (insn)) + return 0; + enum attr_type type; + /* shorten_branches doesn't take optimize_size into account yet for the + get_variants mechanism, so turn this off for now. */ + if (optimize_size) + return 0; + if (GET_CODE (PATTERN (insn)) == SEQUENCE) + { + /* The interaction of a short delay slot insn with a short branch is + too weird for shorten_branches to piece together, so describe the + entire SEQUENCE. */ + rtx pat, inner; + if (TARGET_UPSIZE_DBR + && get_attr_length (XVECEXP ((pat = PATTERN (insn)), 0, 1)) <= 2 + && (((type = get_attr_type (inner = XVECEXP (pat, 0, 0))) + == TYPE_UNCOND_BRANCH) + || type == TYPE_BRANCH) + && get_attr_delay_slot_filled (inner) == DELAY_SLOT_FILLED_YES) + { + int n_variants + = arc_get_insn_variants (inner, get_attr_length (inner), true, + target_p, ilv+1); + /* The short variant gets split into a higher-cost aligned + and a lower cost unaligned variant. 
*/ + gcc_assert (n_variants); + gcc_assert (ilv[1].length_sensitive == ARC_LS_7 + || ilv[1].length_sensitive == ARC_LS_10); + gcc_assert (ilv[1].align_set == 3); + ilv[0] = ilv[1]; + ilv[0].align_set = 1; + ilv[0].branch_cost += 1; + ilv[1].align_set = 2; + n_variants++; + for (int i = 0; i < n_variants; i++) + ilv[i].length += 2; + /* In case an instruction with aligned size is wanted, and + the short variants are unavailable / too expensive, add + versions of long branch + long delay slot. */ + for (int i = 2, end = n_variants; i < end; i++, n_variants++) + { + ilv[n_variants] = ilv[i]; + ilv[n_variants].length += 2; + } + return n_variants; + } + return 0; + } + insn_length_variant_t *first_ilv = ilv; + type = get_attr_type (insn); + bool delay_filled + = (get_attr_delay_slot_filled (insn) == DELAY_SLOT_FILLED_YES); + int branch_align_cost = delay_filled ? 0 : 1; + int branch_unalign_cost = delay_filled ? 0 : TARGET_UNALIGN_BRANCH ? 0 : 1; + /* If the previous instruction is an sfunc call, this insn is always + a target, even though the middle-end is unaware of this. */ + bool force_target = false; + rtx prev = prev_active_insn (insn); + if (prev && arc_next_active_insn (prev, 0) == insn + && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE) + ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL) + : (CALL_ATTR (prev, NON_SIBCALL) + && NEXT_INSN (PREV_INSN (prev)) == prev))) + force_target = true; + + switch (type) + { + case TYPE_BRCC: + /* Short BRCC only comes in no-delay-slot version, and without limm */ + if (!delay_filled) + { + ilv->align_set = 3; + ilv->length = 2; + ilv->branch_cost = 1; + ilv->enabled = (len == 2); + ilv->length_sensitive = ARC_LS_8; + ilv++; + } + /* Fall through. */ + case TYPE_BRCC_NO_DELAY_SLOT: + /* doloop_fallback* patterns are TYPE_BRCC_NO_DELAY_SLOT for + (delay slot) scheduling purposes, but they are longer. */ + if (GET_CODE (PATTERN (insn)) == PARALLEL + && GET_CODE (XVECEXP (PATTERN (insn), 0, 1)) == SET) + return 0; + /* Standard BRCC: 4 bytes, or 8 bytes with limm. */ + ilv->length = ((type == TYPE_BRCC) ? 4 : 8); + ilv->align_set = 3; + ilv->branch_cost = branch_align_cost; + ilv->enabled = (len <= ilv->length); + ilv->length_sensitive = ARC_LS_9; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = branch_unalign_cost; + } + ilv++; + + rtx op, op0; + op = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0); + op0 = XEXP (op, 0); + + if (GET_CODE (op0) == ZERO_EXTRACT + && satisfies_constraint_L (XEXP (op0, 2))) + op0 = XEXP (op0, 0); + if (satisfies_constraint_Rcq (op0)) + { + ilv->length = ((type == TYPE_BRCC) ? 6 : 10); + ilv->align_set = 3; + ilv->branch_cost = 1 + branch_align_cost; + ilv->fallthrough_cost = 1; + ilv->enabled = true; + ilv->length_sensitive = ARC_LS_21; + if (!delay_filled && TARGET_UNALIGN_BRANCH) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->branch_cost = 1 + branch_unalign_cost; + } + ilv++; + } + ilv->length = ((type == TYPE_BRCC) ? 
8 : 12); + ilv->align_set = 3; + ilv->branch_cost = 1 + branch_align_cost; + ilv->fallthrough_cost = 1; + ilv->enabled = true; + ilv->length_sensitive = ARC_LS_21; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = 1 + branch_unalign_cost; + } + ilv++; + break; + + case TYPE_SFUNC: + ilv->length = 12; + goto do_call; + case TYPE_CALL_NO_DELAY_SLOT: + ilv->length = 8; + goto do_call; + case TYPE_CALL: + ilv->length = 4; + ilv->length_sensitive + = GET_CODE (PATTERN (insn)) == COND_EXEC ? ARC_LS_21 : ARC_LS_25; + do_call: + ilv->align_set = 3; + ilv->fallthrough_cost = branch_align_cost; + ilv->enabled = true; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->fallthrough_cost = branch_unalign_cost; + } + ilv++; + break; + case TYPE_UNCOND_BRANCH: + /* Strictly speaking, this should be ARC_LS_10 for equality comparisons, + but that makes no difference at the moment. */ + ilv->length_sensitive = ARC_LS_7; + ilv[1].length_sensitive = ARC_LS_25; + goto do_branch; + case TYPE_BRANCH: + ilv->length_sensitive = ARC_LS_10; + ilv[1].length_sensitive = ARC_LS_21; + do_branch: + ilv->align_set = 3; + ilv->length = 2; + ilv->branch_cost = branch_align_cost; + ilv->enabled = (len == ilv->length); + ilv++; + ilv->length = 4; + ilv->align_set = 3; + ilv->branch_cost = branch_align_cost; + ilv->enabled = true; + if ((target_p || force_target) + || (!delay_filled && TARGET_UNALIGN_BRANCH)) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + ilv->branch_cost = branch_unalign_cost; + } + ilv++; + break; + case TYPE_JUMP: + return 0; + default: + /* For every short insn, there is generally also a long insn. + trap_s is an exception. */ + if ((len & 2) == 0 || recog_memoized (insn) == CODE_FOR_trap_s) + return 0; + ilv->align_set = 3; + ilv->length = len; + ilv->enabled = 1; + ilv++; + ilv->align_set = 3; + ilv->length = len + 2; + ilv->enabled = 1; + if (target_p || force_target) + { + ilv[1] = *ilv; + ilv->align_set = 1; + ilv++; + ilv->align_set = 2; + ilv->target_cost = 1; + } + ilv++; + } + /* If the previous instruction is an sfunc call, this insn is always + a target, even though the middle-end is unaware of this. + Therefore, if we have a call predecessor, transfer the target cost + to the fallthrough and branch costs. */ + if (force_target) + { + for (insn_length_variant_t *p = first_ilv; p < ilv; p++) + { + p->fallthrough_cost += p->target_cost; + p->branch_cost += p->target_cost; + p->target_cost = 0; + } + } + + return ilv - first_ilv; +} + +static void +arc_insn_length_parameters (insn_length_parameters_t *ilp) +{ + ilp->align_unit_log = 1; + ilp->align_base_log = 1; + ilp->max_variants = 7; + ilp->get_variants = arc_get_insn_variants; +} + +/* Return a copy of COND from *STATEP, inverted if that is indicated by the + CC field of *STATEP. 
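(Editor's aside on the insn-length-variant machinery above; this reading is inferred from the code, not stated in the patch.) With align_unit_log == align_base_log == 1, variants are tracked in 2-byte steps within a 4-byte fetch unit, and align_set appears to act as a two-bit mask over the two possible starting positions: 1 for a variant valid only at a 4-byte boundary, 2 for one starting 2 bytes into the unit, 3 for either. The code above repeatedly splits a '3' variant into a '1' and a '2' copy so the two placements can carry different branch and target costs. A hypothetical spelling of that reading, with editor-chosen names:

    enum arc_align_set_guess
    {
      ALIGN_SET_ALIGNED   = 1,
      ALIGN_SET_UNALIGNED = 2,
      ALIGN_SET_EITHER    = 3
    };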
*/
+
+static rtx
+arc_get_ccfsm_cond (struct arc_ccfsm *statep, bool reverse)
+{
+  rtx cond = statep->cond;
+  int raw_cc = get_arc_condition_code (cond);
+  if (reverse)
+    raw_cc = ARC_INVERSE_CONDITION_CODE (raw_cc);
+
+  if (statep->cc == raw_cc)
+    return copy_rtx (cond);
+
+  gcc_assert (ARC_INVERSE_CONDITION_CODE (raw_cc) == statep->cc);
+
+  enum machine_mode ccm = GET_MODE (XEXP (cond, 0));
+  enum rtx_code code = reverse_condition (GET_CODE (cond));
+  if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode)
+    code = reverse_condition_maybe_unordered (GET_CODE (cond));
+
+  return gen_rtx_fmt_ee (code, GET_MODE (cond),
+                         copy_rtx (XEXP (cond, 0)), copy_rtx (XEXP (cond, 1)));
+}
+
+/* Return version of PAT conditionalized with COND, which is part of INSN.
+   ANNULLED indicates if INSN is an annulled delay-slot insn.
+   Register further changes if necessary.  */
+static rtx
+conditionalize_nonjump (rtx pat, rtx cond, rtx insn, bool annulled)
+{
+  /* For commutative operators, we generally prefer to have
+     the first source match the destination.  */
+  if (GET_CODE (pat) == SET)
+    {
+      rtx src = SET_SRC (pat);
+
+      if (COMMUTATIVE_P (src))
+        {
+          rtx src0 = XEXP (src, 0);
+          rtx src1 = XEXP (src, 1);
+          rtx dst = SET_DEST (pat);
+
+          if (rtx_equal_p (src1, dst) && !rtx_equal_p (src0, dst)
+              /* Leave add_n alone - the canonical form is to
+                 have the complex summand first.  */
+              && REG_P (src0))
+            pat = gen_rtx_SET (VOIDmode, dst,
+                               gen_rtx_fmt_ee (GET_CODE (src), GET_MODE (src),
+                                               src1, src0));
+        }
+    }
+
+  /* dwarf2out.c:dwarf2out_frame_debug_expr doesn't know
+     what to do with COND_EXEC.  */
+  if (RTX_FRAME_RELATED_P (insn))
+    {
+      /* If this is the delay slot insn of an annulled branch,
+         dwarf2out.c:scan_trace understands the annulling semantics
+         without the COND_EXEC.  */
+      gcc_assert (annulled);
+      rtx note = alloc_reg_note (REG_FRAME_RELATED_EXPR, pat,
+                                 REG_NOTES (insn));
+      validate_change (insn, &REG_NOTES (insn), note, 1);
+    }
+  pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat);
+  return pat;
+}
+
+/* Use the ccfsm machinery to do if conversion.  */
+
+static unsigned
+arc_ifcvt (void)
+{
+  struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
+  basic_block merge_bb = 0;
+
+  memset (statep, 0, sizeof *statep);
+  for (rtx insn = get_insns (); insn; insn = next_insn (insn))
+    {
+      arc_ccfsm_advance (insn, statep);
+
+      switch (statep->state)
+        {
+        case 0:
+          if (JUMP_P (insn))
+            merge_bb = 0;
+          break;
+        case 1: case 2:
+          {
+            /* Deleted branch.
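A hypothetical picture of this case (editor's illustration, not RTL from the patch): the ccfsm machinery has matched a conditional branch that skips a small block; the branch itself is deleted here, and the insns it used to guard are given a COND_EXEC predicate when the scan reaches them in states 4-5. Schematically,

    (jump_insn (set (pc) (if_then_else (ne cc 0) (label_ref L) (pc))))
    (insn      (set (reg r0) (const_int 1)))
    L:

ends up as

    (insn (cond_exec (eq cc 0) (set (reg r0) (const_int 1))))

i.e. the skipped insn executes exactly when the branch would not have been taken.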
*/ + gcc_assert (!merge_bb); + merge_bb = BLOCK_FOR_INSN (insn); + basic_block succ_bb + = BLOCK_FOR_INSN (NEXT_INSN (NEXT_INSN (PREV_INSN (insn)))); + arc_ccfsm_post_advance (insn, statep); + gcc_assert (!IN_RANGE (statep->state, 1, 2)); + rtx seq = NEXT_INSN (PREV_INSN (insn)); + if (seq != insn) + { + rtx slot = XVECEXP (PATTERN (seq), 0, 1); + rtx pat = PATTERN (slot); + if (INSN_ANNULLED_BRANCH_P (insn)) + { + rtx cond + = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (slot)); + pat = gen_rtx_COND_EXEC (VOIDmode, cond, pat); + } + if (!validate_change (seq, &PATTERN (seq), pat, 0)) + gcc_unreachable (); + PUT_CODE (slot, NOTE); + NOTE_KIND (slot) = NOTE_INSN_DELETED; + if (merge_bb && succ_bb) + merge_blocks (merge_bb, succ_bb); + } + else if (merge_bb && succ_bb) + { + set_insn_deleted (insn); + merge_blocks (merge_bb, succ_bb); + } + else + { + PUT_CODE (insn, NOTE); + NOTE_KIND (insn) = NOTE_INSN_DELETED; + } + continue; + } + case 3: + if (LABEL_P (insn) + && statep->target_label == CODE_LABEL_NUMBER (insn)) + { + arc_ccfsm_post_advance (insn, statep); + basic_block succ_bb = BLOCK_FOR_INSN (insn); + if (merge_bb && succ_bb) + merge_blocks (merge_bb, succ_bb); + else if (--LABEL_NUSES (insn) == 0) + { + const char *name = LABEL_NAME (insn); + PUT_CODE (insn, NOTE); + NOTE_KIND (insn) = NOTE_INSN_DELETED_LABEL; + NOTE_DELETED_LABEL_NAME (insn) = name; + } + merge_bb = 0; + continue; + } + /* Fall through. */ + case 4: case 5: + if (!NONDEBUG_INSN_P (insn)) + break; + + /* Conditionalized insn. */ + + rtx prev, pprev, *patp, pat, cond; + bool annulled; annulled = false; + + /* If this is a delay slot insn in a non-annulled branch, + don't conditionalize it. N.B., this should be fine for + conditional return too. However, don't do this for + unconditional branches, as these would be encountered when + processing an 'else' part. */ + prev = PREV_INSN (insn); + pprev = PREV_INSN (prev); + if (pprev && NEXT_INSN (NEXT_INSN (pprev)) == NEXT_INSN (insn) + && JUMP_P (prev) && get_attr_cond (prev) == COND_USE) + { + if (!INSN_ANNULLED_BRANCH_P (prev)) + break; + annulled = true; + } + + patp = &PATTERN (insn); + pat = *patp; + cond = arc_get_ccfsm_cond (statep, INSN_FROM_TARGET_P (insn)); + if (NONJUMP_INSN_P (insn) || CALL_P (insn)) + { + /* ??? don't conditionalize if all side effects are dead + in the not-execute case. */ + + pat = conditionalize_nonjump (pat, cond, insn, annulled); + } + else if (simplejump_p (insn)) + { + patp = &SET_SRC (pat); + pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, *patp, pc_rtx); + } + else if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn))) + { + pat = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, pat, pc_rtx); + pat = gen_rtx_SET (VOIDmode, pc_rtx, pat); + } + else + gcc_unreachable (); + validate_change (insn, patp, pat, 1); + if (!apply_change_group ()) + gcc_unreachable (); + if (JUMP_P (insn)) + { + rtx next = next_nonnote_insn (insn); + if (GET_CODE (next) == BARRIER) + delete_insn (next); + if (statep->state == 3) + continue; + } + break; + default: + gcc_unreachable (); + } + arc_ccfsm_post_advance (insn, statep); + } + return 0; +} + +/* Find annulled delay insns and convert them to use the appropriate predicate. + This allows branch shortening to size up these insns properly. 
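Condensed to its essence (editor's sketch; the function below additionally reverses the condition to match the annulling sense, and conditionalize_nonjump also canonicalizes commutative operands and frame-related notes), the transformation applied to a delay-slot insn DLAY is:

    /* COND is, roughly, the condition under which the slot insn
       actually executes.  */
    pat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (dlay));
    validate_change (dlay, &PATTERN (dlay), pat, 1);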
*/ + +static unsigned +arc_predicate_delay_insns (void) +{ + for (rtx insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat, jump, dlay, src, cond, *patp; + int reverse; + + if (!NONJUMP_INSN_P (insn) + || GET_CODE (pat = PATTERN (insn)) != SEQUENCE) + continue; + jump = XVECEXP (pat, 0, 0); + dlay = XVECEXP (pat, 0, 1); + if (!JUMP_P (jump) || !INSN_ANNULLED_BRANCH_P (jump)) + continue; + /* If the branch insn does the annulling, leave the delay insn alone. */ + if (!TARGET_AT_DBR_CONDEXEC && !INSN_FROM_TARGET_P (dlay)) + continue; + /* ??? Could also leave DLAY un-conditionalized if its target is dead + on the other path. */ + gcc_assert (GET_CODE (PATTERN (jump)) == SET); + gcc_assert (SET_DEST (PATTERN (jump)) == pc_rtx); + src = SET_SRC (PATTERN (jump)); + gcc_assert (GET_CODE (src) == IF_THEN_ELSE); + cond = XEXP (src, 0); + if (XEXP (src, 2) == pc_rtx) + reverse = 0; + else if (XEXP (src, 1) == pc_rtx) + reverse = 1; + else + gcc_unreachable (); + if (!INSN_FROM_TARGET_P (dlay) != reverse) + { + enum machine_mode ccm = GET_MODE (XEXP (cond, 0)); + enum rtx_code code = reverse_condition (GET_CODE (cond)); + if (code == UNKNOWN || ccm == CC_FP_GTmode || ccm == CC_FP_GEmode) + code = reverse_condition_maybe_unordered (GET_CODE (cond)); + + cond = gen_rtx_fmt_ee (code, GET_MODE (cond), + copy_rtx (XEXP (cond, 0)), + copy_rtx (XEXP (cond, 1))); + } + else + cond = copy_rtx (cond); + patp = &PATTERN (dlay); + pat = *patp; + pat = conditionalize_nonjump (pat, cond, dlay, true); + validate_change (dlay, patp, pat, 1); + if (!apply_change_group ()) + gcc_unreachable (); + } + return 0; +} + +/* For ARC600: If a write to a core reg >=32 appears in a delay slot + (other than of a forward brcc), it creates a hazard when there is a read + of the same register at the branch target. We can't know what is at the + branch target of calls, and for branches, we don't really know before the + end of delay slot scheduling, either. Not only can individual instruction + be hoisted out into a delay slot, a basic block can also be emptied this + way, and branch and/or fall through targets be redirected. Hence we don't + want such writes in a delay slot. */ +/* Called by arc_write_ext_corereg via for_each_rtx. */ + +static int +write_ext_corereg_1 (rtx *xp, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *xp; + rtx dest; + + switch (GET_CODE (x)) + { + case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: + break; + default: + /* This is also fine for PRE/POST_MODIFY, because they contain a SET. */ + return 0; + } + dest = XEXP (x, 0); + if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61) + return 1; + return 0; +} + +/* Return nonzreo iff INSN writes to an extension core register. */ + +int +arc_write_ext_corereg (rtx insn) +{ + return for_each_rtx (&PATTERN (insn), write_ext_corereg_1, 0); +} + +/* This is like the hook, but returns NULL when it can't / won't generate + a legitimate address. 
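As a worked example of the large-offset splitting done below (editor's illustration; the numbers are made up): for an SImode access, size == 4, so an address sym + 5000 is rewritten via

    upper = (5000 + 256 * 4) & (~511 * 4)  ==  6024 & -2048  ==  4096
    x     = (plus (force_reg (Pmode, sym + 4096)) 904)

and the residual displacement offs - upper always falls in [-1024, 1023] for a 4-byte access.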
*/ + +static rtx +arc_legitimize_address_0 (rtx x, rtx oldx ATTRIBUTE_UNUSED, + enum machine_mode mode) +{ + rtx addr, inner; + + if (flag_pic && SYMBOLIC_CONST (x)) + (x) = arc_legitimize_pic_address (x, 0); + addr = x; + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + if (GET_CODE (addr) == PLUS + && CONST_INT_P (XEXP (addr, 1)) + && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF + && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0))) + || (REG_P (XEXP (addr, 0)) + && (INTVAL (XEXP (addr, 1)) & 252)))) + { + HOST_WIDE_INT offs, upper; + int size = GET_MODE_SIZE (mode); + + offs = INTVAL (XEXP (addr, 1)); + upper = (offs + 256 * size) & ~511 * size; + inner = plus_constant (Pmode, XEXP (addr, 0), upper); +#if 0 /* ??? this produces worse code for EEMBC idctrn01 */ + if (GET_CODE (x) == CONST) + inner = gen_rtx_CONST (Pmode, inner); +#endif + addr = plus_constant (Pmode, force_reg (Pmode, inner), offs - upper); + x = addr; + } + else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr)) + x = force_reg (Pmode, x); + if (memory_address_p ((enum machine_mode) mode, x)) + return x; + return NULL_RTX; +} + +static rtx +arc_legitimize_address (rtx orig_x, rtx oldx, enum machine_mode mode) +{ + rtx new_x = arc_legitimize_address_0 (orig_x, oldx, mode); + + if (new_x) + return new_x; + return orig_x; +} + +static rtx +arc_delegitimize_address_0 (rtx x) +{ + rtx u, gp; + + if (GET_CODE (x) == CONST && GET_CODE (u = XEXP (x, 0)) == UNSPEC) + { + if (XINT (u, 1) == ARC_UNSPEC_GOT) + return XVECEXP (u, 0, 0); + } + else if (GET_CODE (x) == PLUS + && ((REG_P (gp = XEXP (x, 0)) + && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM) + || (GET_CODE (gp) == CONST + && GET_CODE (u = XEXP (gp, 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF + && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC"))) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOTOFF) + return XVECEXP (u, 0, 0); + else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS + && ((REG_P (gp = XEXP (XEXP (x, 0), 1)) + && REGNO (gp) == PIC_OFFSET_TABLE_REGNUM) + || (GET_CODE (gp) == CONST + && GET_CODE (u = XEXP (gp, 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOT + && GET_CODE (XVECEXP (u, 0, 0)) == SYMBOL_REF + && !strcmp (XSTR (XVECEXP (u, 0, 0), 0), "_DYNAMIC"))) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (u = XEXP (XEXP (x, 1), 0)) == UNSPEC + && XINT (u, 1) == ARC_UNSPEC_GOTOFF) + return gen_rtx_PLUS (GET_MODE (x), XEXP (XEXP (x, 0), 0), + XVECEXP (u, 0, 0)); + else if (GET_CODE (x) == PLUS + && (u = arc_delegitimize_address_0 (XEXP (x, 1)))) + return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), u); + return NULL_RTX; +} + +static rtx +arc_delegitimize_address (rtx x) +{ + rtx orig_x = x = delegitimize_mem_from_attrs (x); + if (GET_CODE (x) == MEM) + x = XEXP (x, 0); + x = arc_delegitimize_address_0 (x); + if (x) + { + if (MEM_P (orig_x)) + x = replace_equiv_address_nv (orig_x, x); + return x; + } + return orig_x; +} + +/* Return a REG rtx for acc1. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of acc1 and acc2. */ + +rtx +gen_acc1 (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57); +} + +/* Return a REG rtx for acc2. N.B. 
the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of acc1 and acc2. */ + +rtx +gen_acc2 (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56); +} + +/* Return a REG rtx for mlo. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of mhi and mlo. */ + +rtx +gen_mlo (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58); +} + +/* Return a REG rtx for mhi. N.B. the gcc-internal representation may + differ from the hardware register number in order to allow the generic + code to correctly split the concatenation of mhi and mlo. */ + +rtx +gen_mhi (void) +{ + return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58: 59); +} + +/* FIXME: a parameter should be added, and code added to final.c, + to reproduce this functionality in shorten_branches. */ +#if 0 +/* Return nonzero iff BRANCH should be unaligned if possible by upsizing + a previous instruction. */ +int +arc_unalign_branch_p (rtx branch) +{ + rtx note; + + if (!TARGET_UNALIGN_BRANCH) + return 0; + /* Do not do this if we have a filled delay slot. */ + if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES + && !INSN_DELETED_P (NEXT_INSN (branch))) + return 0; + note = find_reg_note (branch, REG_BR_PROB, 0); + return (!note + || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note)) + || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold); +} +#endif + +/* When estimating sizes during arc_reorg, when optimizing for speed, there + are three reasons why we need to consider branches to be length 6: + - annull-false delay slot insns are implemented using conditional execution, + thus preventing short insn formation where used. + - for ARC600: annul-true delay slot insns are implemented where possible + using conditional execution, preventing short insn formation where used. + - for ARC700: likely or somewhat likely taken branches are made long and + unaligned if possible to avoid branch penalty. */ + +bool +arc_branch_size_unknown_p (void) +{ + return !optimize_size && arc_reorg_in_progress; +} + +/* We are about to output a return insn. Add padding if necessary to avoid + a mispredict. A return could happen immediately after the function + start, but after a call we know that there will be at least a blink + restore. */ + +void +arc_pad_return (void) +{ + rtx insn = current_output_insn; + rtx prev = prev_active_insn (insn); + int want_long; + + if (!prev) + { + fputs ("\tnop_s\n", asm_out_file); + cfun->machine->unalign ^= 2; + want_long = 1; + } + /* If PREV is a sequence, we know it must be a branch / jump or a tailcall, + because after a call, we'd have to restore blink first. */ + else if (GET_CODE (PATTERN (prev)) == SEQUENCE) + return; + else + { + want_long = (get_attr_length (prev) == 2); + prev = prev_active_insn (prev); + } + if (!prev + || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE) + ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL) + : CALL_ATTR (prev, NON_SIBCALL))) + { + if (want_long) + cfun->machine->size_reason + = "call/return and return/return must be 6 bytes apart to avoid mispredict"; + else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign) + { + cfun->machine->size_reason + = "Long unaligned jump avoids non-delay slot penalty"; + want_long = 1; + } + /* Disgorge delay insn, if there is any, and it may be moved. 
*/ + if (final_sequence + /* ??? Annulled would be OK if we can and do conditionalize + the delay slot insn accordingly. */ + && !INSN_ANNULLED_BRANCH_P (insn) + && (get_attr_cond (insn) != COND_USE + || !reg_set_p (gen_rtx_REG (CCmode, CC_REG), + XVECEXP (final_sequence, 0, 1)))) + { + prev = XVECEXP (final_sequence, 0, 1); + gcc_assert (!prev_real_insn (insn) + || !arc_hazard (prev_real_insn (insn), prev)); + cfun->machine->force_short_suffix = !want_long; + rtx save_pred = current_insn_predicate; + final_scan_insn (prev, asm_out_file, optimize, 1, NULL); + cfun->machine->force_short_suffix = -1; + INSN_DELETED_P (prev) = 1; + current_output_insn = insn; + current_insn_predicate = save_pred; + } + else if (want_long) + fputs ("\tnop\n", asm_out_file); + else + { + fputs ("\tnop_s\n", asm_out_file); + cfun->machine->unalign ^= 2; + } + } + return; +} + +/* The usual; we set up our machine_function data. */ + +static struct machine_function * +arc_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_alloc_cleared_machine_function (); + machine->fn_type = ARC_FUNCTION_UNKNOWN; + machine->force_short_suffix = -1; + + return machine; +} + +/* Implements INIT_EXPANDERS. We just set up to call the above + function. */ + +void +arc_init_expanders (void) +{ + init_machine_status = arc_init_machine_status; +} + +/* Check if OP is a proper parallel of a millicode call pattern. OFFSET + indicates a number of elements to ignore - that allows to have a + sibcall pattern that starts with (return). LOAD_P is zero for store + multiple (for prologues), and one for load multiples (for epilogues), + and two for load multiples where no final clobber of blink is required. + We also skip the first load / store element since this is supposed to + be checked in the instruction pattern. */ + +int +arc_check_millicode (rtx op, int offset, int load_p) +{ + int len = XVECLEN (op, 0) - offset; + int i; + + if (load_p == 2) + { + if (len < 2 || len > 13) + return 0; + load_p = 1; + } + else + { + rtx elt = XVECEXP (op, 0, --len); + + if (GET_CODE (elt) != CLOBBER + || !REG_P (XEXP (elt, 0)) + || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM + || len < 3 || len > 13) + return 0; + } + for (i = 1; i < len; i++) + { + rtx elt = XVECEXP (op, 0, i + offset); + rtx reg, mem, addr; + + if (GET_CODE (elt) != SET) + return 0; + mem = XEXP (elt, load_p); + reg = XEXP (elt, 1-load_p); + if (!REG_P (reg) || REGNO (reg) != 13U+i || !MEM_P (mem)) + return 0; + addr = XEXP (mem, 0); + if (GET_CODE (addr) != PLUS + || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0)) + || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4) + return 0; + } + return 1; +} + +/* Accessor functions for cfun->machine->unalign. */ + +int +arc_get_unalign (void) +{ + return cfun->machine->unalign; +} + +void +arc_clear_unalign (void) +{ + if (cfun) + cfun->machine->unalign = 0; +} + +void +arc_toggle_unalign (void) +{ + cfun->machine->unalign ^= 2; +} + +/* Operands 0..2 are the operands of a addsi which uses a 12 bit + constant in operand 2, but which would require a LIMM because of + operand mismatch. + operands 3 and 4 are new SET_SRCs for operands 0. */ + +void +split_addsi (rtx *operands) +{ + int val = INTVAL (operands[2]); + + /* Try for two short insns first. Lengths being equal, we prefer + expansions with shorter register lifetimes. 
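For example (editor's note, not part of the original comment): splitting r0 := r1 + 200 when r0 is a compact (Rcq) register yields

    operands[3] = (const_int 200)              first insn:   r0 = 200
    operands[4] = (plus (reg r0) (reg r1))     second insn:  r0 = r0 + r1

whereas the fallback ordering below is r0 = r1 followed by r0 = r0 + 200.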
*/ + if (val > 127 && val <= 255 + && satisfies_constraint_Rcq (operands[0])) + { + operands[3] = operands[2]; + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); + } + else + { + operands[3] = operands[1]; + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]); + } +} + +/* Operands 0..2 are the operands of a subsi which uses a 12 bit + constant in operand 1, but which would require a LIMM because of + operand mismatch. + operands 3 and 4 are new SET_SRCs for operands 0. */ + +void +split_subsi (rtx *operands) +{ + int val = INTVAL (operands[1]); + + /* Try for two short insns first. Lengths being equal, we prefer + expansions with shorter register lifetimes. */ + if (satisfies_constraint_Rcq (operands[0]) + && satisfies_constraint_Rcq (operands[2])) + { + if (val >= -31 && val <= 127) + { + operands[3] = gen_rtx_NEG (SImode, operands[2]); + operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); + return; + } + else if (val >= 0 && val < 255) + { + operands[3] = operands[1]; + operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]); + return; + } + } + /* If the destination is not an ARCompact16 register, we might + still have a chance to make a short insn if the source is; + we need to start with a reg-reg move for this. */ + operands[3] = operands[2]; + operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]); +} + +/* Handle DOUBLE_REGS uses. + Operand 0: destination register + Operand 1: source register */ + +static rtx +arc_process_double_reg_moves (rtx *operands) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx val; + + enum usesDxState { none, srcDx, destDx, maxDx }; + enum usesDxState state = none; + + if (refers_to_regno_p (40, 44, src, 0)) + state = srcDx; + if (refers_to_regno_p (40, 44, dest, 0)) + { + /* Via arc_register_move_cost, we should never see D,D moves. */ + gcc_assert (state == none); + state = destDx; + } + + if (state == none) + return NULL_RTX; + + start_sequence (); + + if (state == srcDx) + { + /* Without the LR insn, we need to split this into a + sequence of insns which will use the DEXCLx and DADDHxy + insns to be able to read the Dx register in question. */ + if (TARGET_DPFP_DISABLE_LRSR) + { + /* gen *movdf_insn_nolrsr */ + rtx set = gen_rtx_SET (VOIDmode, dest, src); + rtx use1 = gen_rtx_USE (VOIDmode, const1_rtx); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, use1))); + } + else + { + /* When we have 'mov D, r' or 'mov D, D' then get the target + register pair for use with LR insn. */ + rtx destHigh = simplify_gen_subreg(SImode, dest, DFmode, 4); + rtx destLow = simplify_gen_subreg(SImode, dest, DFmode, 0); + + /* Produce the two LR insns to get the high and low parts. */ + emit_insn (gen_rtx_SET (VOIDmode, + destHigh, + gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src), + VUNSPEC_LR_HIGH))); + emit_insn (gen_rtx_SET (VOIDmode, + destLow, + gen_rtx_UNSPEC_VOLATILE (Pmode, gen_rtvec (1, src), + VUNSPEC_LR))); + } + } + else if (state == destDx) + { + /* When we have 'mov r, D' or 'mov D, D' and we have access to the + LR insn get the target register pair. */ + rtx srcHigh = simplify_gen_subreg(SImode, src, DFmode, 4); + rtx srcLow = simplify_gen_subreg(SImode, src, DFmode, 0); + + emit_insn (gen_rtx_UNSPEC_VOLATILE (Pmode, + gen_rtvec (3, dest, srcHigh, srcLow), + VUNSPEC_DEXCL_NORES)); + + } + else + gcc_unreachable (); + + val = get_insns (); + end_sequence (); + return val; +} + +/* operands 0..1 are the operands of a 64 bit move instruction. 
+ split it into two moves with operands 2/3 and 4/5. */ + +rtx +arc_split_move (rtx *operands) +{ + enum machine_mode mode = GET_MODE (operands[0]); + int i; + int swap = 0; + rtx xop[4]; + rtx val; + + if (TARGET_DPFP) + { + val = arc_process_double_reg_moves (operands); + if (val) + return val; + } + + for (i = 0; i < 2; i++) + { + if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0))) + { + rtx addr = XEXP (operands[i], 0); + rtx r, o; + enum rtx_code code; + + gcc_assert (!reg_overlap_mentioned_p (operands[0], addr)); + switch (GET_CODE (addr)) + { + case PRE_DEC: o = GEN_INT (-8); goto pre_modify; + case PRE_INC: o = GEN_INT (8); goto pre_modify; + case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1); + pre_modify: + code = PRE_MODIFY; + break; + case POST_DEC: o = GEN_INT (-8); goto post_modify; + case POST_INC: o = GEN_INT (8); goto post_modify; + case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1); + post_modify: + code = POST_MODIFY; + swap = 2; + break; + default: + gcc_unreachable (); + } + r = XEXP (addr, 0); + xop[0+i] = adjust_automodify_address_nv + (operands[i], SImode, + gen_rtx_fmt_ee (code, Pmode, r, + gen_rtx_PLUS (Pmode, r, o)), + 0); + xop[2+i] = adjust_automodify_address_nv + (operands[i], SImode, plus_constant (Pmode, r, 4), 4); + } + else + { + xop[0+i] = operand_subword (operands[i], 0, 0, mode); + xop[2+i] = operand_subword (operands[i], 1, 0, mode); + } + } + if (reg_overlap_mentioned_p (xop[0], xop[3])) + { + swap = 2; + gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1])); + } + operands[2+swap] = xop[0]; + operands[3+swap] = xop[1]; + operands[4-swap] = xop[2]; + operands[5-swap] = xop[3]; + + start_sequence (); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[3])); + emit_insn (gen_rtx_SET (VOIDmode, operands[4], operands[5])); + val = get_insns (); + end_sequence (); + + return val; +} + +/* Select between the instruction output templates s_tmpl (for short INSNs) + and l_tmpl (for long INSNs). */ + +const char * +arc_short_long (rtx insn, const char *s_tmpl, const char *l_tmpl) +{ + int is_short = arc_verify_short (insn, cfun->machine->unalign, -1); + + extract_constrain_insn_cached (insn); + return is_short ? s_tmpl : l_tmpl; +} + +/* Searches X for any reference to REGNO, returning the rtx of the + reference found if any. Otherwise, returns NULL_RTX. */ + +rtx +arc_regno_use_in (unsigned int regno, rtx x) +{ + const char *fmt; + int i, j; + rtx tem; + + if (REG_P (x) && refers_to_regno_p (regno, regno+1, x, (rtx *) 0)) + return x; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + if ((tem = regno_use_in (regno, XEXP (x, i)))) + return tem; + } + else if (fmt[i] == 'E') + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if ((tem = regno_use_in (regno , XVECEXP (x, i, j)))) + return tem; + } + + return NULL_RTX; +} + +/* Return the integer value of the "type" attribute for INSN, or -1 if + INSN can't have attributes. */ + +int +arc_attr_type (rtx insn) +{ + if (NONJUMP_INSN_P (insn) + ? (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + : JUMP_P (insn) + ? (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + : !CALL_P (insn)) + return -1; + return get_attr_type (insn); +} + +/* Return true if insn sets the condition codes. 
*/ + +bool +arc_sets_cc_p (rtx insn) +{ + if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE) + insn = XVECEXP (PATTERN (insn), 0, XVECLEN (PATTERN (insn), 0) - 1); + return arc_attr_type (insn) == TYPE_COMPARE; +} + +/* Return true if INSN is an instruction with a delay slot we may want + to fill. */ + +bool +arc_need_delay (rtx insn) +{ + rtx next; + + if (!flag_delayed_branch) + return false; + /* The return at the end of a function needs a delay slot. */ + if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE + && (!(next = next_active_insn (insn)) + || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE) + && arc_attr_type (next) == TYPE_RETURN)) + && (!TARGET_PAD_RETURN + || (prev_active_insn (insn) + && prev_active_insn (prev_active_insn (insn)) + && prev_active_insn (prev_active_insn (prev_active_insn (insn)))))) + return true; + if (NONJUMP_INSN_P (insn) + ? (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == SEQUENCE) + : JUMP_P (insn) + ? (GET_CODE (PATTERN (insn)) == ADDR_VEC + || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) + : !CALL_P (insn)) + return false; + return num_delay_slots (insn) != 0; +} + +/* Return true if the scheduling pass(es) has/have already run, + i.e. where possible, we should try to mitigate high latencies + by different instruction selection. */ + +bool +arc_scheduling_not_expected (void) +{ + return cfun->machine->arc_reorg_started; +} + +/* Oddly enough, sometimes we get a zero overhead loop that branch + shortening doesn't think is a loop - observed with compile/pr24883.c + -O3 -fomit-frame-pointer -funroll-loops. Make sure to include the + alignment visible for branch shortening (we actually align the loop + insn before it, but that is equivalent since the loop insn is 4 byte + long.) */ + +int +arc_label_align (rtx label) +{ + int loop_align = LOOP_ALIGN (LABEL); + + if (loop_align > align_labels_log) + { + rtx prev = prev_nonnote_insn (label); + + if (prev && NONJUMP_INSN_P (prev) + && GET_CODE (PATTERN (prev)) == PARALLEL + && recog_memoized (prev) == CODE_FOR_doloop_begin_i) + return loop_align; + } + /* Code has a minimum p2 alignment of 1, which we must restore after an + ADDR_DIFF_VEC. */ + if (align_labels_log < 1) + { + rtx next = next_nonnote_nondebug_insn (label); + if (INSN_P (next) && recog_memoized (next) >= 0) + return 1; + } + return align_labels_log; +} + +/* Return true if LABEL is in executable code. */ + +bool +arc_text_label (rtx label) +{ + rtx next; + + /* ??? We use deleted labels like they were still there, see + gcc.c-torture/compile/20000326-2.c . */ + gcc_assert (GET_CODE (label) == CODE_LABEL + || (GET_CODE (label) == NOTE + && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL)); + next = next_nonnote_insn (label); + if (next) + return (!JUMP_TABLE_DATA_P (next) + || GET_CODE (PATTERN (next)) != ADDR_VEC); + else if (!PREV_INSN (label)) + /* ??? sometimes text labels get inserted very late, see + gcc.dg/torture/stackalign/comp-goto-1.c */ + return true; + return false; +} + +/* Return the size of the pretend args for DECL. */ + +int +arc_decl_pretend_args (tree decl) +{ + /* struct function is in DECL_STRUCT_FUNCTION (decl), but no + pretend_args there... See PR38391. 
*/ + gcc_assert (decl == current_function_decl); + return crtl->args.pretend_args_size; +} + +/* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble + when compiling with -O2 -freorder-blocks-and-partition -fprofile-use + -D_PROFILE_USE; delay branch scheduling then follows a REG_CROSSING_JUMP + to redirect two breqs. */ + +static bool +arc_can_follow_jump (const_rtx follower, const_rtx followee) +{ + /* ??? get_attr_type is declared to take an rtx. */ + union { const_rtx c; rtx r; } u; + + u.c = follower; + if (find_reg_note (followee, REG_CROSSING_JUMP, NULL_RTX)) + switch (get_attr_type (u.r)) + { + case TYPE_BRCC: + case TYPE_BRCC_NO_DELAY_SLOT: + return false; + default: + return true; + } + return true; +} + +/* Implement EPILOGUE__USES. + Return true if REGNO should be added to the deemed uses of the epilogue. + + We use the return address + arc_return_address_regs[arc_compute_function_type (cfun)] . + But also, we have to make sure all the register restore instructions + are known to be live in interrupt functions. */ + +bool +arc_epilogue_uses (int regno) +{ + if (reload_completed) + { + if (ARC_INTERRUPT_P (cfun->machine->fn_type)) + { + if (!fixed_regs[regno]) + return true; + return regno == arc_return_address_regs[cfun->machine->fn_type]; + } + else + return regno == RETURN_ADDR_REGNUM; + } + else + return regno == arc_return_address_regs[arc_compute_function_type (cfun)]; +} + +#ifndef TARGET_NO_LRA +#define TARGET_NO_LRA !TARGET_LRA +#endif + +static bool +arc_lra_p (void) +{ + return !TARGET_NO_LRA; +} + +/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to use + Rcq registers, because some insn are shorter with them. OTOH we already + have separate alternatives for this purpose, and other insns don't + mind, so maybe we should rather prefer the other registers? + We need more data, and we can only get that if we allow people to + try all options. */ +static int +arc_register_priority (int r) +{ + switch (arc_lra_priority_tag) + { + case ARC_LRA_PRIORITY_NONE: + return 0; + case ARC_LRA_PRIORITY_NONCOMPACT: + return ((((r & 7) ^ 4) - 4) & 15) != r; + case ARC_LRA_PRIORITY_COMPACT: + return ((((r & 7) ^ 4) - 4) & 15) == r; + default: + gcc_unreachable (); + } +} + +static reg_class_t +arc_spill_class (reg_class_t /* orig_class */, enum machine_mode) +{ + return GENERAL_REGS; +} + +bool +arc_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum, + int itype) +{ + rtx x = *p; + enum reload_type type = (enum reload_type) itype; + + if (GET_CODE (x) == PLUS + && CONST_INT_P (XEXP (x, 1)) + && (RTX_OK_FOR_BASE_P (XEXP (x, 0), true) + || (REG_P (XEXP (x, 0)) + && reg_equiv_constant (REGNO (XEXP (x, 0)))))) + { + int scale = GET_MODE_SIZE (mode); + int shift; + rtx index_rtx = XEXP (x, 1); + HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; + rtx reg, sum, sum2; + + if (scale > 4) + scale = 4; + if ((scale-1) & offset) + scale = 1; + shift = scale >> 1; + offset_base = (offset + (256 << shift)) & (-512 << shift); + /* Sometimes the normal form does not suit DImode. We + could avoid that by using smaller ranges, but that + would give less optimized code when SImode is + prevalent. 
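(Editor's note on arc_register_priority above.) The expression ((((r & 7) ^ 4) - 4) & 15) == r is a branch-free membership test for the compact register set {r0-r3, r12-r15}: ARC_LRA_PRIORITY_COMPACT returns 1 exactly for registers in that set, ARC_LRA_PRIORITY_NONCOMPACT exactly for registers outside it. A more literal, hypothetical spelling of the same test:

    static int
    arc_compact_reg_p (int r)
    {
      return (r >= 0 && r <= 3) || (r >= 12 && r <= 15);
    }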
*/ + if (GET_MODE_SIZE (mode) + offset - offset_base <= (256 << shift)) + { + int regno; + + reg = XEXP (x, 0); + regno = REGNO (reg); + sum2 = sum = plus_constant (Pmode, reg, offset_base); + + if (reg_equiv_constant (regno)) + { + sum2 = plus_constant (Pmode, reg_equiv_constant (regno), + offset_base); + if (GET_CODE (sum2) == PLUS) + sum2 = gen_rtx_CONST (Pmode, sum2); + } + *p = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base)); + push_reload (sum2, NULL_RTX, &XEXP (*p, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, + type); + return true; + } + } + /* We must re-recognize what we created before. */ + else if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && REG_P (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (x, 1))) + { + /* Because this address is so complex, we know it must have + been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, + it is already unshared, and needs no further unsharing. */ + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, + BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); + return true; + } + return false; +} + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-arc.h" diff --git a/gcc-4.9/gcc/config/arc/arc.h b/gcc-4.9/gcc/config/arc/arc.h new file mode 100644 index 000000000..8c7350f3e --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc.h @@ -0,0 +1,1696 @@ +/* Definitions of target machine for GNU compiler, Synopsys DesignWare ARC cpu. + Copyright (C) 1994-2014 Free Software Foundation, Inc. + + Sources derived from work done by Sankhya Technologies (www.sankhya.com) on + behalf of Synopsys Inc. + + Position Independent Code support added,Code cleaned up, + Comments and Support For ARC700 instructions added by + Saurabh Verma (saurabh.verma@codito.com) + Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_ARC_H +#define GCC_ARC_H + +/* Things to do: + + - incscc, decscc? + +*/ + +#define SYMBOL_FLAG_SHORT_CALL (SYMBOL_FLAG_MACH_DEP << 0) +#define SYMBOL_FLAG_MEDIUM_CALL (SYMBOL_FLAG_MACH_DEP << 1) +#define SYMBOL_FLAG_LONG_CALL (SYMBOL_FLAG_MACH_DEP << 2) + +/* Check if this symbol has a long_call attribute in its declaration */ +#define SYMBOL_REF_LONG_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_LONG_CALL) != 0) + +/* Check if this symbol has a medium_call attribute in its declaration */ +#define SYMBOL_REF_MEDIUM_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_MEDIUM_CALL) != 0) + +/* Check if this symbol has a short_call attribute in its declaration */ +#define SYMBOL_REF_SHORT_CALL_P(X) \ + ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_SHORT_CALL) != 0) + +#undef ASM_SPEC +#undef LINK_SPEC +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC +#undef SIZE_TYPE +#undef PTRDIFF_TYPE +#undef WCHAR_TYPE +#undef WCHAR_TYPE_SIZE +#undef ASM_APP_ON +#undef ASM_APP_OFF +#undef CC1_SPEC + +/* Names to predefine in the preprocessor for this target machine. 
*/ +#define TARGET_CPU_CPP_BUILTINS() \ + do { \ + builtin_define ("__arc__"); \ + if (TARGET_A5) \ + builtin_define ("__A5__"); \ + else if (TARGET_ARC600) \ + { \ + builtin_define ("__A6__"); \ + builtin_define ("__ARC600__"); \ + } \ + else if (TARGET_ARC601) \ + { \ + builtin_define ("__ARC601__"); \ + } \ + else if (TARGET_ARC700) \ + { \ + builtin_define ("__A7__"); \ + builtin_define ("__ARC700__"); \ + } \ + if (TARGET_NORM) \ + { \ + builtin_define ("__ARC_NORM__");\ + builtin_define ("__Xnorm"); \ + } \ + if (TARGET_MUL64_SET) \ + builtin_define ("__ARC_MUL64__");\ + if (TARGET_MULMAC_32BY16_SET) \ + builtin_define ("__ARC_MUL32BY16__");\ + if (TARGET_SIMD_SET) \ + builtin_define ("__ARC_SIMD__"); \ + if (TARGET_BARREL_SHIFTER) \ + builtin_define ("__Xbarrel_shifter");\ + builtin_assert ("cpu=arc"); \ + builtin_assert ("machine=arc"); \ + builtin_define (TARGET_BIG_ENDIAN \ + ? "__BIG_ENDIAN__" : "__LITTLE_ENDIAN__"); \ + if (TARGET_BIG_ENDIAN) \ + builtin_define ("__big_endian__"); \ +} while(0) + +#if DEFAULT_LIBC == LIBC_UCLIBC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS (); \ + } \ + while (0) +#endif + +/* Match the macros used in the assembler. */ +#define CPP_SPEC "\ +%{msimd:-D__Xsimd} %{mno-mpy:-D__Xno_mpy} %{mswap:-D__Xswap} \ +%{mmin-max:-D__Xmin_max} %{mEA:-D__Xea} \ +%{mspfp*:-D__Xspfp} %{mdpfp*:-D__Xdpfp} \ +%{mmac-d16:-D__Xxmac_d16} %{mmac-24:-D__Xxmac_24} \ +%{mdsp-packa:-D__Xdsp_packa} %{mcrc:-D__Xcrc} %{mdvbf:-D__Xdvbf} \ +%{mtelephony:-D__Xtelephony} %{mxy:-D__Xxy} %{mmul64: -D__Xmult32} \ +%{mlock:-D__Xlock} %{mswape:-D__Xswape} %{mrtsc:-D__Xrtsc} \ +" + +#define CC1_SPEC "\ +%{EB:%{EL:%emay not use both -EB and -EL}} \ +%{EB:-mbig-endian} %{EL:-mlittle-endian} \ +" + +#define ASM_DEFAULT "-mARC700 -mEA" + +#define ASM_SPEC "\ +%{mbig-endian|EB:-EB} %{EL} \ +%{mcpu=A5|mcpu=a5|mA5:-mA5} \ +%{mcpu=ARC600:-mARC600} \ +%{mcpu=ARC601:-mARC601} \ +%{mcpu=ARC700:-mARC700} \ +%{mcpu=ARC700:-mEA} \ +%{!mcpu=*:" ASM_DEFAULT "} \ +%{mbarrel-shifter} %{mno-mpy} %{mmul64} %{mmul32x16:-mdsp-packa} %{mnorm} \ +%{mswap} %{mEA} %{mmin-max} %{mspfp*} %{mdpfp*} \ +%{msimd} \ +%{mmac-d16} %{mmac-24} %{mdsp-packa} %{mcrc} %{mdvbf} %{mtelephony} %{mxy} \ +%{mcpu=ARC700|!mcpu=*:%{mlock}} \ +%{mcpu=ARC700|!mcpu=*:%{mswape}} \ +%{mcpu=ARC700|!mcpu=*:%{mrtsc}} \ +" + +#if DEFAULT_LIBC == LIBC_UCLIBC +/* Note that the default is to link against dynamic libraries, if they are + available. Override with -static. */ +#define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{symbolic:-Bsymbolic} \ + %{rdynamic:-export-dynamic}\ + -dynamic-linker /lib/ld-uClibc.so.0 \ + -X %{mbig-endian:-EB} \ + %{EB} %{EL} \ + %{marclinux*} \ + %{!marclinux*: %{pg|p|profile:-marclinux_prof;: -marclinux}} \ + %{!z:-z max-page-size=0x2000 -z common-page-size=0x2000} \ + %{shared:-shared}" +/* Like the standard LINK_COMMAND_SPEC, but add %G when building + a shared library with -nostdlib, so that the hidden functions of libgcc + will be incorporated. + N.B., we don't want a plain -lgcc, as this would lead to re-exporting + non-hidden functions, so we have to consider libgcc_s.so.* first, which in + turn should be wrapped with --as-needed. 
*/ +#define LINK_COMMAND_SPEC "\ +%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ + %(linker) %l " LINK_PIE_SPEC "%X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r}\ + %{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}}\ + %{static:} %{L*} %(mfwrap) %(link_libgcc) %o\ + %{fopenmp|ftree-parallelize-loops=*:%:include(libgomp.spec)%(link_gomp)}\ + %(mflib)\ + %{fprofile-arcs|fprofile-generate|coverage:-lgcov}\ + %{!nostdlib:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}\ + %{!A:%{!nostdlib:%{!nostartfiles:%E}}} %{T*} }}}}}}" + +#else +#define LINK_SPEC "%{mbig-endian:-EB} %{EB} %{EL}\ + %{pg|p:-marcelf_prof;mA7|mARC700|mcpu=arc700|mcpu=ARC700: -marcelf}" +#endif + +#if DEFAULT_LIBC != LIBC_UCLIBC +#define STARTFILE_SPEC "%{!shared:crt0.o%s} crti%O%s %{pg|p:crtg.o%s} crtbegin.o%s" +#else +#define STARTFILE_SPEC "%{!shared:%{!mkernel:crt1.o%s}} crti.o%s \ + %{!shared:%{pg|p|profile:crtg.o%s} crtbegin.o%s} %{shared:crtbeginS.o%s}" + +#endif + +#if DEFAULT_LIBC != LIBC_UCLIBC +#define ENDFILE_SPEC "%{pg|p:crtgend.o%s} crtend.o%s crtn%O%s" +#else +#define ENDFILE_SPEC "%{!shared:%{pg|p|profile:crtgend.o%s} crtend.o%s} \ + %{shared:crtendS.o%s} crtn.o%s" + +#endif + +#if DEFAULT_LIBC == LIBC_UCLIBC +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{pg|p|profile:-lgmon -u profil --defsym __profil=profil} -lc}" +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack +#else +#undef LIB_SPEC +/* -lc_p not present for arc-elf32-* : ashwin */ +#define LIB_SPEC "%{!shared:%{g*:-lg} %{pg|p:-lgmon} -lc}" +#endif + +#ifndef DRIVER_ENDIAN_SELF_SPECS +#define DRIVER_ENDIAN_SELF_SPECS "" +#endif +#ifndef TARGET_SDATA_DEFAULT +#define TARGET_SDATA_DEFAULT 1 +#endif +#ifndef TARGET_MMEDIUM_CALLS_DEFAULT +#define TARGET_MMEDIUM_CALLS_DEFAULT 0 +#endif + +#define DRIVER_SELF_SPECS DRIVER_ENDIAN_SELF_SPECS \ + "%{mARC5|mA5: -mcpu=A5 %= ARC_FIRST_SIMD_VR_REG && REGNO <= ARC_LAST_SIMD_VR_REG) ? 1 \ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ +extern unsigned int arc_hard_regno_mode_ok[]; +extern unsigned int arc_mode_class[]; +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ +((arc_hard_regno_mode_ok[REGNO] & arc_mode_class[MODE]) != 0) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. */ + +/* Tie QI/HI/SI modes together. */ +#define MODES_TIEABLE_P(MODE1, MODE2) \ +(GET_MODE_CLASS (MODE1) == MODE_INT \ + && GET_MODE_CLASS (MODE2) == MODE_INT \ + && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \ + && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD) + +/* Internal macros to classify a register number as to whether it's a + general purpose register for compact insns (r0-r3,r12-r15), or + stack pointer (r28). */ + +#define COMPACT_GP_REG_P(REGNO) \ + (((signed)(REGNO) >= 0 && (REGNO) <= 3) || ((REGNO) >= 12 && (REGNO) <= 15)) +#define SP_REG_P(REGNO) ((REGNO) == 28) + + + +/* Register classes and constants. */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. 
+ If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. + + It is important that any condition codes have class NO_REGS. + See `register_operand'. */ + +enum reg_class +{ + NO_REGS, + R0_REGS, /* 'x' */ + GP_REG, /* 'Rgp' */ + FP_REG, /* 'f' */ + SP_REGS, /* 'b' */ + LPCOUNT_REG, /* 'l' */ + LINK_REGS, /* 'k' */ + DOUBLE_REGS, /* D0, D1 */ + SIMD_VR_REGS, /* VR00-VR63 */ + SIMD_DMA_CONFIG_REGS, /* DI0-DI7,DO0-DO7 */ + ARCOMPACT16_REGS, /* 'q' */ + AC16_BASE_REGS, /* 'e' */ + SIBCALL_REGS, /* "Rsc" */ + GENERAL_REGS, /* 'r' */ + MPY_WRITABLE_CORE_REGS, /* 'W' */ + WRITABLE_CORE_REGS, /* 'w' */ + CHEAP_CORE_REGS, /* 'c' */ + ALL_CORE_REGS, /* 'Rac' */ + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "R0_REGS", \ + "GP_REG", \ + "FP_REG", \ + "SP_REGS", \ + "LPCOUNT_REG", \ + "LINK_REGS", \ + "DOUBLE_REGS", \ + "SIMD_VR_REGS", \ + "SIMD_DMA_CONFIG_REGS", \ + "ARCOMPACT16_REGS", \ + "AC16_BASE_REGS", \ + "SIBCALL_REGS", \ + "GENERAL_REGS", \ + "MPY_WRITABLE_CORE_REGS", \ + "WRITABLE_CORE_REGS", \ + "CHEAP_CORE_REGS", \ + "ALL_CORE_REGS", \ + "ALL_REGS" \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ + +#define REG_CLASS_CONTENTS \ +{ \ + {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* No Registers */ \ + {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'x', r0 register , r0 */ \ + {0x04000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rgp', Global Pointer, r26 */ \ + {0x08000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'f', Frame Pointer, r27 */ \ + {0x10000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'b', Stack Pointer, r28 */ \ + {0x00000000, 0x10000000, 0x00000000, 0x00000000, 0x00000000}, /* 'l', LPCOUNT Register, r60 */ \ + {0xe0000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'k', LINK Registers, r29-r31 */ \ + {0x00000000, 0x00000f00, 0x00000000, 0x00000000, 0x00000000}, /* 'D', D1, D2 Registers */ \ + {0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000}, /* 'V', VR00-VR63 Registers */ \ + {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ffff}, /* 'V', DI0-7,DO0-7 Registers */ \ + {0x0000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'q', r0-r3, r12-r15 */ \ + {0x1000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'e', r0-r3, r12-r15, sp */ \ + {0x1c001fff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* "Rsc", r0-r12 */ \ + {0x9fffffff, 0xc0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \ + {0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'W', r0-r31 */ \ + /* Include ap / pcl in WRITABLE_CORE_REGS for sake of symmetry. 
As these \ + registers are fixed, it does not affect the literal meaning of the \ + constraints, but it makes it a superset of GENERAL_REGS, thus \ + enabling some operations that would otherwise not be possible. */ \ + {0xffffffff, 0xd0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60 */ \ + {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \ + {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \ + {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff} /* All Registers */ \ +} + +/* Local macros to mark the first and last regs of different classes. */ +#define ARC_FIRST_SIMD_VR_REG 64 +#define ARC_LAST_SIMD_VR_REG 127 + +#define ARC_FIRST_SIMD_DMA_CONFIG_REG 128 +#define ARC_FIRST_SIMD_DMA_CONFIG_IN_REG 128 +#define ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG 136 +#define ARC_LAST_SIMD_DMA_CONFIG_REG 143 + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +extern enum reg_class arc_regno_reg_class[]; + +#define REGNO_REG_CLASS(REGNO) (arc_regno_reg_class[REGNO]) + +/* The class value for valid index registers. An index register is + one used in an address where its value is either multiplied by + a scale factor or added to another register (as well as added to a + displacement). */ + +#define INDEX_REG_CLASS (TARGET_MIXED_CODE ? ARCOMPACT16_REGS : GENERAL_REGS) + +/* The class value for valid base registers. A base register is one used in + an address which is the register value plus a displacement. */ + +#define BASE_REG_CLASS (TARGET_MIXED_CODE ? AC16_BASE_REGS : GENERAL_REGS) + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in local-alloc.c. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < 29 || ((REGNO) == ARG_POINTER_REGNUM) || ((REGNO) == 63) ||\ + (unsigned) reg_renumber[REGNO] < 29) + +#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO) + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + arc_preferred_reload_class((X), (CLASS)) + + extern enum reg_class arc_preferred_reload_class (rtx, enum reg_class); + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ + +#define CLASS_MAX_NREGS(CLASS, MODE) \ +(( GET_MODE_SIZE (MODE) == 16 && CLASS == SIMD_VR_REGS) ? 1: \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +#define SMALL_INT(X) ((unsigned) ((X) + 0x100) < 0x200) +#define SMALL_INT_RANGE(X, OFFSET, SHIFT) \ + ((unsigned) (((X) >> (SHIFT)) + 0x100) \ + < 0x200 - ((unsigned) (OFFSET) >> (SHIFT))) +#define SIGNED_INT12(X) ((unsigned) ((X) + 0x800) < 0x1000) +#define LARGE_INT(X) \ +(((X) < 0) \ + ? 
(X) >= (-(HOST_WIDE_INT) 0x7fffffff - 1) \ + : (unsigned HOST_WIDE_INT) (X) <= (unsigned HOST_WIDE_INT) 0xffffffff) +#define UNSIGNED_INT3(X) ((unsigned) (X) < 0x8) +#define UNSIGNED_INT5(X) ((unsigned) (X) < 0x20) +#define UNSIGNED_INT6(X) ((unsigned) (X) < 0x40) +#define UNSIGNED_INT7(X) ((unsigned) (X) < 0x80) +#define UNSIGNED_INT8(X) ((unsigned) (X) < 0x100) +#define IS_ONE(X) ((X) == 1) +#define IS_ZERO(X) ((X) == 0) + +/* Stack layout and stack pointer usage. */ + +/* Define this macro if pushing a word onto the stack moves the stack + pointer to a smaller address. */ +#define STACK_GROWS_DOWNWARD + +/* Define this if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET (0) + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) (0) + +/* A C expression whose value is RTL representing the address in a + stack frame where the pointer to the caller's frame is stored. + Assume that FRAMEADDR is an RTL expression for the address of the + stack frame itself. + + If you don't define this macro, the default is to return the value + of FRAMEADDR--that is, the stack frame address is also the address + of the stack word that points to the previous frame. */ +/* ??? unfinished */ +/*define DYNAMIC_CHAIN_ADDRESS (FRAMEADDR)*/ + +/* A C expression whose value is RTL representing the value of the + return address for the frame COUNT steps up from the current frame. + FRAMEADDR is the frame pointer of the COUNT frame, or the frame + pointer of the COUNT - 1 frame if `RETURN_ADDR_IN_PREVIOUS_FRAME' + is defined. */ +/* The current return address is in r31. The return address of anything + farther back is at [%fp,4]. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ +arc_return_addr_rtx(COUNT,FRAME) + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 28 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 27 + +/* Base register for access to arguments of the function. This register + will be eliminated into either fp or sp. */ +#define ARG_POINTER_REGNUM 62 + +#define RETURN_ADDR_REGNUM 31 + +/* TODO - check usage of STATIC_CHAIN_REGNUM with a testcase */ +/* Register in which static-chain is passed to a function. This must + not be a register used by the prologue. */ +#define STATIC_CHAIN_REGNUM 11 + +/* Function argument passing. */ + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + `crtl->outgoing_args_size'. No space will be pushed + onto the stack for each call; instead, the function prologue should + increase the stack frame size by this amount. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Define a data type for recording info about an argument list + during the scan of that argument list. 
This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. */ +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \ +((CUM) = 0) + +/* The number of registers used for parameter passing. Local to this file. */ +#define MAX_ARC_PARM_REGS 8 + +/* 1 if N is a possible register number for function argument passing. */ +#define FUNCTION_ARG_REGNO_P(N) \ +((unsigned) (N) < MAX_ARC_PARM_REGS) + +/* The ROUND_ADVANCE* macros are local to this file. */ +/* Round SIZE up to a word boundary. */ +#define ROUND_ADVANCE(SIZE) \ +(((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Round arg MODE/TYPE up to the next word boundary. */ +#define ROUND_ADVANCE_ARG(MODE, TYPE) \ +((MODE) == BLKmode \ + ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \ + : ROUND_ADVANCE (GET_MODE_SIZE (MODE))) + +#define ARC_FUNCTION_ARG_BOUNDARY(MODE,TYPE) PARM_BOUNDARY +/* Round CUM up to the necessary point for argument MODE/TYPE. */ +/* N.B. Vectors have alignment exceeding BIGGEST_ALIGNMENT. + ARC_FUNCTION_ARG_BOUNDARY reduces this to no more than 32 bit. */ +#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) \ + ((((CUM) - 1) | (ARC_FUNCTION_ARG_BOUNDARY ((MODE), (TYPE)) - 1)/BITS_PER_WORD)\ + + 1) + +/* Return boolean indicating arg of type TYPE and mode MODE will be passed in + a reg. This includes arguments that have to be passed by reference as the + pointer to them is passed in a reg if one is available (and that is what + we're given). + When passing arguments NAMED is always 1. When receiving arguments NAMED + is 1 for each argument except the last in a stdarg/varargs function. In + a stdarg function we want to treat the last named arg as named. In a + varargs function we want to treat the last named arg (which is + `__builtin_va_alist') as unnamed. + This macro is only used in this file. */ +#define PASS_IN_REG_P(CUM, MODE, TYPE) \ +((CUM) < MAX_ARC_PARM_REGS) + + +/* Function results. */ + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, 0) + +/* 1 if N is a possible register number for a function value + as seen by the caller. */ +/* ??? What about r1 in DI/DF values. */ +#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0) + +/* Tell GCC to use RETURN_IN_MEMORY. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Register in which address to store a structure value + is passed to a function, or 0 to use `invisible' first argument. */ +#define STRUCT_VALUE 0 + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ +#define EXIT_IGNORE_STACK 0 + +#define EPILOGUE_USES(REGNO) arc_epilogue_uses ((REGNO)) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + We have two registers that can be eliminated on the ARC. 
First, the + argument pointer register can always be eliminated in favor of the stack + pointer register or frame pointer register. Secondly, the frame pointer + register can often be eliminated in favor of the stack pointer register. +*/ + +#define ELIMINABLE_REGS \ +{{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +extern int arc_initial_elimination_offset(int from, int to); +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = arc_initial_elimination_offset ((FROM), (TO)) + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. + We actually emit the profiler code at the call site, so leave this one + empty. */ +#define FUNCTION_PROFILER(FILE, LABELNO) \ + if (TARGET_UCB_MCOUNT) \ + fprintf (FILE, "\t%s\n", arc_output_libcall ("__mcount")) + +#define NO_PROFILE_COUNTERS 1 + +/* Trampolines. */ + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE 20 + +/* Alignment required for a trampoline in bits . */ +/* For actual data alignment we just need 32, no more than the stack; + however, to reduce cache coherency issues, we want to make sure that + trampoline instructions always appear the same in any given cache line. */ +#define TRAMPOLINE_ALIGNMENT 256 + +/* Library calls. */ + +/* Addressing modes, and classification of registers for them. */ + +/* Maximum number of registers that can appear in a valid memory address. */ +/* The `ld' insn allows 2, but the `st' insn only allows 1. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* We have pre inc/dec (load/store with update). */ +#define HAVE_PRE_INCREMENT 1 +#define HAVE_PRE_DECREMENT 1 +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_MODIFY_DISP 1 +#define HAVE_POST_MODIFY_DISP 1 +#define HAVE_PRE_MODIFY_REG 1 +#define HAVE_POST_MODIFY_REG 1 +/* ??? should also do PRE_MODIFY_REG / POST_MODIFY_REG, but that requires + a special predicate for the memory operand of stores, like for the SH. */ + +/* Recognize any constant value that is a valid address. */ +#define CONSTANT_ADDRESS_P(X) \ +(flag_pic?arc_legitimate_pic_addr_p (X): \ +(GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST)) + +/* Is the argument a const_int rtx, containing an exact power of 2 */ +#define IS_POWEROF2_P(X) (! ( (X) & ((X) - 1)) && (X)) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The *_NONSTRICT definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. 
*/ +#define REG_OK_FOR_INDEX_P_NONSTRICT(X) \ +((unsigned) REGNO (X) >= FIRST_PSEUDO_REGISTER || \ + (unsigned) REGNO (X) < 29 || \ + (unsigned) REGNO (X) == 63 || \ + (unsigned) REGNO (X) == ARG_POINTER_REGNUM) +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define REG_OK_FOR_BASE_P_NONSTRICT(X) \ +((unsigned) REGNO (X) >= FIRST_PSEUDO_REGISTER || \ + (unsigned) REGNO (X) < 29 || \ + (unsigned) REGNO (X) == 63 || \ + (unsigned) REGNO (X) == ARG_POINTER_REGNUM) + +/* Nonzero if X is a hard reg that can be used as an index. */ +#define REG_OK_FOR_INDEX_P_STRICT(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define REG_OK_FOR_BASE_P_STRICT(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression + that is a valid memory address for an instruction. + The MODE argument is the machine mode for the MEM expression + that wants to use this address. */ +/* The `ld' insn allows [reg],[reg+shimm],[reg+limm],[reg+reg],[limm] + but the `st' insn only allows [reg],[reg+shimm],[limm]. + The only thing we can do is only allow the most strict case `st' and hope + other parts optimize out the restrictions for `ld'. */ + +#define RTX_OK_FOR_BASE_P(X, STRICT) \ +(REG_P (X) \ + && ((STRICT) ? REG_OK_FOR_BASE_P_STRICT (X) : REG_OK_FOR_BASE_P_NONSTRICT (X))) + +#define RTX_OK_FOR_INDEX_P(X, STRICT) \ +(REG_P (X) \ + && ((STRICT) ? REG_OK_FOR_INDEX_P_STRICT (X) : REG_OK_FOR_INDEX_P_NONSTRICT (X))) + +/* A C compound statement that attempts to replace X, which is an address + that needs reloading, with a valid memory address for an operand of + mode MODE. WIN is a C statement label elsewhere in the code. + + We try to get a normal form + of the address. That will allow inheritance of the address reloads. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \ + do { \ + if (arc_legitimize_reload_address (&(X), (MODE), (OPNUM), (TYPE))) \ + goto WIN; \ + } while (0) + +/* Reading lp_count for anything but the lp instruction is very slow on the + ARC700. */ +#define DONT_REALLOC(REGNO,MODE) \ + (TARGET_ARC700 && (REGNO) == 60) + + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ +/*extern enum machine_mode arc_select_cc_mode ();*/ +#define SELECT_CC_MODE(OP, X, Y) \ +arc_select_cc_mode (OP, X, Y) + +/* Return non-zero if SELECT_CC_MODE will never return MODE for a + floating point inequality comparison. */ +#define REVERSIBLE_CC_MODE(MODE) 1 /*???*/ + +/* Costs. */ + +/* Compute extra cost of moving data between one register class + and another. */ +#define REGISTER_MOVE_COST(MODE, CLASS, TO_CLASS) \ + arc_register_move_cost ((MODE), (CLASS), (TO_CLASS)) + +/* Compute the cost of moving data between registers and memory. */ +/* Memory is 3 times as expensive as registers. + ??? Is that the right way to look at it? */ +#define MEMORY_MOVE_COST(MODE,CLASS,IN) \ +(GET_MODE_SIZE (MODE) <= UNITS_PER_WORD ? 6 : 12) + +/* The cost of a branch insn. */ +/* ??? What's the right value here? Branches are certainly more + expensive than reg->reg moves. */ +#define BRANCH_COST(speed_p, predictable_p) 2 + +/* Scc sets the destination to 1 and then conditionally zeroes it. + Best case, ORed SCCs can be made into clear - condset - condset. + But it could also end up as five insns. So say it costs four on + average. 
+ These extra instructions - and the second comparison - will also be + an extra cost if the first comparison would have been decisive. + So get an average saving, with a probability of the first branch + beging decisive of p0, we want: + p0 * (branch_cost - 4) > (1 - p0) * 5 + ??? We don't get to see that probability to evaluate, so we can + only wildly guess that it might be 50%. + ??? The compiler also lacks the notion of branch predictability. */ +#define LOGICAL_OP_NON_SHORT_CIRCUIT \ + (BRANCH_COST (optimize_function_for_speed_p (cfun), \ + false) > 9) + +/* Nonzero if access to memory by bytes is slow and undesirable. + For RISC chips, it means that access to memory by bytes is no + better than access by words when possible, so grab a whole word + and maybe make use of that. */ +#define SLOW_BYTE_ACCESS 0 + +/* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +/* On the ARC, calling through registers is slow. */ +#define NO_FUNCTION_CSE + +/* Section selection. */ +/* WARNING: These section names also appear in dwarfout.c. */ + +#define TEXT_SECTION_ASM_OP "\t.section\t.text" +#define DATA_SECTION_ASM_OP "\t.section\t.data" + +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata" +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss" + +/* Expression whose value is a string, including spacing, containing the + assembler operation to identify the following data as initialization/termination + code. If not defined, GCC will assume such a section does not exist. */ +#define INIT_SECTION_ASM_OP "\t.section\t.init" +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* Define this macro if jump tables (for tablejump insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. + This macro is irrelevant if there is no separate readonly data section. */ +#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic || CASE_VECTOR_PC_RELATIVE) + +/* For DWARF. Marginally different than default so output is "prettier" + (and consistent with above). */ +#define PUSHSECTION_FORMAT "\t%s %s\n" + +/* Tell crtstuff.c we're using ELF. */ +#define OBJECT_FORMAT_ELF + +/* PIC */ + +/* The register number of the register used to address a table of static + data addresses in memory. In some cases this register is defined by a + processor's ``application binary interface'' (ABI). When this macro + is defined, RTL is generated for this register once, as with the stack + pointer and frame pointer registers. If this macro is not defined, it + is up to the machine-dependent files to allocate such a register (if + necessary). */ +#define PIC_OFFSET_TABLE_REGNUM 26 + +/* Define this macro if the register defined by PIC_OFFSET_TABLE_REGNUM is + clobbered by calls. Do not define this macro if PIC_OFFSET_TABLE_REGNUM + is not defined. */ +/* This register is call-saved on the ARC. */ +/*#define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED*/ + +/* A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent code. + You can assume that X satisfies CONSTANT_P, so you need not + check this. You can also assume `flag_pic' is true, so you need not + check it either. You need not define this macro if all constants + (including SYMBOL_REF) can be immediate operands when generating + position independent code. 
*/ +#define LEGITIMATE_PIC_OPERAND_P(X) (arc_legitimate_pic_operand_p(X)) + +/* PIC and small data don't mix on ARC because they use the same register. */ +#define SDATA_BASE_REGNUM 26 + +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (flag_pic \ + ? (GLOBAL ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4 \ + : DW_EH_PE_absptr) + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will + end at the end of the line. */ +/* Gas needs this to be "#" in order to recognize line directives. */ +#define ASM_COMMENT_START "#" + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF "" + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* This is how to output an assembler line defining a `char' constant. */ +#define ASM_OUTPUT_CHAR(FILE, VALUE) \ +( fprintf (FILE, "\t.byte\t"), \ + output_addr_const (FILE, (VALUE)), \ + fprintf (FILE, "\n")) + +/* This is how to output an assembler line defining a `short' constant. */ +#define ASM_OUTPUT_SHORT(FILE, VALUE) \ +( fprintf (FILE, "\t.hword\t"), \ + output_addr_const (FILE, (VALUE)), \ + fprintf (FILE, "\n")) + +/* This is how to output an assembler line defining an `int' constant. + We also handle symbol output here. Code addresses must be right shifted + by 2 because that's how the jump instruction wants them. */ +#define ASM_OUTPUT_INT(FILE, VALUE) \ +do { \ + fprintf (FILE, "\t.word\t"); \ + if (GET_CODE (VALUE) == LABEL_REF) \ + { \ + fprintf (FILE, "%%st(@"); \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, ")"); \ + } \ + else \ + output_addr_const (FILE, (VALUE)); \ + fprintf (FILE, "\n"); \ +} while (0) + +/* This is how to output an assembler line defining a `float' constant. */ +#define ASM_OUTPUT_FLOAT(FILE, VALUE) \ +{ \ + long t; \ + char str[30]; \ + REAL_VALUE_TO_TARGET_SINGLE ((VALUE), t); \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", str); \ + fprintf (FILE, "\t.word\t0x%lx %s %s\n", \ + t, ASM_COMMENT_START, str); \ +} + +/* This is how to output an assembler line defining a `double' constant. */ +#define ASM_OUTPUT_DOUBLE(FILE, VALUE) \ +{ \ + long t[2]; \ + char str[30]; \ + REAL_VALUE_TO_TARGET_DOUBLE ((VALUE), t); \ + REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", str); \ + fprintf (FILE, "\t.word\t0x%lx %s %s\n\t.word\t0x%lx\n", \ + t[0], ASM_COMMENT_START, str, t[1]); \ +} + +/* This is how to output the definition of a user-level label named NAME, + such as the label on a static function or variable NAME. */ +#define ASM_OUTPUT_LABEL(FILE, NAME) \ +do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0) + +#define ASM_NAME_P(NAME) ( NAME[0]=='*') + +/* This is how to output a reference to a user-level label named NAME. + `assemble_name' uses this. */ +/* We work around a dwarfout.c deficiency by watching for labels from it and + not adding the '_' prefix. There is a comment in + dwarfout.c that says it should be using ASM_OUTPUT_INTERNAL_LABEL. */ +#define ASM_OUTPUT_LABELREF(FILE, NAME1) \ +do { \ + const char *NAME; \ + NAME = (*targetm.strip_name_encoding)(NAME1); \ + if ((NAME)[0] == '.' 
&& (NAME)[1] == 'L') \ + fprintf (FILE, "%s", NAME); \ + else \ + { \ + if (!ASM_NAME_P (NAME1)) \ + fprintf (FILE, "%s", user_label_prefix); \ + fprintf (FILE, "%s", NAME); \ + } \ +} while (0) + +/* This is how to output a reference to a symbol_ref / label_ref as + (part of) an operand. To disambiguate from register names like + a1 / a2 / status etc, symbols are preceded by '@'. */ +#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \ + ASM_OUTPUT_LABEL_REF ((FILE), XSTR ((SYM), 0)) +#define ASM_OUTPUT_LABEL_REF(FILE,STR) \ + do \ + { \ + fputc ('@', file); \ + assemble_name ((FILE), (STR)); \ + } \ + while (0) + +/* Store in OUTPUT a string (made with alloca) containing + an assembler-name for a local static variable named NAME. + LABELNO is an integer which is different for each call. */ +#define ASM_FORMAT_PRIVATE_NAME(OUTPUT, NAME, LABELNO) \ +( (OUTPUT) = (char *) alloca (strlen ((NAME)) + 10), \ + sprintf ((OUTPUT), "%s.%d", (NAME), (LABELNO))) + +/* The following macro defines the format used to output the second + operand of the .type assembler directive. Different svr4 assemblers + expect various different forms for this operand. The one given here + is just a default. You may need to override it in your machine- + specific tm.h file (depending upon the particulars of your assembler). */ + +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "@%s" + +/* A C string containing the appropriate assembler directive to + specify the size of a symbol, without any arguments. On systems + that use ELF, the default (in `config/elfos.h') is `"\t.size\t"'; + on other systems, the default is not to define this macro. */ +#undef SIZE_ASM_OP +#define SIZE_ASM_OP "\t.size\t" + +/* Assembler pseudo-op to equate one value with another. */ +/* ??? This is needed because dwarfout.c provides a default definition too + late for defaults.h (which contains the default definition of ASM_OTPUT_DEF + that we use). */ +#ifdef SET_ASM_OP +#undef SET_ASM_OP +#endif +#define SET_ASM_OP "\t.set\t" + +extern char rname56[], rname57[], rname58[], rname59[]; +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ +#define REGISTER_NAMES \ +{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \ + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "gp", "fp", "sp", "ilink1", "ilink2", "blink", \ + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \ + "d1", "d1", "d2", "d2", "r44", "r45", "r46", "r47", \ + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \ + rname56,rname57,rname58,rname59,"lp_count", "cc", "ap", "pcl", \ + "vr0", "vr1", "vr2", "vr3", "vr4", "vr5", "vr6", "vr7", \ + "vr8", "vr9", "vr10", "vr11", "vr12", "vr13", "vr14", "vr15", \ + "vr16", "vr17", "vr18", "vr19", "vr20", "vr21", "vr22", "vr23", \ + "vr24", "vr25", "vr26", "vr27", "vr28", "vr29", "vr30", "vr31", \ + "vr32", "vr33", "vr34", "vr35", "vr36", "vr37", "vr38", "vr39", \ + "vr40", "vr41", "vr42", "vr43", "vr44", "vr45", "vr46", "vr47", \ + "vr48", "vr49", "vr50", "vr51", "vr52", "vr53", "vr54", "vr55", \ + "vr56", "vr57", "vr58", "vr59", "vr60", "vr61", "vr62", "vr63", \ + "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", \ + "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", \ + "lp_start", "lp_end" \ +} + +/* Entry to the insn conditionalizer. 
*/ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + arc_final_prescan_insn (INSN, OPVEC, NOPERANDS) + +/* A C expression which evaluates to true if CODE is a valid + punctuation character for use in the `PRINT_OPERAND' macro. */ +extern char arc_punct_chars[]; +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ +arc_punct_chars[(unsigned char) (CHAR)] + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ +#define PRINT_OPERAND(FILE, X, CODE) \ +arc_print_operand (FILE, X, CODE) + +/* A C compound statement to output to stdio stream STREAM the + assembler syntax for an instruction operand that is a memory + reference whose address is ADDR. ADDR is an RTL expression. + + On some machines, the syntax for a symbolic address depends on + the section that the address refers to. On these machines, + define the macro `ENCODE_SECTION_INFO' to store the information + into the `symbol_ref', and then check for it here. */ +#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \ +arc_print_operand_address (FILE, ADDR) + +/* This is how to output an element of a case-vector that is absolute. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + fprintf (FILE, "\t.word "); \ + assemble_name (FILE, label); \ + fprintf(FILE, "\n"); \ +} while (0) + +/* This is how to output an element of a case-vector that is relative. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ +do { \ + char label[30]; \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \ + switch (GET_MODE (BODY)) \ + { \ + case QImode: fprintf (FILE, "\t.byte "); break; \ + case HImode: fprintf (FILE, "\t.hword "); break; \ + case SImode: fprintf (FILE, "\t.word "); break; \ + default: gcc_unreachable (); \ + } \ + assemble_name (FILE, label); \ + fprintf (FILE, "-"); \ + ASM_GENERATE_INTERNAL_LABEL (label, "L", REL); \ + assemble_name (FILE, label); \ + if (TARGET_COMPACT_CASESI) \ + fprintf (FILE, " + %d", 4 + arc_get_unalign ()); \ + fprintf(FILE, "\n"); \ +} while (0) + +/* ADDR_DIFF_VECs are in the text section and thus can affect the + current alignment. */ +#define ASM_OUTPUT_CASE_END(FILE, NUM, JUMPTABLE) \ + do \ + { \ + if (GET_CODE (PATTERN (JUMPTABLE)) == ADDR_DIFF_VEC \ + && ((GET_MODE_SIZE (GET_MODE (PATTERN (JUMPTABLE))) \ + * XVECLEN (PATTERN (JUMPTABLE), 1) + 1) \ + & 2)) \ + arc_toggle_unalign (); \ + } \ + while (0) + +#define JUMP_ALIGN(LABEL) (arc_size_opt_level < 2 ? 2 : 0) +#define LABEL_ALIGN_AFTER_BARRIER(LABEL) \ + (JUMP_ALIGN(LABEL) \ + ? JUMP_ALIGN(LABEL) \ + : GET_CODE (PATTERN (prev_active_insn (LABEL))) == ADDR_DIFF_VEC \ + ? 1 : 0) +/* The desired alignment for the location counter at the beginning + of a loop. */ +/* On the ARC, align loops to 4 byte boundaries unless doing all-out size + optimization. */ +#define LOOP_ALIGN JUMP_ALIGN + +#define LABEL_ALIGN(LABEL) (arc_label_align (LABEL)) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ +do { \ + if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); \ + if ((LOG) > 1) \ + arc_clear_unalign (); \ +} while (0) + +/* ASM_OUTPUT_ALIGNED_DECL_LOCAL (STREAM, DECL, NAME, SIZE, ALIGNMENT) + Define this macro when you need to see the variable's decl in order to + chose what to output. 
*/ +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGNMENT) \ + arc_asm_output_aligned_decl_local (STREAM, DECL, NAME, SIZE, ALIGNMENT, 0) + +/* To translate the return value of arc_function_type into a register number + to jump through for function return. */ +extern int arc_return_address_regs[4]; + +/* Debugging information. */ + +/* Generate DBX and DWARF debugging information. */ +#ifdef DBX_DEBUGGING_INFO +#undef DBX_DEBUGGING_INFO +#endif +#define DBX_DEBUGGING_INFO + +#ifdef DWARF2_DEBUGGING_INFO +#undef DWARF2_DEBUGGING_INFO +#endif +#define DWARF2_DEBUGGING_INFO + +/* Prefer STABS (for now). */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* How to renumber registers for dbx and gdb. */ +#define DBX_REGISTER_NUMBER(REGNO) \ + ((TARGET_MULMAC_32BY16_SET && (REGNO) >= 56 && (REGNO) <= 57) \ + ? ((REGNO) ^ !TARGET_BIG_ENDIAN) \ + : (TARGET_MUL64_SET && (REGNO) >= 57 && (REGNO) <= 59) \ + ? ((REGNO) == 57 \ + ? 58 /* MMED */ \ + : ((REGNO) & 1) ^ TARGET_BIG_ENDIAN \ + ? 59 /* MHI */ \ + : 57 + !!TARGET_MULMAC_32BY16_SET) /* MLO */ \ + : (REGNO)) + +#define DWARF_FRAME_REGNUM(REG) (REG) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (31) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 31) + +/* Frame info. */ + +/* Define this macro to 0 if your target supports DWARF 2 frame unwind + information, but it does not yet work with exception handling. */ +/* N.B. the below test is valid in an #if, but not in a C expression. */ +#if DEFAULT_LIBC == LIBC_UCLIBC +#define DWARF2_UNWIND_INFO 1 +#else +#define DWARF2_UNWIND_INFO 0 +#endif + +#define EH_RETURN_DATA_REGNO(N) \ + ((N) < 4 ? (N) : INVALID_REGNUM) + +/* Turn off splitting of long stabs. */ +#define DBX_CONTIN_LENGTH 0 + +/* Miscellaneous. */ + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. + If we have pc relative case vectors, we start the case vector shortening + with QImode. */ +#define CASE_VECTOR_MODE \ + ((optimize && (CASE_VECTOR_PC_RELATIVE || flag_pic)) ? QImode : Pmode) + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + Do not define this if the table should contain absolute addresses. */ +#define CASE_VECTOR_PC_RELATIVE TARGET_CASE_VECTOR_PC_RELATIVE + +#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \ + CASE_VECTOR_SHORTEN_MODE_1 \ + (MIN_OFFSET, TARGET_COMPACT_CASESI ? MAX_OFFSET + 6 : MAX_OFFSET, BODY) + +#define CASE_VECTOR_SHORTEN_MODE_1(MIN_OFFSET, MAX_OFFSET, BODY) \ +((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \ + : (MIN_OFFSET) >= -128 && (MAX_OFFSET) <= 127 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \ + : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 65535 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, HImode) \ + : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 \ + ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, HImode) \ + : SImode) + +#define ADDR_VEC_ALIGN(VEC_INSN) \ + (exact_log2 (GET_MODE_SIZE (GET_MODE (PATTERN (VEC_INSN))))) +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN ((FILE), ADDR_VEC_ALIGN (TABLE)); + +#define INSN_LENGTH_ALIGNMENT(INSN) \ + ((JUMP_P (INSN) \ + && GET_CODE (PATTERN (INSN)) == ADDR_DIFF_VEC \ + && GET_MODE (PATTERN (INSN)) == QImode) \ + ? 
0 : length_unit_log) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, NIL if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +/* Let the movmem expander handle small block moves. */ +#define MOVE_BY_PIECES_P(LEN, ALIGN) 0 +#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \ + (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \ + < (unsigned int) MOVE_RATIO (!optimize_size)) + +/* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P . */ +#define MOVE_RATIO(SPEED) ((SPEED) ? 15 : 3) + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. Changed from 1 to 0 for rotate pattern testcases + (e.g. 20020226-1.c). This change truncates the upper 27 bits of a word + while rotating a word. Came to notice through a combine phase + optimization viz. a << (32-b) is equivalent to a << (-b). +*/ +#define SHIFT_COUNT_TRUNCATED 0 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* We assume that the store-condition-codes instructions store 0 for false + and some other value for true. This is the value stored for true. */ +#define STORE_FLAG_VALUE 1 + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +/* ARCompact has full 32-bit pointers. */ +#define Pmode SImode + +/* A function address in a call instruction. */ +#define FUNCTION_MODE SImode + +/* Define the information needed to generate branch and scc insns. This is + stored from the compare operation. Note that we can't use "rtx" here + since it hasn't been defined! */ +extern struct rtx_def *arc_compare_op0, *arc_compare_op1; + +/* ARC function types. */ +enum arc_function_type { + ARC_FUNCTION_UNKNOWN, ARC_FUNCTION_NORMAL, + /* These are interrupt handlers. The name corresponds to the register + name that contains the return address. */ + ARC_FUNCTION_ILINK1, ARC_FUNCTION_ILINK2 +}; +#define ARC_INTERRUPT_P(TYPE) \ +((TYPE) == ARC_FUNCTION_ILINK1 || (TYPE) == ARC_FUNCTION_ILINK2) + +/* Compute the type of a function from its DECL. Needed for EPILOGUE_USES. */ +struct function; +extern enum arc_function_type arc_compute_function_type (struct function *); + +/* Called by crtstuff.c to make calls to function FUNCTION that are defined in + SECTION_OP, and then to switch back to text section. */ +#undef CRT_CALL_STATIC_FUNCTION +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\t" \ + "bl @" USER_LABEL_PREFIX #FUNC "\n" \ + TEXT_SECTION_ASM_OP); + +/* This macro expands to the name of the scratch register r12, used for + temporary calculations according to the ABI. */ +#define ARC_TEMP_SCRATCH_REG "r12" + +/* The C++ compiler must use one bit to indicate whether the function + that will be called through a pointer-to-member-function is + virtual. Normally, we assume that the low-order bit of a function + pointer must always be zero. 
Then, by ensuring that the + vtable_index is odd, we can distinguish which variant of the union + is in use. But, on some platforms function pointers can be odd, + and so this doesn't work. In that case, we use the low-order bit + of the `delta' field, and shift the remainder of the `delta' field + to the left. We needed to do this for A4 because the address was always + shifted and thus could be odd. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION \ + (ptrmemfunc_vbit_in_pfn) + +#define INSN_SETS_ARE_DELAYED(X) \ + (GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) != SEQUENCE \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && (get_attr_type (X) == TYPE_CALL || get_attr_type (X) == TYPE_SFUNC)) + +#define INSN_REFERENCES_ARE_DELAYED(insn) INSN_SETS_ARE_DELAYED (insn) + +#define CALL_ATTR(X, NAME) \ + ((CALL_P (X) || NONJUMP_INSN_P (X)) \ + && GET_CODE (PATTERN (X)) != USE \ + && GET_CODE (PATTERN (X)) != CLOBBER \ + && get_attr_is_##NAME (X) == IS_##NAME##_YES) \ + +#define REVERSE_CONDITION(CODE,MODE) \ + (((MODE) == CC_FP_GTmode || (MODE) == CC_FP_GEmode \ + || (MODE) == CC_FP_UNEQmode || (MODE) == CC_FP_ORDmode \ + || (MODE) == CC_FPXmode) \ + ? reverse_condition_maybe_unordered ((CODE)) \ + : reverse_condition ((CODE))) + +#define ADJUST_INSN_LENGTH(X, LENGTH) \ + ((LENGTH) \ + = (GET_CODE (PATTERN (X)) == SEQUENCE \ + ? ((LENGTH) \ + + arc_adjust_insn_length (XVECEXP (PATTERN (X), 0, 0), \ + get_attr_length (XVECEXP (PATTERN (X), \ + 0, 0)), \ + true) \ + - get_attr_length (XVECEXP (PATTERN (X), 0, 0)) \ + + arc_adjust_insn_length (XVECEXP (PATTERN (X), 0, 1), \ + get_attr_length (XVECEXP (PATTERN (X), \ + 0, 1)), \ + true) \ + - get_attr_length (XVECEXP (PATTERN (X), 0, 1))) \ + : arc_adjust_insn_length ((X), (LENGTH), false))) + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C,STR) ((C) == '`') + +#define INIT_EXPANDERS arc_init_expanders () + +#define CFA_FRAME_BASE_OFFSET(FUNDECL) (-arc_decl_pretend_args ((FUNDECL))) + +#define ARG_POINTER_CFA_OFFSET(FNDECL) \ + (FIRST_PARM_OFFSET (FNDECL) + arc_decl_pretend_args ((FNDECL))) + +enum +{ + ARC_LRA_PRIORITY_NONE, ARC_LRA_PRIORITY_NONCOMPACT, ARC_LRA_PRIORITY_COMPACT +}; + +/* The define_cond_exec construct is rather crude, as we can't have + different ones with different conditions apply to different sets + of instructions. We can't use an attribute test inside the condition, + because that would lead to infinite recursion as the attribute test + needs to recognize the insn. So, instead we have a clause for + the pattern condition of all sfunc patterns which is only relevant for + the predicated varaint. */ +#define SFUNC_CHECK_PREDICABLE \ + (GET_CODE (PATTERN (insn)) != COND_EXEC || !flag_pic || !TARGET_MEDIUM_CALLS) + +#endif /* GCC_ARC_H */ diff --git a/gcc-4.9/gcc/config/arc/arc.md b/gcc-4.9/gcc/config/arc/arc.md new file mode 100644 index 000000000..80f6e338a --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc.md @@ -0,0 +1,5165 @@ +;; Machine description of the Synopsys DesignWare ARC cpu for GNU C compiler +;; Copyright (C) 1994-2014 Free Software Foundation, Inc. + +;; Sources derived from work done by Sankhya Technologies (www.sankhya.com) on +;; behalf of Synopsys Inc. 
+ +;; Position Independent Code support added,Code cleaned up, +;; Comments and Support For ARC700 instructions added by +;; Saurabh Verma (saurabh.verma@codito.com) +;; Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) +;; +;; Profiling support and performance improvements by +;; Joern Rennecke (joern.rennecke@embecosm.com) +;; +;; Support for DSP multiply instructions and mul64 +;; instructions for ARC600; and improvements in flag setting +;; instructions by +;; Muhammad Khurram Riaz (Khurram.Riaz@arc.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; dest, src Two operand instruction's syntax +;; dest, src1, src2 Three operand instruction's syntax + +;; ARC and ARCompact PREDICATES: +;; +;; comparison_operator LT, GT, LE, GE, LTU, GTU, LEU, GEU, EQ, NE +;; memory_operand memory [m] +;; immediate_operand immediate constant [IKLMNOP] +;; register_operand register [rq] +;; general_operand register, memory, constant [rqmIKLMNOP] + +;; Note that the predicates are only used when selecting a pattern +;; to determine if an operand is valid. + +;; The constraints then select which of the possible valid operands +;; is present (and guide register selection). The actual assembly +;; instruction is then selected on the basis of the constraints. + +;; ARC and ARCompact CONSTRAINTS: +;; +;; b stack pointer r28 +;; f frame pointer r27 +;; Rgp global pointer r26 +;; g general reg, memory, constant +;; m memory +;; p memory address +;; q registers commonly used in +;; 16-bit insns r0-r3, r12-r15 +;; c core registers r0-r60, ap, pcl +;; r general registers r0-r28, blink, ap, pcl +;; +;; H fp 16-bit constant +;; I signed 12-bit immediate (for ARCompact) +;; K unsigned 3-bit immediate (for ARCompact) +;; L unsigned 6-bit immediate (for ARCompact) +;; M unsinged 5-bit immediate (for ARCompact) +;; O unsinged 7-bit immediate (for ARCompact) +;; P unsinged 8-bit immediate (for ARCompact) +;; N constant '1' (for ARCompact) + + +;; TODO: +;; -> prefetch instruction + +;; ----------------------------------------------------------------------------- + +;; Include DFA scheduluers +(include ("arc600.md")) +(include ("arc700.md")) + +;; Predicates + +(include ("predicates.md")) +(include ("constraints.md")) +;; ----------------------------------------------------------------------------- + +;; UNSPEC Usage: +;; ~~~~~~~~~~~~ +;; ----------------------------------------------------------------------------- +;; Symbolic name Value Desc. 
+;; ----------------------------------------------------------------------------- +;; UNSPEC_PLT 3 symbol to be referenced through the PLT +;; UNSPEC_GOT 4 symbol to be rerenced through the GOT +;; UNSPEC_GOTOFF 5 Local symbol.To be referenced relative to the +;; GOTBASE.(Referenced as @GOTOFF) +;; ---------------------------------------------------------------------------- + + +(define_constants + [(UNSPEC_SWAP 13) ; swap generation through builtins. candidate for scheduling + (UNSPEC_MUL64 14) ; mul64 generation through builtins. candidate for scheduling + (UNSPEC_MULU64 15) ; mulu64 generation through builtins. candidate for scheduling + (UNSPEC_DIVAW 16) ; divaw generation through builtins. candidate for scheduling + (UNSPEC_DIRECT 17) + (UNSPEC_PROF 18) ; profile callgraph counter + (UNSPEC_LP 19) ; to set LP_END + (UNSPEC_CASESI 20) + (VUNSPEC_RTIE 17) ; blockage insn for rtie generation + (VUNSPEC_SYNC 18) ; blockage insn for sync generation + (VUNSPEC_BRK 19) ; blockage insn for brk generation + (VUNSPEC_FLAG 20) ; blockage insn for flag generation + (VUNSPEC_SLEEP 21) ; blockage insn for sleep generation + (VUNSPEC_SWI 22) ; blockage insn for swi generation + (VUNSPEC_CORE_READ 23) ; blockage insn for reading a core register + (VUNSPEC_CORE_WRITE 24) ; blockage insn for writing to a core register + (VUNSPEC_LR 25) ; blockage insn for reading an auxiliary register + (VUNSPEC_SR 26) ; blockage insn for writing to an auxiliary register + (VUNSPEC_TRAP_S 27) ; blockage insn for trap_s generation + (VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation + + (R0_REG 0) + (R1_REG 1) + (R2_REG 2) + (R3_REG 3) + (R12_REG 12) + (SP_REG 28) + (ILINK1_REGNUM 29) + (ILINK2_REGNUM 30) + (RETURN_ADDR_REGNUM 31) + (MUL64_OUT_REG 58) + + (VUNSPEC_DEXCL 32) ; blockage insn for reading an auxiliary register without LR support + (VUNSPEC_DEXCL_NORES 33) ; blockage insn for reading an auxiliary register without LR support + (VUNSPEC_LR_HIGH 34) ; blockage insn for reading an auxiliary register + + (LP_COUNT 60) + (CC_REG 61) + (LP_START 144) + (LP_END 145) + ] +) + +(define_attr "is_sfunc" "no,yes" (const_string "no")) + +;; Insn type. Used to default other attribute values. +; While the attribute is_sfunc is set for any call of a special function, +; the instruction type sfunc is used only for the special call sequence +; that loads the (pc-relative) function address into r12 and then calls +; via r12. + +(define_attr "type" + "move,load,store,cmove,unary,binary,compare,shift,uncond_branch,jump,branch, + brcc,brcc_no_delay_slot,call,sfunc,call_no_delay_slot, + multi,umulti, two_cycle_core,lr,sr,divaw,loop_setup,loop_end,return, + misc,spfp,dpfp_mult,dpfp_addsub,mulmac_600,cc_arith, + simd_vload, simd_vload128, simd_vstore, simd_vmove, simd_vmove_else_zero, + simd_vmove_with_acc, simd_varith_1cycle, simd_varith_2cycle, + simd_varith_with_acc, simd_vlogic, simd_vlogic_with_acc, + simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc, + simd_valign, simd_valign_with_acc, simd_vcontrol, + simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma" + (cond [(eq_attr "is_sfunc" "yes") + (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call") + (match_test "flag_pic") (const_string "sfunc")] + (const_string "call_no_delay_slot"))] + (const_string "binary"))) + +;; The following three attributes are mixed case so that they can be +;; used conveniently with the CALL_ATTR macro. 
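The mixed-case attribute names called out in the comment above are easiest to see by expanding the CALL_ATTR helper defined earlier in arc.h. The sketch below is an illustrative hand expansion, not part of the checked-in sources; it assumes only standard cpp token pasting and the accessor/enum names genattrtab generates for an attribute spelled is_SIBCALL.

;; Illustrative hand expansion (not from the original sources):
;; CALL_ATTR (insn, SIBCALL) pastes the attribute name into both the
;; accessor call and the enum value, giving roughly
;;
;;   (CALL_P (insn) || NONJUMP_INSN_P (insn))
;;     && GET_CODE (PATTERN (insn)) != USE
;;     && GET_CODE (PATTERN (insn)) != CLOBBER
;;     && get_attr_is_SIBCALL (insn) == IS_SIBCALL_YES
;;
;; so the attributes must literally be spelled is_CALL, is_SIBCALL and
;; is_NON_SIBCALL for the pasted names to match what genattrtab emits.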
+(define_attr "is_CALL" "no,yes" + (cond [(eq_attr "is_sfunc" "yes") (const_string "yes") + (eq_attr "type" "call,call_no_delay_slot") (const_string "yes")] + (const_string "no"))) + +(define_attr "is_SIBCALL" "no,yes" (const_string "no")) + +(define_attr "is_NON_SIBCALL" "no,yes" + (cond [(eq_attr "is_SIBCALL" "yes") (const_string "no") + (eq_attr "is_CALL" "yes") (const_string "yes")] + (const_string "no"))) + + +;; Attribute describing the processor +(define_attr "cpu" "none,A5,ARC600,ARC700" + (const (symbol_ref "arc_cpu_attr"))) + +;; true for compact instructions (those with _s suffix) +;; "maybe" means compact unless we conditionalize the insn. +(define_attr "iscompact" "true,maybe,true_limm,maybe_limm,false" + (cond [(eq_attr "type" "sfunc") + (const_string "maybe")] + (const_string "false"))) + + +; Is there an instruction that we are actually putting into the delay slot? +(define_attr "delay_slot_filled" "no,yes" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_string "no") + (match_test "!TARGET_AT_DBR_CONDEXEC + && JUMP_P (insn) + && INSN_ANNULLED_BRANCH_P (insn) + && !INSN_FROM_TARGET_P (NEXT_INSN (insn))") + (const_string "no")] + (const_string "yes"))) + +; Is a delay slot present for purposes of shorten_branches? +; We have to take the length of this insn into account for forward branches +; even if we don't put the insn actually into a delay slot. +(define_attr "delay_slot_present" "no,yes" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_string "no")] + (const_string "yes"))) + +; We can't use get_attr_length (NEXT_INSN (insn)) because this gives the +; length of a different insn with the same uid. +(define_attr "delay_slot_length" "" + (cond [(match_test "NEXT_INSN (PREV_INSN (insn)) == insn") + (const_int 0)] + (symbol_ref "get_attr_length (NEXT_INSN (PREV_INSN (insn))) + - get_attr_length (insn)"))) + + +(define_attr "enabled" "no,yes" (const_string "yes")) + +(define_attr "predicable" "no,yes" (const_string "no")) +;; if 'predicable' were not so brain-dead, we would specify: +;; (cond [(eq_attr "cond" "!canuse") (const_string "no") +;; (eq_attr "iscompact" "maybe") (const_string "no")] +;; (const_string "yes")) +;; and then for everything but calls, we could just set the cond attribute. + +;; Condition codes: this one is used by final_prescan_insn to speed up +;; conditionalizing instructions. It saves having to scan the rtl to see if +;; it uses or alters the condition codes. + +;; USE: This insn uses the condition codes (eg: a conditional branch). +;; CANUSE: This insn can use the condition codes (for conditional execution). +;; SET: All condition codes are set by this insn. +;; SET_ZN: the Z and N flags are set by this insn. +;; SET_ZNC: the Z, N, and C flags are set by this insn. +;; CLOB: The condition codes are set to unknown values by this insn. +;; NOCOND: This insn can't use and doesn't affect the condition codes. 
+ +(define_attr "cond" "use,canuse,canuse_limm,canuse_limm_add,set,set_zn,clob,nocond" + (cond + [(and (eq_attr "predicable" "yes") + (eq_attr "is_sfunc" "no") + (eq_attr "delay_slot_filled" "no")) + (const_string "canuse") + + (eq_attr "type" "call") + (cond [(eq_attr "delay_slot_filled" "yes") (const_string "nocond") + (match_test "!flag_pic") (const_string "canuse_limm")] + (const_string "nocond")) + + (eq_attr "iscompact" "maybe,false") + (cond [ (and (eq_attr "type" "move") + (match_operand 1 "immediate_operand" "")) + (if_then_else + (ior (match_operand 1 "u6_immediate_operand" "") + (match_operand 1 "long_immediate_operand" "")) + (const_string "canuse") + (const_string "canuse_limm")) + + (eq_attr "type" "binary") + (cond [(ne (symbol_ref "REGNO (operands[0])") + (symbol_ref "REGNO (operands[1])")) + (const_string "nocond") + (match_operand 2 "register_operand" "") + (const_string "canuse") + (match_operand 2 "u6_immediate_operand" "") + (const_string "canuse") + (match_operand 2 "long_immediate_operand" "") + (const_string "canuse") + (match_operand 2 "const_int_operand" "") + (const_string "canuse_limm")] + (const_string "nocond")) + + (eq_attr "type" "compare") + (const_string "set") + + (eq_attr "type" "cmove,branch") + (const_string "use") + + (eq_attr "is_sfunc" "yes") + (cond [(match_test "(TARGET_MEDIUM_CALLS + && !TARGET_LONG_CALLS_SET + && flag_pic)") + (const_string "canuse_limm_add") + (match_test "(TARGET_MEDIUM_CALLS + && !TARGET_LONG_CALLS_SET)") + (const_string "canuse_limm")] + (const_string "canuse")) + + ] + + (const_string "nocond"))] + + (cond [(eq_attr "type" "compare") + (const_string "set") + + (eq_attr "type" "cmove,branch") + (const_string "use") + + ] + + (const_string "nocond")))) + +/* ??? Having all these patterns gives ifcvt more freedom to generate + inefficient code. It seem to operate on the premise that + register-register copies and registers are free. I see better code + with -fno-if-convert now than without. */ +(define_cond_exec + [(match_operator 0 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])] + "true" + "") + +;; Length (in # of bytes, long immediate constants counted too). +;; ??? There's a nasty interaction between the conditional execution fsm +;; and insn lengths: insns with shimm values cannot be conditionally executed. 
+(define_attr "length" "" + (cond + [(eq_attr "iscompact" "true,maybe") + (cond + [(eq_attr "type" "sfunc") + (cond [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") + (const_int 12)] + (const_int 10)) + (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)] + (const_int 2)) + + (eq_attr "iscompact" "true_limm,maybe_limm") + (const_int 6) + + (eq_attr "type" "load") + (if_then_else + (match_operand 1 "long_immediate_loadstore_operand" "") + (const_int 8) (const_int 4)) + + (eq_attr "type" "store") + (if_then_else + (ior (match_operand 0 "long_immediate_loadstore_operand" "") + (match_operand 1 "immediate_operand" "")) + (const_int 8) (const_int 4)) + + (eq_attr "type" "move,unary") + (cond + [(match_operand 1 "u6_immediate_operand" "") (const_int 4) + (match_operand 1 "register_operand" "") (const_int 4) + (match_operand 1 "long_immediate_operand" "") (const_int 8) + (match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 8)] + (const_int 4)) + + (and (eq_attr "type" "shift") + (match_operand 1 "immediate_operand")) + (const_int 8) + (eq_attr "type" "binary,shift") + (if_then_else + (ior (match_operand 2 "long_immediate_operand" "") + (and (ne (symbol_ref "REGNO (operands[0])") + (symbol_ref "REGNO (operands[1])")) + (eq (match_operand 2 "u6_immediate_operand" "") + (const_int 0)))) + + (const_int 8) (const_int 4)) + + (eq_attr "type" "cmove") + (if_then_else (match_operand 1 "register_operand" "") + (const_int 4) (const_int 8)) + + (eq_attr "type" "call_no_delay_slot") (const_int 8) + ] + + (const_int 4)) +) + +;; The length here is the length of a single asm. Unfortunately it might be +;; 4 or 8 so we must allow for 8. That's ok though. How often will users +;; lament asm's not being put in delay slots? +;; +(define_asm_attributes + [(set_attr "length" "8") + (set_attr "type" "multi") + (set_attr "cond" "clob") ]) + +;; Delay slots. +;; The first two cond clauses and the default are necessary for correctness; +;; the remaining cond clause is mainly an optimization, as otherwise nops +;; would be inserted; however, if we didn't do this optimization, we would +;; have to be more conservative in our length calculations. + +(define_attr "in_delay_slot" "false,true" + (cond [(eq_attr "type" "uncond_branch,jump,branch, + call,sfunc,call_no_delay_slot, + brcc, brcc_no_delay_slot,loop_setup,loop_end") + (const_string "false") + (match_test "arc_write_ext_corereg (insn)") + (const_string "false") + (gt (symbol_ref "arc_hazard (prev_active_insn (insn), + next_active_insn (insn))") + (symbol_ref "(arc_hazard (prev_active_insn (insn), insn) + + arc_hazard (insn, next_active_insn (insn)))")) + (const_string "false") + (eq_attr "iscompact" "maybe") (const_string "true") + ] + + (if_then_else (eq_attr "length" "2,4") + (const_string "true") + (const_string "false")))) + +; must not put an insn inside that refers to blink. +(define_attr "in_call_delay_slot" "false,true" + (cond [(eq_attr "in_delay_slot" "false") + (const_string "false") + (match_test "arc_regno_use_in (RETURN_ADDR_REGNUM, PATTERN (insn))") + (const_string "false")] + (const_string "true"))) + +(define_attr "in_sfunc_delay_slot" "false,true" + (cond [(eq_attr "in_call_delay_slot" "false") + (const_string "false") + (match_test "arc_regno_use_in (12, PATTERN (insn))") + (const_string "false")] + (const_string "true"))) + +;; Instructions that we can put into a delay slot and conditionalize. 
+(define_attr "cond_delay_insn" "no,yes" + (cond [(eq_attr "cond" "!canuse") (const_string "no") + (eq_attr "type" "call,branch,uncond_branch,jump,brcc") + (const_string "no") + (eq_attr "length" "2,4") (const_string "yes")] + (const_string "no"))) + +(define_attr "in_ret_delay_slot" "no,yes" + (cond [(eq_attr "in_delay_slot" "false") + (const_string "no") + (match_test "regno_clobbered_p + (arc_return_address_regs + [arc_compute_function_type (cfun)], + insn, SImode, 1)") + (const_string "no")] + (const_string "yes"))) + +(define_attr "cond_ret_delay_insn" "no,yes" + (cond [(eq_attr "in_ret_delay_slot" "no") (const_string "no") + (eq_attr "cond_delay_insn" "no") (const_string "no")] + (const_string "yes"))) + +(define_attr "annul_ret_delay_insn" "no,yes" + (cond [(eq_attr "cond_ret_delay_insn" "yes") (const_string "yes") + (match_test "TARGET_AT_DBR_CONDEXEC") (const_string "no") + (eq_attr "type" "!call,branch,uncond_branch,jump,brcc,return,sfunc") + (const_string "yes")] + (const_string "no"))) + + +;; Delay slot definition for ARCompact ISA +;; ??? FIXME: +;; When outputting an annul-true insn elegible for cond-exec +;; in a cbranch delay slot, unless optimizing for size, we use cond-exec +;; for ARC600; we could also use this for ARC700 if the branch can't be +;; unaligned and is at least somewhat likely (add parameter for this). + +(define_delay (eq_attr "type" "call") + [(eq_attr "in_call_delay_slot" "true") + (eq_attr "in_call_delay_slot" "true") + (nil)]) + +(define_delay (and (match_test "!TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "brcc")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "in_delay_slot" "true") + (nil)]) + +(define_delay (and (match_test "TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "brcc")) + [(eq_attr "in_delay_slot" "true") + (nil) + (nil)]) + +(define_delay + (eq_attr "type" "return") + [(eq_attr "in_ret_delay_slot" "yes") + (eq_attr "annul_ret_delay_insn" "yes") + (eq_attr "cond_ret_delay_insn" "yes")]) + +;; For ARC600, unexposing the delay sloy incurs a penalty also in the +;; non-taken case, so the only meaningful way to have an annull-true +;; filled delay slot is to conditionalize the delay slot insn. +(define_delay (and (match_test "TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "branch,uncond_branch,jump") + (match_test "!optimize_size")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "cond_delay_insn" "yes") + (eq_attr "cond_delay_insn" "yes")]) + +;; For ARC700, anything goes for annulled-true insns, since there is no +;; penalty for the unexposed delay slot when the branch is not taken, +;; however, we must avoid things that have a delay slot themselvese to +;; avoid confusing gcc. +(define_delay (and (match_test "!TARGET_AT_DBR_CONDEXEC") + (eq_attr "type" "branch,uncond_branch,jump") + (match_test "!optimize_size")) + [(eq_attr "in_delay_slot" "true") + (eq_attr "type" "!call,branch,uncond_branch,jump,brcc,return,sfunc") + (eq_attr "cond_delay_insn" "yes")]) + +;; -mlongcall -fpic sfuncs use r12 to load the function address +(define_delay (eq_attr "type" "sfunc") + [(eq_attr "in_sfunc_delay_slot" "true") + (eq_attr "in_sfunc_delay_slot" "true") + (nil)]) +;; ??? need to use a working strategy for canuse_limm: +;; - either canuse_limm is not eligible for delay slots, and has no +;; delay slots, or arc_reorg has to treat them as nocond, or it has to +;; somehow modify them to become inelegible for delay slots if a decision +;; is made that makes conditional execution required. 
+ +(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac" + (const + (cond [(symbol_ref "arc_tune == TUNE_ARC600") + (const_string "arc600") + (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") + (const_string "arc700_4_2_std") + (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") + (const_string "arc700_4_2_xmac")] + (const_string "none")))) + +(define_attr "tune_arc700" "false,true" + (if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac") + (const_string "true") + (const_string "false"))) + +;; Move instructions. +(define_expand "movqi" + [(set (match_operand:QI 0 "move_dest_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, QImode)) DONE;") + +; In order to allow the ccfsm machinery to do its work, the leading compact +; alternatives say 'canuse' - there is another alternative that will match +; when the condition codes are used. +; Rcq won't match if the condition is actually used; to avoid a spurious match +; via q, q is inactivated as constraint there. +; Likewise, the length of an alternative that might be shifted to conditional +; execution must reflect this, lest out-of-range branches are created. +; The iscompact attribute allows the epilogue expander to know for which +; insns it should lengthen the return insn. +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w,???w, w,Rcq,S,!*x,r,m,???m") + (match_operand:QI 1 "move_src_operand" "cL,cP,Rcq#q,cL,I,?Rac,?i,T,Rcq,Usd,m,c,?Rac"))] + "register_operand (operands[0], QImode) + || register_operand (operands[1], QImode)" + "@ + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%S1 + ldb%? %0,%1%& + stb%? %1,%0%& + ldb%? %0,%1%& + ldb%U1%V1 %0,%1 + stb%U0%V0 %1,%0 + stb%U0%V0 %1,%0" + [(set_attr "type" "move,move,move,move,move,move,move,load,store,load,load,store,store") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,true,true,true,false,false,false") + (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,no,no,no,no,no,no")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "move_dest_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, HImode)) DONE;") + +(define_insn "*movhi_insn" + [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w,???w,Rcq#q,w,Rcq,S,r,m,???m,VUsc") + (match_operand:HI 1 "move_src_operand" "cL,cP,Rcq#q,cL,I,?Rac, ?i,?i,T,Rcq,m,c,?Rac,i"))] + "register_operand (operands[0], HImode) + || register_operand (operands[1], HImode) + || (CONSTANT_P (operands[1]) + /* Don't use a LIMM that we could load with a single insn - we loose + delay-slot filling opportunities. */ + && !satisfies_constraint_I (operands[1]) + && satisfies_constraint_Usc (operands[0]))" + "@ + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1%& + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%1 + mov%? %0,%S1%& + mov%? %0,%S1 + ldw%? %0,%1%& + stw%? 
%1,%0%& + ldw%U1%V1 %0,%1 + stw%U0%V0 %1,%0 + stw%U0%V0 %1,%0 + stw%U0%V0 %S1,%0" + [(set_attr "type" "move,move,move,move,move,move,move,move,load,store,load,store,store,store") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,maybe_limm,false,true,true,false,false,false,false") + (set_attr "predicable" "yes,no,yes,yes,no,yes,yes,yes,no,no,no,no,no,no")]) + +(define_expand "movsi" + [(set (match_operand:SI 0 "move_dest_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + "if (prepare_move_operands (operands, SImode)) DONE;") + +; In order to allow the ccfsm machinery to do its work, the leading compact +; alternatives say 'canuse' - there is another alternative that will match +; when the condition codes are used. +; Rcq won't match if the condition is actually used; to avoid a spurious match +; via q, q is inactivated as constraint there. +; Likewise, the length of an alternative that might be shifted to conditional +; execution must reflect this, lest out-of-range branches are created. +; The iscompact attribute allows the epilogue expander to know for which +; insns it should lengthen the return insn. +; N.B. operand 1 of alternative 7 expands into pcl,symbol@gotpc . +(define_insn "*movsi_insn" + [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w, w,???w, ?w, w,Rcq#q, w,Rcq, S,Us<,RcqRck,!*x,r,m,???m,VUsc") + (match_operand:SI 1 "move_src_operand" " cL,cP,Rcq#q,cL,I,Crr,?Rac,Cpc,Clb,?Cal,?Cal,T,Rcq,RcqRck,Us>,Usd,m,c,?Rac,C32"))] + "register_operand (operands[0], SImode) + || register_operand (operands[1], SImode) + || (CONSTANT_P (operands[1]) + /* Don't use a LIMM that we could load with a single insn - we lose + delay-slot filling opportunities. */ + && !satisfies_constraint_I (operands[1]) + && satisfies_constraint_Usc (operands[0]))" + "@ + mov%? %0,%1%& ;0 + mov%? %0,%1%& ;1 + mov%? %0,%1%& ;2 + mov%? %0,%1 ;3 + mov%? %0,%1 ;4 + ror %0,((%1*2+1) & 0x3f) ;5 + mov%? %0,%1 ;6 + add %0,%S1 ;7 + * return arc_get_unalign () ? \"add %0,pcl,%1-.+2\" : \"add %0,pcl,%1-.\"; + mov%? %0,%S1%& ;9 + mov%? %0,%S1 ;10 + ld%? %0,%1%& ;11 + st%? %1,%0%& ;12 + * return arc_short_long (insn, \"push%? %1%&\", \"st%U0 %1,%0%&\"); + * return arc_short_long (insn, \"pop%? %0%&\", \"ld%U1 %0,%1%&\"); + ld%? %0,%1%& ;15 + ld%U1%V1 %0,%1 ;16 + st%U0%V0 %1,%0 ;17 + st%U0%V0 %1,%0 ;18 + st%U0%V0 %S1,%0 ;19" + [(set_attr "type" "move,move,move,move,move,two_cycle_core,move,binary,binary,move,move,load,store,store,load,load,load,store,store,store") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,maybe_limm,false,true,true,true,true,true,false,false,false,false") + ; Use default length for iscompact to allow for COND_EXEC. But set length + ; of Crr to 4. + (set_attr "length" "*,*,*,4,4,4,4,8,8,*,8,*,*,*,*,*,*,*,*,8") + (set_attr "predicable" "yes,no,yes,yes,no,no,yes,no,no,yes,yes,no,no,no,no,no,no,no,no,no")]) + +;; Sometimes generated by the epilogue code. We don't want to +;; recognize these addresses in general, because the limm is costly, +;; and we can't use them for stores. +(define_insn "*movsi_pre_mod" + [(set (match_operand:SI 0 "register_operand" "=w") + (mem:SI (pre_modify + (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand 1 "immediate_operand" "Cal")))))] + "reload_completed" + "ld.a %0,[sp,%1]" + [(set_attr "type" "load") + (set_attr "length" "8")]) + +;; Store a value directly to memory. The location might also be cached.
+;; Since the cached copy can cause a write-back at unpredictable times, +;; we first write cached, then we write uncached. +(define_insn "store_direct" + [(set (match_operand:SI 0 "move_dest_operand" "=m") + (unspec:SI [(match_operand:SI 1 "register_operand" "c")] + UNSPEC_DIRECT))] + "" + "st%U0 %1,%0\;st%U0.di %1,%0" + [(set_attr "type" "store")]) + +(define_insn_and_split "*movsi_set_cc_insn" + [(set (match_operand:CC_ZN 2 "cc_set_register" "") + (match_operator 3 "zn_compare_operator" + [(match_operand:SI 1 "nonmemory_operand" "cI,cL,Cal") (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (match_dup 1))] + "" + "mov%?.f %0,%S1" + ; splitting to 'tst' allows short insns and combination into brcc. + "reload_completed && operands_match_p (operands[0], operands[1])" + [(set (match_dup 2) (match_dup 3))] + "" + [(set_attr "type" "compare") + (set_attr "predicable" "no,yes,yes") + (set_attr "cond" "set_zn") + (set_attr "length" "4,4,8")]) + +(define_insn "unary_comparison" + [(set (match_operand:CC_ZN 0 "cc_set_register" "") + (match_operator 3 "zn_compare_operator" + [(match_operator:SI 2 "unary_operator" + [(match_operand:SI 1 "register_operand" "c")]) + (const_int 0)]))] + "" + "%O2.f 0,%1" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn")]) + + +; this pattern is needed by combiner for cases like if (c=(~b)) { ... } +(define_insn "*unary_comparison_result_used" + [(set (match_operand 2 "cc_register" "") + (match_operator 4 "zn_compare_operator" + [(match_operator:SI 3 "unary_operator" + [(match_operand:SI 1 "register_operand" "c")]) + (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w") + (match_dup 3))] + "" + "%O3.f %0,%1" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn") + (set_attr "length" "4")]) + +(define_insn "*tst" + [(set + (match_operand 0 "cc_register" "") + (match_operator 3 "zn_compare_operator" + [(and:SI + (match_operand:SI 1 "register_operand" + "%Rcq,Rcq, c, c, c, c, c, c") + (match_operand:SI 2 "nonmemory_operand" + " Rcq,C0p,cI,cL,C1p,Ccp,CnL,Cal")) + (const_int 0)]))] + "(register_operand (operands[1], SImode) + && nonmemory_operand (operands[2], SImode)) + || (memory_operand (operands[1], SImode) + && satisfies_constraint_Cux (operands[2]))" + "* + switch (which_alternative) + { + case 0: case 2: case 3: case 7: + return \"tst%? %1,%2\"; + case 1: + return \"btst%? 
%1,%z2\"; + case 4: + return \"bmsk%?.f 0,%1,%Z2%&\"; + case 5: + return \"bclr%?.f 0,%1,%M2%&\"; + case 6: + return \"bic%?.f 0,%1,%n2-1\"; + default: + gcc_unreachable (); + } + " + [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false") + (set_attr "type" "compare") + (set_attr "length" "*,*,4,4,4,4,4,8") + (set_attr "predicable" "no,yes,no,yes,no,no,no,yes") + (set_attr "cond" "set_zn")]) + +(define_insn "*commutative_binary_comparison" + [(set (match_operand:CC_ZN 0 "cc_set_register" "") + (match_operator 5 "zn_compare_operator" + [(match_operator:SI 4 "commutative_operator" + [(match_operand:SI 1 "register_operand" "%c,c,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + (clobber (match_scratch:SI 3 "=X,1,X"))] + "" + "%O4.f 0,%1,%2" + [(set_attr "type" "compare") + (set_attr "cond" "set_zn") + (set_attr "length" "4,4,8")]) + +; for flag setting 'add' instructions like if (a+b) { ...} +; the combiner needs this pattern +(define_insn "*addsi_compare" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_operand:SI 0 "register_operand" "c") + (neg:SI (match_operand:SI 1 "register_operand" "c"))))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4")]) + +; for flag setting 'add' instructions like if (a+b < a) { ...} +; the combiner needs this pattern +(define_insn "addsi_compare_2" + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_operand:SI 0 "register_operand" "c,c") + (match_operand:SI 1 "nonmemory_operand" "cL,Cal")) + (match_dup 0)))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,8")]) + +(define_insn "*addsi_compare_3" + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_operand:SI 0 "register_operand" "c") + (match_operand:SI 1 "register_operand" "c")) + (match_dup 1)))] + "" + "add.f 0,%0,%1" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4")]) + +; this pattern is needed by combiner for cases like if (c=a+b) { ... } +(define_insn "*commutative_binary_comparison_result_used" + [(set (match_operand 3 "cc_register" "") + (match_operator 5 "zn_compare_operator" + ; We can accept any commutative operator except mult because + ; our 'w' class below could try to use LP_COUNT. + [(match_operator:SI 4 "commutative_operator_sans_mult" + [(match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (match_dup 4))] + "" + "%O4.f %0,%1,%2 ; non-mult commutative" + [(set_attr "type" "compare,compare,compare") + (set_attr "cond" "set_zn,set_zn,set_zn") + (set_attr "length" "4,4,8")]) + +; a MULT-specific version of this pattern to avoid touching the +; LP_COUNT register +(define_insn "*commutative_binary_mult_comparison_result_used" + [(set (match_operand 3 "cc_register" "") + (match_operator 5 "zn_compare_operator" + [(match_operator:SI 4 "mult_operator" + [(match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")]) + (const_int 0)])) + ; Make sure to use the W class to not touch LP_COUNT. 
+ (set (match_operand:SI 0 "register_operand" "=W,W,W") + (match_dup 4))] + "TARGET_ARC700" + "%O4.f %0,%1,%2 ; mult commutative" + [(set_attr "type" "compare,compare,compare") + (set_attr "cond" "set_zn,set_zn,set_zn") + (set_attr "length" "4,4,8")]) + +; this pattern is needed by combiner for cases like if (c=a<= 0 + && INTVAL (operands[2]) <= 65535) + { + emit_insn (gen_umul_600 (operands[1], operands[2], + gen_acc2 (), gen_acc1 ())); + emit_move_insn (operands[0], gen_acc2 ()); + DONE; + } + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_umul_600 (operands[1], operands[2], + gen_acc2 (), gen_acc1 ())); + emit_insn (gen_mac_600 (operands[1], operands[2], + gen_acc2 (), gen_acc1 ())); + emit_move_insn (operands[0], gen_acc2 ()); + DONE; + } + else + { + emit_move_insn (gen_rtx_REG (SImode, R0_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R1_REG), operands[2]); + emit_insn (gen_mulsi3_600_lib ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, R0_REG)); + DONE; + } +}) + +; mululw conditional execution without a LIMM clobbers an input register; +; we'd need a different pattern to describe this. +; To make the conditional execution valid for the LIMM alternative, we +; have to emit the LIMM before the register operand. +(define_insn "umul_600" + [(set (match_operand:SI 2 "acc2_operand" "") + (mult:SI (match_operand:SI 0 "register_operand" "c,c,c") + (zero_extract:SI (match_operand:SI 1 "nonmemory_operand" + "c,L,Cal") + (const_int 16) + (const_int 0)))) + (clobber (match_operand:SI 3 "acc1_operand" ""))] + "TARGET_MULMAC_32BY16_SET" + "@mululw 0, %0, %1 + mululw 0, %0, %1 + mululw%? 0, %1, %0" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600, mulmac_600, mulmac_600") + (set_attr "predicable" "no, no, yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + +(define_insn "mac_600" + [(set (match_operand:SI 2 "acc2_operand" "") + (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "c,c,c") + (ashift:SI + (zero_extract:SI (match_operand:SI 1 "nonmemory_operand" "c,L,Cal") + (const_int 16) + (const_int 16)) + (const_int 16))) + (match_dup 2))) + (clobber (match_operand:SI 3 "acc1_operand" ""))] + "TARGET_MULMAC_32BY16_SET" + "machlw%? 0, %0, %1" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600, mulmac_600, mulmac_600") + (set_attr "predicable" "no, no, yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + +(define_insn "mulsi_600" + [(set (match_operand:SI 2 "mlo_operand" "") + (mult:SI (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c") + (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,I,Cal"))) + (clobber (match_operand:SI 3 "mhi_operand" ""))] + "TARGET_MUL64_SET" +; The assembler mis-assembles mul64 / mulu64 with "I" constraint constants, +; using a machine code pattern that only allows "L" constraint constants. +; "mul64%? \t0, %0, %1%&" +{ + if (satisfies_constraint_I (operands[1]) + && !satisfies_constraint_L (operands[1])) + { + /* MUL64 <0,>b,s12 00101bbb10000100 0BBBssssssSSSSSS */ + int n = true_regnum (operands[0]); + int i = INTVAL (operands[1]); + asm_fprintf (asm_out_file, "\t.short %d`", 0x2884 + ((n & 7) << 8)); + asm_fprintf (asm_out_file, "\t.short %d`", + ((i & 0x3f) << 6) + ((i >> 6) & 0x3f) + ((n & 070) << 9)); + return "; mul64%? \t0, %0, %1%&"; + } + return "mul64%? 
\t0, %0, %1%&"; +} + [(set_attr "length" "*,4,4,8") + (set_attr "iscompact" "maybe,false,false,false") + (set_attr "type" "multi,multi,multi,multi") + (set_attr "predicable" "yes,yes,no,yes") + (set_attr "cond" "canuse,canuse,canuse_limm,canuse")]) + +; If we compile without an mul option enabled, but link with libraries +; for a mul option, we'll see clobbers of multiplier output registers. +; There is also an implementation using norm that clobbers the loop registers. +(define_insn "mulsi3_600_lib" + [(set (reg:SI R0_REG) + (mult:SI (reg:SI R0_REG) (reg:SI R1_REG))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R3_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:SI LP_COUNT)) + (clobber (reg:SI LP_START)) + (clobber (reg:SI LP_END)) + (clobber (reg:CC CC_REG))] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__mulsi3\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_insn "mulsidi_600" + [(set (reg:DI MUL64_OUT_REG) + (mult:DI (sign_extend:DI + (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c")) + (sign_extend:DI +; assembler issue for "I", see mulsi_600 +; (match_operand:SI 1 "register_operand" "Rcq#q,cL,I,Cal"))))] + (match_operand:SI 1 "register_operand" "Rcq#q,cL,L,C32"))))] + "TARGET_MUL64_SET" + "mul64%? \t0, %0, %1%&" + [(set_attr "length" "*,4,4,8") + (set_attr "iscompact" "maybe,false,false,false") + (set_attr "type" "multi,multi,multi,multi") + (set_attr "predicable" "yes,yes,no,yes") + (set_attr "cond" "canuse,canuse,canuse_limm,canuse")]) + +(define_insn "umulsidi_600" + [(set (reg:DI MUL64_OUT_REG) + (mult:DI (zero_extend:DI + (match_operand:SI 0 "register_operand" "c,c,%c")) + (sign_extend:DI +; assembler issue for "I", see mulsi_600 +; (match_operand:SI 1 "register_operand" "cL,I,Cal"))))] + (match_operand:SI 1 "register_operand" "cL,L,C32"))))] + "TARGET_MUL64_SET" + "mulu64%? \t0, %0, %1%&" + [(set_attr "length" "4,4,8") + (set_attr "iscompact" "false") + (set_attr "type" "umulti") + (set_attr "predicable" "yes,no,yes") + (set_attr "cond" "canuse,canuse_limm,canuse")]) + +; ARC700 mpy* instructions: This is a multi-cycle extension, and thus 'w' +; may not be used as destination constraint. + +; The result of mpy and mpyu is the same except for flag setting (if enabled), +; but mpyu is faster for the standard multiplier. +; Note: we must make sure LP_COUNT is not one of the destination +; registers, since it cannot be the destination of a multi-cycle insn +; like MPY or MPYU. +(define_insn "mulsi3_700" + [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=Rcr,r,r,Rcr,r") + (mult:SI (match_operand:SI 1 "register_operand" " 0,c,0,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,cL,I,Cal,Cal")))] +"TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyu%? 
%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "umulti") + (set_attr "predicable" "yes,no,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")]) + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (mult:DI (sign_extend:DI(match_operand:SI 1 "register_operand" "")) + (sign_extend:DI(match_operand:SI 2 "nonmemory_operand" ""))))] + "(TARGET_ARC700 && !TARGET_NOMPY_SET) + || TARGET_MUL64_SET + || TARGET_MULMAC_32BY16_SET" +" +{ + if (TARGET_ARC700 && !TARGET_NOMPY_SET) + { + operands[2] = force_reg (SImode, operands[2]); + if (!register_operand (operands[0], DImode)) + { + rtx result = gen_reg_rtx (DImode); + + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_mulsidi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], result); + DONE; + } + } + else if (TARGET_MUL64_SET) + { + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_mulsidi_600 (operands[1], operands[2])); + emit_move_insn (operands[0], gen_rtx_REG (DImode, MUL64_OUT_REG)); + DONE; + } + else if (TARGET_MULMAC_32BY16_SET) + { + rtx result_hi = gen_highpart(SImode, operands[0]); + rtx result_low = gen_lowpart(SImode, operands[0]); + + emit_insn (gen_mul64_600 (operands[1], operands[2])); + emit_insn (gen_mac64_600 (result_hi, operands[1], operands[2])); + emit_move_insn (result_low, gen_acc2 ()); + DONE; + } +}") + +(define_insn "mul64_600" + [(set (reg:DI 56) + (mult:DI (sign_extend:DI (match_operand:SI 0 "register_operand" + "c,c,c")) + (zero_extract:DI (match_operand:SI 1 "nonmemory_operand" + "c,L,Cal") + (const_int 16) + (const_int 0)))) + ] + "TARGET_MULMAC_32BY16_SET" + "mullw%? 0, %0, %1" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +;; ??? check if this is canonical rtl +(define_insn "mac64_600" + [(set (reg:DI 56) + (plus:DI + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "c,c,c")) + (ashift:DI + (sign_extract:DI (match_operand:SI 2 "nonmemory_operand" "c,L,Cal") + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56))) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (zero_extract:SI + (plus:DI + (mult:DI (sign_extend:DI (match_dup 1)) + (ashift:DI + (sign_extract:DI (match_dup 2) + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56)) + (const_int 32) (const_int 32)))] + "TARGET_MULMAC_32BY16_SET" + "machlw%? %0, %1, %2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +;; DI <- DI(signed SI) * DI(signed SI) +(define_insn_and_split "mulsidi3_700" + [(set (match_operand:DI 0 "register_operand" "=&r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%c")) + (sign_extend:DI (match_operand:SI 2 "extend_operand" "cL"))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "#" + "&& reload_completed" + [(const_int 0)] +{ + int hi = TARGET_BIG_ENDIAN ? 0 : UNITS_PER_WORD; + int lo = TARGET_BIG_ENDIAN ? 
UNITS_PER_WORD : 0; + rtx l0 = simplify_gen_subreg (word_mode, operands[0], DImode, lo); + rtx h0 = simplify_gen_subreg (word_mode, operands[0], DImode, hi); + emit_insn (gen_mulsi3_highpart (h0, operands[1], operands[2])); + emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2])); + DONE; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c")) + (sign_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i"))) + (const_int 32))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyh%? %0,%1,%2" + [(set_attr "length" "4,4,8,8") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse,nocond")]) + +; Note that mpyhu has the same latency as mpy / mpyh, +; thus we use the type multi. +(define_insn "*umulsi3_highpart_i" + [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c")) + (zero_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i"))) + (const_int 32))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyhu%? %0,%1,%2" + [(set_attr "length" "4,4,8,8") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse,nocond")]) + +; Implementations include additional labels for umulsidi3, so we got all +; the same clobbers - plus one for the result low part. */ +(define_insn "umulsi3_highpart_600_lib_le" + [(set (reg:SI R1_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG))) + (const_int 32)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R0_REG)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))] + "!TARGET_BIG_ENDIAN + && !TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__umulsi3_highpart\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_insn "umulsi3_highpart_600_lib_be" + [(set (reg:SI R0_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG))) + (const_int 32)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R1_REG)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))] + "TARGET_BIG_ENDIAN + && !TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__umulsi3_highpart\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +;; (zero_extend:DI (const_int)) leads to internal errors in combine, so we +;; need a separate pattern for immediates +;; ??? This is fine for combine, but not for reload. +(define_insn "umulsi3_highpart_int" + [(set (match_operand:SI 0 "register_operand" "=Rcr, r, r,Rcr, r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, c, 0, 0, c")) + (match_operand:DI 2 "immediate_usidi_operand" "L, L, I, Cal, Cal")) + (const_int 32))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "mpyhu%? 
%0,%1,%2" + [(set_attr "length" "4,4,4,8,8") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no,no,yes,no") + (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")]) + +(define_expand "umulsi3_highpart" + [(set (match_operand:SI 0 "general_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "nonmemory_operand" ""))) + (const_int 32))))] + "TARGET_ARC700 || (!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET)" + " +{ + rtx target = operands[0]; + + if (!TARGET_ARC700 || TARGET_NOMPY_SET) + { + emit_move_insn (gen_rtx_REG (SImode, 0), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 1), operands[2]); + if (TARGET_BIG_ENDIAN) + emit_insn (gen_umulsi3_highpart_600_lib_be ()); + else + emit_insn (gen_umulsi3_highpart_600_lib_le ()); + emit_move_insn (target, gen_rtx_REG (SImode, 0)); + DONE; + } + + if (!register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + operands[2] = simplify_const_unary_operation (ZERO_EXTEND, DImode, + operands[2], SImode); + else if (!immediate_operand (operands[2], SImode)) + operands[2] = gen_rtx_ZERO_EXTEND (DImode, operands[2]); + emit_insn (gen_umulsi3_highpart_int (target, operands[1], operands[2])); + if (target != operands[0]) + emit_move_insn (operands[0], target); + DONE; +}") + +(define_expand "umulsidi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (mult:DI (zero_extend:DI(match_operand:SI 1 "register_operand" "")) + (zero_extend:DI(match_operand:SI 2 "nonmemory_operand" ""))))] + "" +{ + if (TARGET_ARC700 && !TARGET_NOMPY_SET) + { + operands[2] = force_reg (SImode, operands[2]); + if (!register_operand (operands[0], DImode)) + { + rtx result = gen_reg_rtx (DImode); + + emit_insn (gen_umulsidi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], result); + DONE; + } + } + else if (TARGET_MUL64_SET) + { + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_umulsidi_600 (operands[1], operands[2])); + emit_move_insn (operands[0], gen_rtx_REG (DImode, MUL64_OUT_REG)); + DONE; + } + else if (TARGET_MULMAC_32BY16_SET) + { + rtx result_hi = gen_reg_rtx (SImode); + rtx result_low = gen_reg_rtx (SImode); + + result_hi = gen_highpart(SImode , operands[0]); + result_low = gen_lowpart(SImode , operands[0]); + + emit_insn (gen_umul64_600 (operands[1], operands[2])); + emit_insn (gen_umac64_600 (result_hi, operands[1], operands[2])); + emit_move_insn (result_low, gen_acc2 ()); + DONE; + } + else + { + emit_move_insn (gen_rtx_REG (SImode, R0_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R1_REG), operands[2]); + emit_insn (gen_umulsidi3_600_lib ()); + emit_move_insn (operands[0], gen_rtx_REG (DImode, R0_REG)); + DONE; + } +}) + +(define_insn "umul64_600" + [(set (reg:DI 56) + (mult:DI (zero_extend:DI (match_operand:SI 0 "register_operand" + "c,c,c")) + (zero_extract:DI (match_operand:SI 1 "nonmemory_operand" + "c,L,Cal") + (const_int 16) + (const_int 0)))) + ] + "TARGET_MULMAC_32BY16_SET" + "@mululw 0, %0, %1 + mululw 0, %0, %1 + mululw%? 
0, %1, %0" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + +(define_insn "umac64_600" + [(set (reg:DI 56) + (plus:DI + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "c,c,c")) + (ashift:DI + (zero_extract:DI (match_operand:SI 2 "nonmemory_operand" "c,L,Cal") + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56))) + (set (match_operand:SI 0 "register_operand" "=w,w,w") + (zero_extract:SI + (plus:DI + (mult:DI (zero_extend:DI (match_dup 1)) + (ashift:DI + (zero_extract:DI (match_dup 2) + (const_int 16) (const_int 16)) + (const_int 16))) + (reg:DI 56)) + (const_int 32) (const_int 32)))] + "TARGET_MULMAC_32BY16_SET" + "machulw%? %0, %1, %2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mulmac_600") + (set_attr "predicable" "no,no,yes") + (set_attr "cond" "nocond, canuse_limm, canuse")]) + + + +;; DI <- DI(unsigned SI) * DI(unsigned SI) +(define_insn_and_split "umulsidi3_700" + [(set (match_operand:DI 0 "dest_reg_operand" "=&r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%c")) + (zero_extend:DI (match_operand:SI 2 "extend_operand" "cL"))))] + "TARGET_ARC700 && !TARGET_NOMPY_SET" + "#" + "reload_completed" + [(const_int 0)] +{ + int hi = !TARGET_BIG_ENDIAN; + int lo = !hi; + rtx l0 = operand_subword (operands[0], lo, 0, DImode); + rtx h0 = operand_subword (operands[0], hi, 0, DImode); + emit_insn (gen_umulsi3_highpart (h0, operands[1], operands[2])); + emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2])); + DONE; +} + [(set_attr "type" "umulti") + (set_attr "length" "8")]) + +(define_insn "umulsidi3_600_lib" + [(set (reg:DI R0_REG) + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__umulsidi3\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_peephole2 + [(parallel + [(set (reg:DI R0_REG) + (mult:DI (zero_extend:DI (reg:SI R0_REG)) + (zero_extend:DI (reg:SI R1_REG)))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:DI R2_REG)) + (clobber (reg:SI R12_REG)) + (clobber (reg:DI MUL64_OUT_REG)) + (clobber (reg:CC CC_REG))])] + "!TARGET_MUL64_SET && !TARGET_MULMAC_32BY16_SET + && (!TARGET_ARC700 || TARGET_NOMPY_SET) + && peep2_regno_dead_p (1, TARGET_BIG_ENDIAN ? 
R1_REG : R0_REG)" + [(pc)] +{ + if (TARGET_BIG_ENDIAN) + emit_insn (gen_umulsi3_highpart_600_lib_be ()); + else + emit_insn (gen_umulsi3_highpart_600_lib_le ()); + DONE; +}) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + "if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2], false)) + { + operands[2]=force_reg(SImode, operands[2]); + } + else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[2], Pmode)) + { + operands[2] = force_reg (SImode, arc_rewrite_small_data (operands[2])); + } + + ") + +(define_expand "adddi3" + [(parallel [(set (match_operand:DI 0 "dest_reg_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (TARGET_EXPAND_ADDDI) + { + rtx l0 = gen_lowpart (SImode, operands[0]); + rtx h0 = disi_highpart (operands[0]); + rtx l1 = gen_lowpart (SImode, operands[1]); + rtx h1 = disi_highpart (operands[1]); + rtx l2 = gen_lowpart (SImode, operands[2]); + rtx h2 = disi_highpart (operands[2]); + rtx cc_c = gen_rtx_REG (CC_Cmode, CC_REG); + + if (CONST_INT_P (h2) && INTVAL (h2) < 0 && SIGNED_INT12 (INTVAL (h2))) + { + emit_insn (gen_sub_f (l0, l1, gen_int_mode (-INTVAL (l2), SImode))); + emit_insn (gen_sbc (h0, h1, + gen_int_mode (-INTVAL (h2) - (l1 != 0), SImode), + cc_c)); + DONE; + } + emit_insn (gen_add_f (l0, l1, l2)); + emit_insn (gen_adc (h0, h1, h2)); + DONE; + } +}) + +; This assumes that there can be no strictly partial overlap between +; operands[1] and operands[2]. +(define_insn_and_split "*adddi3_i" + [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,w") + (plus:DI (match_operand:DI 1 "register_operand" "%c,0,c") + (match_operand:DI 2 "nonmemory_operand" "ci,ci,!i"))) + (clobber (reg:CC CC_REG))] + "" + "#" + "reload_completed" + [(const_int 0)] +{ + int hi = !TARGET_BIG_ENDIAN; + int lo = !hi; + rtx l0 = operand_subword (operands[0], lo, 0, DImode); + rtx h0 = operand_subword (operands[0], hi, 0, DImode); + rtx l1 = operand_subword (operands[1], lo, 0, DImode); + rtx h1 = operand_subword (operands[1], hi, 0, DImode); + rtx l2 = operand_subword (operands[2], lo, 0, DImode); + rtx h2 = operand_subword (operands[2], hi, 0, DImode); + + + if (l2 == const0_rtx) + { + if (!rtx_equal_p (l0, l1) && !rtx_equal_p (l0, h1)) + emit_move_insn (l0, l1); + emit_insn (gen_addsi3 (h0, h1, h2)); + if (!rtx_equal_p (l0, l1) && rtx_equal_p (l0, h1)) + emit_move_insn (l0, l1); + DONE; + } + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) >= -0x7fffffff) + { + emit_insn (gen_subdi3_i (operands[0], operands[1], + GEN_INT (-INTVAL (operands[2])))); + DONE; + } + if (rtx_equal_p (l0, h1)) + { + if (h2 != const0_rtx) + emit_insn (gen_addsi3 (h0, h1, h2)); + else if (!rtx_equal_p (h0, h1)) + emit_move_insn (h0, h1); + emit_insn (gen_add_f (l0, l1, l2)); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)), + gen_rtx_SET (VOIDmode, h0, plus_constant (SImode, h0, 1)))); + DONE; + } + emit_insn (gen_add_f (l0, l1, l2)); + emit_insn (gen_adc (h0, h1, h2)); + DONE; +} + [(set_attr "cond" "clob") + (set_attr "type" "binary") + (set_attr "length" "16,16,20")]) + +(define_insn "add_f" + [(set (reg:CC_C CC_REG) + (compare:CC_C + (plus:SI (match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal")) + 
(match_dup 1))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "add.f %0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,8")]) + +(define_insn "*add_f_2" + [(set (reg:CC_C CC_REG) + (compare:CC_C + (plus:SI (match_operand:SI 1 "register_operand" "c,0,c") + (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal")) + (match_dup 2))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "add.f %0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,8")]) + +; w/c/c comes first (rather than w/0/C_0) to prevent the middle-end +; needlessly prioritizing the matching constraint. +; Rcw/0/C_0 comes before w/c/L so that the lower latency conditional +; execution is used where possible. +(define_insn_and_split "adc" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w") + (plus:SI (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) + (match_operand:SI 1 "nonmemory_operand" + "%c,0,c,0,cCal")) + (match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + adc %0,%1,%2 + add.cs %0,%1,1 + adc %0,%1,%2 + adc %0,%1,%2 + adc %0,%1,%2" + ; if we have a bad schedule after sched2, split. + "reload_completed + && !optimize_size && TARGET_ARC700 + && arc_scheduling_not_expected () + && arc_sets_cc_p (prev_nonnote_insn (insn)) + /* If next comes a return or other insn that needs a delay slot, + expect the adc to get into the delay slot. */ + && next_nonnote_insn (insn) + && !arc_need_delay (next_nonnote_insn (insn)) + /* Restore operands before emitting. */ + && (extract_insn_cached (insn), 1)" + [(set (match_dup 0) (match_dup 3)) + (cond_exec + (ltu (reg:CC_C CC_REG) (const_int 0)) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))))] + "operands[3] = simplify_gen_binary (PLUS, SImode, operands[1], operands[2]);" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4,4,4,4,8")]) + +; combiner-splitter cmp / scc -> cmp / adc +(define_split + [(set (match_operand:SI 0 "dest_reg_operand" "") + (gtu:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (clobber (reg CC_REG))] + "" + [(set (reg:CC_C CC_REG) (compare:CC_C (match_dup 2) (match_dup 1))) + (set (match_dup 0) (ltu:SI (reg:CC_C CC_REG) (const_int 0)))]) + +; combine won't work when an intermediate result is used later... +; add %0,%1,%2 ` cmp %0,%[12] -> add.f %0,%1,%2 +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (reg:CC_C CC_REG) + (compare:CC_C (match_dup 0) + (match_operand:SI 3 "nonmemory_operand" "")))] + "rtx_equal_p (operands[1], operands[3]) + || rtx_equal_p (operands[2], operands[3])" + [(parallel + [(set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1))) + (set (match_dup 0) + (plus:SI (match_dup 1) (match_dup 2)))])]) + +; ??? need to delve into combine to find out why this is not useful. +; We'd like to be able to grok various C idioms for carry bit usage. 
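+; Two such carry idioms, as a hypothetical C sketch:
+;   unsigned sum3 (unsigned a, unsigned b, unsigned c)
+;   { unsigned s = a + b; return c + (s < a); }  /* add the carry out of a+b */
+;   unsigned gtu (unsigned a, unsigned b) { return a > b; }
+; Ideally the first would map onto add.f followed by adc, and the second onto
+; a compare followed by materializing the carry, which is what the disabled
+; patterns below are aiming at.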
+;(define_insn "*adc_0" +; [(set (match_operand:SI 0 "dest_reg_operand" "=w") +; (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) +; (match_operand:SI 1 "register_operand" "c")))] +; "" +; "adc %0,%1,0" +; [(set_attr "cond" "use") +; (set_attr "type" "cc_arith") +; (set_attr "length" "4")]) +; +;(define_split +; [(set (match_operand:SI 0 "dest_reg_operand" "=w") +; (plus:SI (gtu:SI (match_operand:SI 1 "register_operand" "c") +; (match_operand:SI 2 "register_operand" "c")) +; (match_operand:SI 3 "register_operand" "c"))) +; (clobber (reg CC_REG))] +; "" +; [(set (reg:CC_C CC_REG) (compare:CC_C (match_dup 2) (match_dup 1))) +; (set (match_dup 0) +; (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0)) +; (match_dup 3)))]) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + int c = 1; + + if (!register_operand (operands[2], SImode)) + { + operands[1] = force_reg (SImode, operands[1]); + c = 2; + } + if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[c], false)) + operands[c] = force_reg (SImode, operands[c]); + else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[c], Pmode)) + operands[c] = force_reg (SImode, arc_rewrite_small_data (operands[c])); +}") + +; the casesi expander might generate a sub of zero, so we have to recognize it. +; combine should make such an insn go away. +(define_insn_and_split "subsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,w,w,w, w, w, w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0, 0, cL,c,L,I,Cal,Cal, c") + (match_operand:SI 2 "nonmemory_operand" "Rcqq, c, 0,c,c,0, 0, c,Cal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sub%? %0,%1,%2%& + sub%? %0,%1,%2 + rsub%? %0,%2,%1 + sub %0,%1,%2 + rsub %0,%2,%1 + rsub %0,%2,%1 + rsub%? 
%0,%2,%1 + rsub %0,%2,%1 + sub %0,%1,%2" + "reload_completed && get_attr_length (insn) == 8 + && satisfies_constraint_I (operands[1]) + && GET_CODE (PATTERN (insn)) != COND_EXEC" + [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) (match_dup 4))] + "split_subsi (operands);" + [(set_attr "iscompact" "maybe,false,false,false,false,false,false,false, false") + (set_attr "length" "*,4,4,4,4,4,8,8,8") + (set_attr "predicable" "yes,yes,yes,no,no,no,yes,no,no") + (set_attr "cond" "canuse,canuse,canuse,nocond,nocond,canuse_limm,canuse,nocond,nocond")]) + +(define_expand "subdi3" + [(parallel [(set (match_operand:DI 0 "dest_reg_operand" "") + (minus:DI (match_operand:DI 1 "nonmemory_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REG))])] + "" +{ + if (!register_operand (operands[2], DImode)) + operands[1] = force_reg (DImode, operands[1]); + if (TARGET_EXPAND_ADDDI) + { + rtx l0 = gen_lowpart (SImode, operands[0]); + rtx h0 = disi_highpart (operands[0]); + rtx l1 = gen_lowpart (SImode, operands[1]); + rtx h1 = disi_highpart (operands[1]); + rtx l2 = gen_lowpart (SImode, operands[2]); + rtx h2 = disi_highpart (operands[2]); + rtx cc_c = gen_rtx_REG (CC_Cmode, CC_REG); + + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn (gen_sbc (h0, h1, h2, cc_c)); + DONE; + } +}) + +(define_insn_and_split "subdi3_i" + [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,w,w,w") + (minus:DI (match_operand:DI 1 "nonmemory_operand" "ci,0,ci,c,!i") + (match_operand:DI 2 "nonmemory_operand" "ci,ci,0,!i,c"))) + (clobber (reg:CC CC_REG))] + "register_operand (operands[1], DImode) + || register_operand (operands[2], DImode)" + "#" + "reload_completed" + [(const_int 0)] +{ + int hi = !TARGET_BIG_ENDIAN; + int lo = !hi; + rtx l0 = operand_subword (operands[0], lo, 0, DImode); + rtx h0 = operand_subword (operands[0], hi, 0, DImode); + rtx l1 = operand_subword (operands[1], lo, 0, DImode); + rtx h1 = operand_subword (operands[1], hi, 0, DImode); + rtx l2 = operand_subword (operands[2], lo, 0, DImode); + rtx h2 = operand_subword (operands[2], hi, 0, DImode); + + if (rtx_equal_p (l0, h1) || rtx_equal_p (l0, h2)) + { + h1 = simplify_gen_binary (MINUS, SImode, h1, h2); + if (!rtx_equal_p (h0, h1)) + emit_insn (gen_rtx_SET (VOIDmode, h0, h1)); + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)), + gen_rtx_SET (VOIDmode, h0, plus_constant (SImode, h0, -1)))); + DONE; + } + emit_insn (gen_sub_f (l0, l1, l2)); + emit_insn (gen_sbc (h0, h1, h2, gen_rtx_REG (CCmode, CC_REG))); + DONE; +} + [(set_attr "cond" "clob") + (set_attr "length" "16,16,16,20,20")]) + +(define_insn "*sbc_0" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (minus:SI (match_operand:SI 1 "register_operand" "c") + (ltu:SI (match_operand:CC_C 2 "cc_use_register") + (const_int 0))))] + "" + "sbc %0,%1,0" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4")]) + +; w/c/c comes first (rather than Rcw/0/C_0) to prevent the middle-end +; needlessly prioritizing the matching constraint. +; Rcw/0/C_0 comes before w/c/L so that the lower latency conditional execution +; is used where possible. 
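+; A minimal C sketch of what exercises the sub.f / sbc pairing emitted by the
+; subdi3 expander above (hypothetical example):
+;   unsigned long long sub64 (unsigned long long a, unsigned long long b)
+;   { return a - b; }
+; The low words are subtracted with sub.f to produce the borrow, and the high
+; words then consume it via sbc, as in the insn below.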
+(define_insn_and_split "sbc" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w") + (minus:SI (minus:SI (match_operand:SI 1 "nonmemory_operand" + "c,0,c,0,cCal") + (ltu:SI (match_operand:CC_C 3 "cc_use_register") + (const_int 0))) + (match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sbc %0,%1,%2 + sub.cs %0,%1,1 + sbc %0,%1,%2 + sbc %0,%1,%2 + sbc %0,%1,%2" + ; if we have a bad schedule after sched2, split. + "reload_completed + && !optimize_size && TARGET_ARC700 + && arc_scheduling_not_expected () + && arc_sets_cc_p (prev_nonnote_insn (insn)) + /* If next comes a return or other insn that needs a delay slot, + expect the adc to get into the delay slot. */ + && next_nonnote_insn (insn) + && !arc_need_delay (next_nonnote_insn (insn)) + /* Restore operands before emitting. */ + && (extract_insn_cached (insn), 1)" + [(set (match_dup 0) (match_dup 4)) + (cond_exec + (ltu (reg:CC_C CC_REG) (const_int 0)) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))))] + "operands[4] = simplify_gen_binary (MINUS, SImode, operands[1], operands[2]);" + [(set_attr "cond" "use") + (set_attr "type" "cc_arith") + (set_attr "length" "4,4,4,4,8")]) + +(define_insn "sub_f" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "nonmemory_operand" " c,L,0,I,c,Cal") + (match_operand:SI 2 "nonmemory_operand" "cL,c,I,0,Cal,c"))) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w,Rcw,Rcw,w,w") + (minus:SI (match_dup 1) (match_dup 2)))] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "@ + sub.f %0,%1,%2 + rsub.f %0,%2,%1 + sub.f %0,%1,%2 + rsub.f %0,%2,%1 + sub.f %0,%1,%2 + sub.f %0,%1,%2" + [(set_attr "type" "compare") + (set_attr "length" "4,4,4,4,8,8")]) + +; combine won't work when an intermediate result is used later... +; add %0,%1,%2 ` cmp %0,%[12] -> add.f %0,%1,%2 +(define_peephole2 + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_dup 1) (match_dup 2)))] + "" + [(parallel + [(set (reg:CC CC_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])]) + +(define_peephole2 + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (match_operand 3 "" "") (match_operand 4 "" "")) + (set (match_operand:SI 0 "dest_reg_operand" "") + (minus:SI (match_dup 1) (match_dup 2)))] + "!reg_overlap_mentioned_p (operands[3], operands[1]) + && !reg_overlap_mentioned_p (operands[3], operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[4]) + && !reg_overlap_mentioned_p (operands[0], operands[3])" + [(parallel + [(set (reg:CC CC_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (match_dup 4))]) + +(define_insn "*add_n" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,W,W,w,w") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "Rcqq,c,c,c,c,c") + (match_operand:SI 2 "_2_4_8_operand" "")) + (match_operand:SI 3 "nonmemory_operand" "0,0,c,?Cal,?c,??Cal")))] + "" + "add%z2%? 
%0,%3,%1%&" + [(set_attr "type" "shift") + (set_attr "length" "*,4,4,8,4,8") + (set_attr "predicable" "yes,yes,no,no,no,no") + (set_attr "cond" "canuse,canuse,nocond,nocond,nocond,nocond") + (set_attr "iscompact" "maybe,false,false,false,false,false")]) + +;; N.B. sub[123] has the operands of the MINUS in the opposite order from +;; what synth_mult likes. +(define_insn "*sub_n" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal") + (mult:SI (match_operand:SI 2 "register_operand" "c,c,c") + (match_operand:SI 3 "_2_4_8_operand" ""))))] + "" + "sub%z3%? %0,%1,%2" + [(set_attr "type" "shift") + (set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond") + (set_attr "iscompact" "false")]) + +; ??? check if combine matches this. +(define_insn "*bset" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (ior:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bset%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? check if combine matches this. +(define_insn "*bxor" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (xor:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bxor%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? check if combine matches this. +(define_insn "*bclr" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (not:SI (ashift:SI (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))) + (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + "" + "bclr%? %0,%2,%1" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; ??? FIXME: find combine patterns for bmsk. + +;;Following are the define_insns added for the purpose of peephole2's + +; see also iorsi3 for use with constant bit number. +(define_insn "*bset_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + "" + "@ + bset%? %0,%1,%2 ;;peep2, constr 1 + bset %0,%1,%2 ;;peep2, constr 2 + bset %0,%S1,%2 ;;peep2, constr 3" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also xorsi3 for use with constant bit number. +(define_insn "*bxor_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + "" + "@ + bxor%? %0,%1,%2 + bxor %0,%1,%2 + bxor %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also andsi3 for use with constant bit number. +(define_insn "*bclr_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (not:SI (ashift:SI (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "cL,rL,r"))) + (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")))] + "" + "@ + bclr%? 
%0,%1,%2 + bclr %0,%1,%2 + bclr %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +; see also andsi3 for use with constant bit number. +(define_insn "*bmsk_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + (and:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + (plus:SI (ashift:SI (const_int 1) + (plus:SI (match_operand:SI 2 "nonmemory_operand" "rL,rL,r") + (const_int 1))) + (const_int -1))))] + "" + "@ + bmsk%? %0,%S1,%2 + bmsk %0,%1,%2 + bmsk %0,%S1,%2" + [(set_attr "length" "4,4,8") + (set_attr "predicable" "yes,no,no") + (set_attr "cond" "canuse,nocond,nocond")] +) + +;;Instructions added for peephole2s end + +;; Boolean instructions. + +(define_expand "andsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + "if (!satisfies_constraint_Cux (operands[2])) + operands[1] = force_reg (SImode, operands[1]); + else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode)) + operands[1] = arc_rewrite_small_data (operands[1]);") + +(define_insn "andsi3_i" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcqq,Rcqq,Rcw,Rcw,Rcw,Rcw,Rcw,Rcw, w, w, w, w,w,Rcw, w, W") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,Rcq, 0, 0,Rcqq, 0, c, 0, 0, 0, 0, c, c, c, c,0, 0, c, o") + (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, C1p, Ccp, Cux, cL, 0,C1p,Ccp,CnL, I, Lc,C1p,Ccp,CnL,I,Cal,Cal,Cux")))] + "(register_operand (operands[1], SImode) + && nonmemory_operand (operands[2], SImode)) + || (memory_operand (operands[1], SImode) + && satisfies_constraint_Cux (operands[2]))" + "* +{ + switch (which_alternative) + { + case 0: case 5: case 10: case 11: case 15: case 16: case 17: + return \"and%? %0,%1,%2%&\"; + case 1: case 6: + return \"and%? %0,%2,%1%&\"; + case 2: case 7: case 12: + return \"bmsk%? %0,%1,%Z2%&\"; + case 3: case 8: case 13: + return \"bclr%? %0,%1,%M2%&\"; + case 4: + return (INTVAL (operands[2]) == 0xff + ? \"extb%? %0,%1%&\" : \"extw%? %0,%1%&\"); + case 9: case 14: return \"bic%? %0,%1,%n2-1\"; + case 18: + if (TARGET_BIG_ENDIAN) + { + rtx xop[2]; + + xop[0] = operands[0]; + xop[1] = adjust_address (operands[1], QImode, + INTVAL (operands[2]) == 0xff ? 3 : 2); + output_asm_insn (INTVAL (operands[2]) == 0xff + ? \"ldb %0,%1\" : \"ldw %0,%1\", + xop); + return \"\"; + } + return INTVAL (operands[2]) == 0xff ? \"ldb %0,%1\" : \"ldw %0,%1\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "iscompact" "maybe,maybe,maybe,maybe,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false") + (set_attr "type" "binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,load") + (set_attr "length" "*,*,*,*,*,4,4,4,4,4,4,4,4,4,4,4,8,8,*") + (set_attr "predicable" "no,no,no,no,no,yes,yes,yes,yes,yes,no,no,no,no,no,no,yes,no,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,nocond,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,nocond,nocond,canuse_limm,canuse,nocond,nocond")]) + +; combiner splitter, pattern found in ldtoa.c . 
+; and op3,op0,op1 / cmp op3,op2 -> add op3,op0,op4 / bmsk.f 0,op3,op1 +(define_split + [(set (reg:CC_Z CC_REG) + (compare:CC_Z (and:SI (match_operand:SI 0 "register_operand" "") + (match_operand 1 "const_int_operand" "")) + (match_operand 2 "const_int_operand" ""))) + (clobber (match_operand:SI 3 "register_operand" ""))] + "((INTVAL (operands[1]) + 1) & INTVAL (operands[1])) == 0" + [(set (match_dup 3) + (plus:SI (match_dup 0) (match_dup 4))) + (set (reg:CC_Z CC_REG) + (compare:CC_Z (and:SI (match_dup 3) (match_dup 1)) + (const_int 0)))] + "operands[4] = GEN_INT ( -(~INTVAL (operands[1]) | INTVAL (operands[2])));") + +;;bic define_insn that allows limm to be the first operand +(define_insn "*bicsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,Rcw,w,w,w") + (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lc,I,Cal,Lc,Cal,c")) + (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,c,c,Cal")))] + "" + "@ + bic%? %0, %2, %1%& ;;constraint 0 + bic%? %0,%2,%1 ;;constraint 1 + bic %0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ??? + bic%? %0,%2,%S1 ;;constraint 3, FIXME: will it ever get generated ??? + bic %0,%2,%1 ;;constraint 4 + bic %0,%2,%S1 ;;constraint 5, FIXME: will it ever get generated ??? + bic %0,%S2,%1 ;;constraint 6" + [(set_attr "length" "*,4,4,8,4,8,8") + (set_attr "iscompact" "maybe, false, false, false, false, false, false") + (set_attr "predicable" "no,yes,no,yes,no,no,no") + (set_attr "cond" "canuse,canuse,canuse_limm,canuse,nocond,nocond,nocond")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcw,Rcw,Rcw,Rcw,w, w,w,Rcw, w") + (ior:SI (match_operand:SI 1 "nonmemory_operand" "% 0,Rcq, 0, 0, c, 0, 0, c, c,0, 0, c") + (match_operand:SI 2 "nonmemory_operand" "Rcqq, 0, C0p, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] + "" + "* + switch (which_alternative) + { + case 0: case 3: case 6: case 7: case 9: case 10: case 11: + return \"or%? %0,%1,%2%&\"; + case 1: case 4: + return \"or%? %0,%2,%1%&\"; + case 2: case 5: case 8: + return \"bset%? %0,%1,%z2%&\"; + default: + gcc_unreachable (); + }" + [(set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,false,false,false") + (set_attr "length" "*,*,*,4,4,4,4,4,4,4,8,8") + (set_attr "predicable" "no,no,no,yes,yes,yes,no,no,no,no,yes,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcw,Rcw,Rcw,Rcw, w, w,w, w, w") + (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0, c, 0, 0, c, c,0, 0, c") + (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] + "" + "* + switch (which_alternative) + { + case 0: case 2: case 5: case 6: case 8: case 9: case 10: + return \"xor%? %0,%1,%2%&\"; + case 1: case 3: + return \"xor%? %0,%2,%1%&\"; + case 4: case 7: + return \"bxor%? %0,%1,%z2\"; + default: + gcc_unreachable (); + } + " + [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false,false,false,false") + (set_attr "type" "binary") + (set_attr "length" "*,*,4,4,4,4,4,4,4,8,8") + (set_attr "predicable" "no,no,yes,yes,yes,no,no,no,no,yes,no") + (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,w") + (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,c")))] + "" + "neg%? 
%0,%1%&" + [(set_attr "type" "unary") + (set_attr "iscompact" "maybe,true,false,false") + (set_attr "predicable" "no,no,yes,no")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") + (not:SI (match_operand:SI 1 "register_operand" "Rcqq,c")))] + "" + "not%? %0,%1%&" + [(set_attr "type" "unary,unary") + (set_attr "iscompact" "true,false")]) + +(define_insn_and_split "one_cmpldi2" + [(set (match_operand:DI 0 "dest_reg_operand" "=q,w") + (not:DI (match_operand:DI 1 "register_operand" "q,c")))] + "" + "#" + "&& reload_completed" + [(set (match_dup 2) (not:SI (match_dup 3))) + (set (match_dup 4) (not:SI (match_dup 5)))] +{ + int swap = (true_regnum (operands[0]) == true_regnum (operands[1]) + 1); + + operands[2] = operand_subword (operands[0], 0+swap, 0, DImode); + operands[3] = operand_subword (operands[1], 0+swap, 0, DImode); + operands[4] = operand_subword (operands[0], 1-swap, 0, DImode); + operands[5] = operand_subword (operands[1], 1-swap, 0, DImode); +} + [(set_attr "type" "unary,unary") + (set_attr "cond" "nocond,nocond") + (set_attr "length" "4,8")]) + +;; Shift instructions. + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (!TARGET_BARREL_SHIFTER) + { + emit_shift (ASHIFT, operands[0], operands[1], operands[2]); + DONE; + } +}") + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (!TARGET_BARREL_SHIFTER) + { + emit_shift (ASHIFTRT, operands[0], operands[1], operands[2]); + DONE; + } +}") + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" + " +{ + if (!TARGET_BARREL_SHIFTER) + { + emit_shift (LSHIFTRT, operands[0], operands[1], operands[2]); + DONE; + } +}") + +(define_insn "shift_si3" + [(set (match_operand:SI 0 "dest_reg_operand" "=r") + (match_operator:SI 3 "shift4_operator" + [(match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")])) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (reg:CC CC_REG)) + ] + "!TARGET_BARREL_SHIFTER" + "* return output_shift (operands);" + [(set_attr "type" "shift") + (set_attr "length" "16")]) + +(define_insn "shift_si3_loop" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rn,Cal")])) + (clobber (match_scratch:SI 4 "=X,X")) + (clobber (reg:SI LP_COUNT)) + (clobber (reg:SI LP_START)) + (clobber (reg:SI LP_END)) + (clobber (reg:CC CC_REG)) + ] + "!TARGET_BARREL_SHIFTER" + "* return output_shift (operands);" + [(set_attr "type" "shift") + (set_attr "length" "16,20")]) + +; asl, asr, lsr patterns: +; There is no point in including an 'I' alternative since only the lowest 5 +; bits are used for the shift. OTOH Cal can be useful if the shift amount +; is defined in an external symbol, as we don't have special relocations +; to truncate a symbol in a u6 immediate; but that's rather exotic, so only +; provide one alternatice for this, without condexec support. 
+(define_insn "*ashlsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (ashift:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "K, K,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "asl%? %0,%1,%2%&" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "*ashrsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "K, K,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "asr%? %0,%1,%2%&" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "*lshrsi3_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") + (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") + (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM, cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER + && (register_operand (operands[1], SImode) + || register_operand (operands[2], SImode))" + "*return (which_alternative <= 1 && !arc_ccfsm_cond_exec_p () + ? \"lsr%? %0,%1%&\" : \"lsr%? %0,%1,%2%&\");" + [(set_attr "type" "shift") + (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") + (set_attr "predicable" "no,no,no,yes,no,no") + (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) + +(define_insn "rotrsi3" + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") + (rotatert:SI (match_operand:SI 1 "register_operand" " 0,cL,cCal") + (match_operand:SI 2 "nonmemory_operand" "cL,cL,cCal")))] + "TARGET_BARREL_SHIFTER" + "ror%? %0,%1,%2" + [(set_attr "type" "shift,shift,shift") + (set_attr "predicable" "yes,no,no") + (set_attr "length" "4,4,8")]) + +;; Compare / branch instructions. + +(define_expand "cbranchsi4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 1 "nonmemory_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(reg CC_REG) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + gcc_assert (XEXP (operands[0], 0) == operands[1]); + gcc_assert (XEXP (operands[0], 1) == operands[2]); + operands[0] = gen_compare_reg (operands[0], VOIDmode); + emit_jump_insn (gen_branch_insn (operands[3], operands[0])); + DONE; +}) + +;; ??? Could add a peephole to generate compare with swapped operands and +;; modifed cc user if second, but not first operand is a compact register. +(define_insn "cmpsi_cc_insn_mixed" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 0 "register_operand" "Rcq#q,c,c, qRcq, c") + (match_operand:SI 1 "nonmemory_operand" "cO,cI,cL, Cal, Cal")))] + "" + "cmp%? 
%0,%B1%&" + [(set_attr "type" "compare") + (set_attr "iscompact" "true,false,false,true_limm,false") + (set_attr "predicable" "no,no,yes,no,yes") + (set_attr "cond" "set") + (set_attr "length" "*,4,4,*,8")]) + +(define_insn "*cmpsi_cc_zn_insn" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_operand:SI 0 "register_operand" "qRcq,c") + (const_int 0)))] + "" + "tst%? %0,%0%&" + [(set_attr "type" "compare,compare") + (set_attr "iscompact" "true,false") + (set_attr "predicable" "no,yes") + (set_attr "cond" "set_zn") + (set_attr "length" "*,4")]) + +; combiner pattern observed for unwind-dw2-fde.c:linear_search_fdes. +(define_insn "*btst" + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN + (zero_extract:SI (match_operand:SI 0 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "L,Lc")) + (const_int 0)))] + "" + "btst%? %0,%1" + [(set_attr "iscompact" "true,false") + (set_attr "predicable" "no,yes") + (set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "*,4")]) + +; combine suffers from 'simplifications' that replace a one-bit zero +; extract with a shift if it can prove that the upper bits are zero. +; arc_reorg sees the code after sched2, which can have caused our +; inputs to be clobbered even if they were not clobbered before. +; Therefore, add a third way to convert btst / b{eq,ne} to bbit{0,1} +; OTOH, this is somewhat marginal, and can leat to out-of-range +; bbit (i.e. bad scheduling) and missed conditional execution, +; so make this an option. +(define_peephole2 + [(set (reg:CC_ZN CC_REG) + (compare:CC_ZN + (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (const_int 1) + (match_operand:SI 1 "nonmemory_operand" "")) + (const_int 0))) + (set (pc) + (if_then_else (match_operator 3 "equality_comparison_operator" + [(reg:CC_ZN CC_REG) (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_BBIT_PEEPHOLE && peep2_regno_dead_p (2, CC_REG)" + [(parallel [(set (pc) + (if_then_else + (match_op_dup 3 + [(zero_extract:SI (match_dup 0) + (const_int 1) (match_dup 1)) + (const_int 0)]) + (label_ref (match_dup 2)) + (pc))) + (clobber (reg:CC_ZN CC_REG))])]) + +(define_insn "*cmpsi_cc_z_insn" + [(set (reg:CC_Z CC_REG) + (compare:CC_Z (match_operand:SI 0 "register_operand" "qRcq,c") + (match_operand:SI 1 "p2_immediate_operand" "O,n")))] + "" + "@ + cmp%? %0,%1%& + bxor.f 0,%0,%z1" + [(set_attr "type" "compare,compare") + (set_attr "iscompact" "true,false") + (set_attr "cond" "set,set_zn") + (set_attr "length" "*,4")]) + +(define_insn "*cmpsi_cc_c_insn" + [(set (reg:CC_C CC_REG) + (compare:CC_C (match_operand:SI 0 "register_operand" "Rcqq, c,Rcqq, c") + (match_operand:SI 1 "nonmemory_operand" "cO, cI, Cal,Cal")))] + "" + "cmp%? %0,%S1%&" + [(set_attr "type" "compare") + (set_attr "iscompact" "true,false,true_limm,false") + (set_attr "cond" "set") + (set_attr "length" "*,4,*,8")]) + +;; Next come the scc insns. 
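+; A minimal sketch (register names illustrative) of what the scc expansion
+; typically produces for r0 = (r1 == r2):
+;   cmp    r1,r2         ; set CC from the comparison
+;   mov    r0,1          ; assume the condition holds
+;   sub.ne r0,r0,r0      ; reversed condition clears r0 when it does not
+; i.e. *scc_insn materializes 1 unconditionally and then conditionally
+; clears it under the reversed condition.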
+ +(define_expand "cstoresi4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SI 2 "nonmemory_operand" "") + (match_operand:SI 3 "nonmemory_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (match_operator:SI 1 "ordered_comparison_operator" [(reg CC_REG) + (const_int 0)]))] + "" +{ + gcc_assert (XEXP (operands[1], 0) == operands[2]); + gcc_assert (XEXP (operands[1], 1) == operands[3]); + operands[1] = gen_compare_reg (operands[1], SImode); + emit_insn (gen_scc_insn (operands[0], operands[1])); + DONE; +}) + +(define_mode_iterator SDF [SF DF]) + +(define_expand "cstore4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SDF 2 "register_operand" "") + (match_operand:SDF 3 "register_operand" ""))) + (set (match_operand:SI 0 "dest_reg_operand" "") + (match_operator:SI 1 "comparison_operator" [(reg CC_REG) + (const_int 0)]))] + + "TARGET_OPTFPE" +{ + gcc_assert (XEXP (operands[1], 0) == operands[2]); + gcc_assert (XEXP (operands[1], 1) == operands[3]); + operands[1] = gen_compare_reg (operands[1], SImode); + emit_insn (gen_scc_insn (operands[0], operands[1])); + DONE; +}) + +; We need a separate expander for this lest we loose the mode of CC_REG +; when match_operator substitutes the literal operand into the comparison. +(define_expand "scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") (match_operand:SI 1 ""))]) + +(define_insn_and_split "*scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (match_operator:SI 1 "proper_comparison_operator" [(reg CC_REG) (const_int 0)]))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (const_int 1)) + (cond_exec + (match_dup 1) + (set (match_dup 0) (const_int 0)))] +{ + operands[1] + = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (operands[1]), + GET_MODE (XEXP (operands[1], 0))), + VOIDmode, + XEXP (operands[1], 0), XEXP (operands[1], 1)); +} + [(set_attr "type" "unary")]) + +;; ??? At least for ARC600, we should use sbc b,b,s12 if we want a value +;; that is one lower if the carry flag is set. + +;; ??? Look up negscc insn. See pa.md for example. 
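+; Illustrative note (a sketch, not verbatim from the templates below): the
+; negated scc form materializes -1 and conditionally clears it, e.g. for
+; r0 = -(r1 < r2), roughly
+;   cmp    r1,r2
+;   mov    r0,-1
+;   sub.ge r0,r0,r0      ; inverse condition clears the result
+; leaving -1 when the comparison holds and 0 otherwise.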
+(define_insn "*neg_scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (neg:SI (match_operator:SI 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])))] + "" + "mov %0,-1\;sub.%D1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +(define_insn "*not_scc_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=w") + (not:SI (match_operator:SI 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)])))] + "" + "mov %0,1\;sub.%d1 %0,%0,%0" + [(set_attr "type" "unary") + (set_attr "length" "8")]) + +; cond_exec patterns +(define_insn "*movsi_ne" + [(cond_exec + (ne (match_operand:CC_Z 2 "cc_use_register" "Rcc,Rcc,Rcc") (const_int 0)) + (set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w") + (match_operand:SI 1 "nonmemory_operand" "C_0,Lc,?Cal")))] + "" + "@ + * current_insn_predicate = 0; return \"sub%?.ne %0,%0,%0%&\"; + mov.ne %0,%1 + mov.ne %0,%S1" + [(set_attr "type" "cmove,cmove,cmove") + (set_attr "iscompact" "true,false,false") + (set_attr "length" "2,4,8")]) + +(define_insn "*movsi_cond_exec" + [(cond_exec + (match_operator 3 "proper_comparison_operator" + [(match_operand 2 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operand:SI 1 "nonmemory_operand" "Lc,?Cal")))] + "" + "mov.%d3 %0,%S1" + [(set_attr "type" "cmove") + (set_attr "length" "4,8")]) + +(define_insn "*commutative_cond_exec" + [(cond_exec + (match_operator 5 "proper_comparison_operator" + [(match_operand 4 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operator:SI 3 "commutative_operator" + [(match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "nonmemory_operand" "cL,?Cal")])))] + "" +{ + arc_output_commutative_cond_exec (operands, true); + return ""; +} + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr_alternative "length" + [(const_int 4) + (cond + [(eq (symbol_ref "arc_output_commutative_cond_exec (operands, false)") + (const_int 4)) + (const_int 4)] + (const_int 8))])]) + +(define_insn "*sub_cond_exec" + [(cond_exec + (match_operator 4 "proper_comparison_operator" + [(match_operand 3 "cc_register" "Rcc,Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w,w") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,cL,Cal") + (match_operand:SI 2 "nonmemory_operand" "cL,0,0"))))] + "" + "@ + sub.%d4 %0,%1,%2 + rsub.%d4 %0,%2,%1 + rsub.%d4 %0,%2,%1" + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr "length" "4,4,8")]) + +(define_insn "*noncommutative_cond_exec" + [(cond_exec + (match_operator 5 "proper_comparison_operator" + [(match_operand 4 "cc_register" "Rcc,Rcc") (const_int 0)]) + (set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (match_operator:SI 3 "noncommutative_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "cL,Cal")])))] + "" + "%O3.%d5 %0,%1,%2" + [(set_attr "cond" "use") + (set_attr "type" "cmove") + (set_attr "length" "4,8")]) + +;; These control RTL generation for conditional jump insns +;; Match both normal and inverted jump. + +; We need a separate expander for this lest we loose the mode of CC_REG +; when match_operator substitutes the literal operand into the comparison. 
+(define_expand "branch_insn" + [(set (pc) + (if_then_else (match_operand 1 "" "") + (label_ref (match_operand 0 "" "")) + (pc)))]) + +; When estimating sizes during arc_reorg, when optimizing for speed, there +; are three reasons why we need to consider branches to be length 6: +; - annull-false delay slot insns are implemented using conditional execution, +; thus preventing short insn formation where used. +; - for ARC600: annull-true delay slot isnns are implemented where possile +; using conditional execution, preventing short insn formation where used. +; - for ARC700: likely or somewhat likely taken branches are made long and +; unaligned if possible to avoid branch penalty. +(define_insn "*branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "* +{ + if (arc_ccfsm_branch_deleted_p ()) + { + arc_ccfsm_record_branch_deleted (); + return \"; branch deleted, next insns conditionalized\"; + } + else + { + arc_ccfsm_record_condition (operands[1], false, insn, 0); + if (get_attr_length (insn) == 2) + return \"b%d1%? %^%l0%&\"; + else + return \"b%d1%# %^%l0\"; + } +}" + [(set_attr "type" "branch") + (set + (attr "length") + (cond [ + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (ne + (if_then_else + (match_operand 1 "equality_comparison_operator" "") + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (ior (match_test "!arc_short_comparison_p (operands[1], -64)") + (lt (minus (match_dup 0) (pc)) (const_int -64)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 58) + (symbol_ref "get_attr_delay_slot_length (insn)"))))) + (const_int 0)) + (const_int 4)] + (const_int 2))) + + (set (attr "iscompact") + (cond [(match_test "get_attr_length (insn) == 2") (const_string "true")] + (const_string "false")))]) + +(define_insn "*rev_branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "REVERSIBLE_CC_MODE (GET_MODE (XEXP (operands[1], 0)))" + "* +{ + if (arc_ccfsm_branch_deleted_p ()) + { + arc_ccfsm_record_branch_deleted (); + return \"; branch deleted, next insns conditionalized\"; + } + else + { + arc_ccfsm_record_condition (operands[1], true, insn, 0); + if (get_attr_length (insn) == 2) + return \"b%D1%? %^%l0\"; + else + return \"b%D1%# %^%l0\"; + } +}" + [(set_attr "type" "branch") + (set + (attr "length") + (cond [ + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (ne + (if_then_else + (match_operand 1 "equality_comparison_operator" "") + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (ior (match_test "!arc_short_comparison_p (operands[1], -64)") + (lt (minus (match_dup 0) (pc)) (const_int -64)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 58) + (symbol_ref "get_attr_delay_slot_length (insn)"))))) + (const_int 0)) + (const_int 4)] + (const_int 2))) + + (set (attr "iscompact") + (cond [(match_test "get_attr_length (insn) == 2") (const_string "true")] + (const_string "false")))]) + +;; Unconditional and other jump instructions. 
+ +(define_expand "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "") + +(define_insn "jump_i" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "!TARGET_LONG_CALLS_SET || !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "b%!%* %^%l0%&" + [(set_attr "type" "uncond_branch") + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) == 2") + (const_string "true") (const_string "false"))) + (set_attr "cond" "canuse") + (set (attr "length") + (cond [ + ; In arc_reorg we just guesstimate; might be more or less than 4. + (match_test "arc_branch_size_unknown_p ()") + (const_int 4) + + (eq_attr "delay_slot_filled" "yes") + (const_int 4) + + (match_test "find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)") + (const_int 4) + + (ior (lt (minus (match_dup 0) (pc)) (const_int -512)) + (gt (minus (match_dup 0) (pc)) + (minus (const_int 506) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4)] + (const_int 2)))]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,Rcqq,r"))] + "" + "j%!%* [%0]%&" + [(set_attr "type" "jump") + (set_attr "iscompact" "false,false,false,maybe,false") + (set_attr "cond" "canuse,canuse_limm,canuse,canuse,canuse")]) + +;; Implement a switch statement. + +(define_expand "casesi" + [(set (match_dup 5) + (minus:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "nonmemory_operand" ""))) + (set (reg:CC CC_REG) + (compare:CC (match_dup 5) + (match_operand:SI 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else (gtu (reg:CC CC_REG) + (const_int 0)) + (label_ref (match_operand 4 "" "")) + (pc))) + (set (match_dup 6) + (unspec:SI [(match_operand 3 "" "") + (match_dup 5) (match_dup 7)] UNSPEC_CASESI)) + (parallel [(set (pc) (match_dup 6)) (use (match_dup 7))])] + "" + " +{ + rtx x; + + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = operands[3]; + emit_insn (gen_subsi3 (operands[5], operands[0], operands[1])); + emit_insn (gen_cmpsi_cc_insn_mixed (operands[5], operands[2])); + x = gen_rtx_GTU (VOIDmode, gen_rtx_REG (CCmode, CC_REG), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[4]), pc_rtx); + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x)); + if (TARGET_COMPACT_CASESI) + { + emit_jump_insn (gen_casesi_compact_jump (operands[5], operands[7])); + } + else + { + operands[3] = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + if (flag_pic || !cse_not_expected) + operands[3] = force_reg (Pmode, operands[3]); + emit_insn (gen_casesi_load (operands[6], + operands[3], operands[5], operands[7])); + if (CASE_VECTOR_PC_RELATIVE || flag_pic) + emit_insn (gen_addsi3 (operands[6], operands[6], operands[3])); + emit_jump_insn (gen_casesi_jump (operands[6], operands[7])); + } + DONE; +}") + +(define_insn "casesi_load" + [(set (match_operand:SI 0 "register_operand" "=Rcq,r,r") + (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "Rcq,c,Cal") + (match_operand:SI 2 "register_operand" "Rcq,c,c") + (label_ref (match_operand 3 "" ""))] UNSPEC_CASESI))] + "" + "* +{ + rtx diff_vec = PATTERN (next_nonnote_insn (operands[3])); + + if (GET_CODE (diff_vec) != ADDR_DIFF_VEC) + { + gcc_assert (GET_CODE (diff_vec) == ADDR_VEC); + gcc_assert (GET_MODE (diff_vec) == SImode); + gcc_assert (!CASE_VECTOR_PC_RELATIVE && !flag_pic); + } + + switch (GET_MODE (diff_vec)) + { + case SImode: + return \"ld.as %0,[%1,%2]%&\"; + case HImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"ldw.as 
%0,[%1,%2]\"; + return \"ldw.x.as %0,[%1,%2]\"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return \"ldb%? %0,[%1,%2]%&\"; + return \"ldb.x %0,[%1,%2]\"; + default: + gcc_unreachable (); + } +}" + [(set_attr "type" "load") + (set_attr_alternative "iscompact" + [(cond + [(ne (symbol_ref "GET_MODE (PATTERN (next_nonnote_insn (operands[3])))") + (symbol_ref "QImode")) + (const_string "false") + (match_test "!ADDR_DIFF_VEC_FLAGS (PATTERN (next_nonnote_insn (operands[3]))).offset_unsigned") + (const_string "false")] + (const_string "true")) + (const_string "false") + (const_string "false")]) + (set_attr_alternative "length" + [(cond + [(eq_attr "iscompact" "false") (const_int 4) + ; We have to mention (match_dup 3) to convince genattrtab.c that this + ; is a varying length insn. + (eq (symbol_ref "1+1") (const_int 2)) (const_int 2) + (gt (minus (match_dup 3) (pc)) (const_int 42)) (const_int 4)] + (const_int 2)) + (const_int 4) + (const_int 8)])]) + +; Unlike the canonical tablejump, this pattern always uses a jump address, +; even for CASE_VECTOR_PC_RELATIVE. +(define_insn "casesi_jump" + [(set (pc) (match_operand:SI 0 "register_operand" "Cal,Rcqq,c")) + (use (label_ref (match_operand 1 "" "")))] + "" + "j%!%* [%0]%&" + [(set_attr "type" "jump") + (set_attr "iscompact" "false,maybe,false") + (set_attr "cond" "canuse")]) + +(define_insn "casesi_compact_jump" + [(set (pc) + (unspec:SI [(match_operand:SI 0 "register_operand" "c,q")] + UNSPEC_CASESI)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_scratch:SI 2 "=q,0"))] + "TARGET_COMPACT_CASESI" + "* +{ + rtx diff_vec = PATTERN (next_nonnote_insn (operands[1])); + int unalign = arc_get_unalign (); + rtx xop[3]; + const char *s; + + xop[0] = operands[0]; + xop[2] = operands[2]; + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) + { + case SImode: + /* Max length can be 12 in this case, but this is OK because + 2 of these are for alignment, and are anticipated in the length + of the ADDR_DIFF_VEC. 
*/ + if (unalign && !satisfies_constraint_Rcq (xop[0])) + s = \"add2 %2,pcl,%0\n\tld_s%2,[%2,12]\"; + else if (unalign) + s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\"; + else + s = \"add %2,%0,2\n\tld.as %2,[pcl,%2]\"; + arc_clear_unalign (); + break; + case HImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + { + if (satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %2,%0,%1\n\tldw.as %2,[pcl,%2]\"; + xop[1] = GEN_INT ((10 - unalign) / 2U); + } + else + { + s = \"add1 %2,pcl,%0\n\tldw_s %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + } + } + else + { + if (satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %2,%0,%1\n\tldw.x.as %2,[pcl,%2]\"; + xop[1] = GEN_INT ((10 - unalign) / 2U); + } + else + { + s = \"add1 %2,pcl,%0\n\tldw_s.x %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + } + } + arc_toggle_unalign (); + break; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + { + if ((rtx_equal_p (xop[2], xop[0]) + || find_reg_note (insn, REG_DEAD, xop[0])) + && satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %0,%0,pcl\n\tldb_s %2,[%0,%1]\"; + xop[1] = GEN_INT (8 + unalign); + } + else + { + s = \"add %2,%0,pcl\n\tldb_s %2,[%2,%1]\"; + xop[1] = GEN_INT (10 + unalign); + arc_toggle_unalign (); + } + } + else if ((rtx_equal_p (xop[0], xop[2]) + || find_reg_note (insn, REG_DEAD, xop[0])) + && satisfies_constraint_Rcq (xop[0])) + { + s = \"add_s %0,%0,%1\n\tldb.x %2,[pcl,%0]\"; + xop[1] = GEN_INT (10 - unalign); + arc_toggle_unalign (); + } + else + { + /* ??? Length is 12. */ + s = \"add %2,%0,%1\n\tldb.x %2,[pcl,%2]\"; + xop[1] = GEN_INT (8 + unalign); + } + break; + default: + gcc_unreachable (); + } + output_asm_insn (s, xop); + return \"add_s %2,%2,pcl\n\tj_s%* [%2]\"; +}" + [(set_attr "length" "10") + (set_attr "type" "jump") + (set_attr "iscompact" "true") + (set_attr "cond" "nocond")]) + +(define_expand "call" + ;; operands[1] is stack_size_rtx + ;; operands[2] is next_arg_register + [(parallel [(call (match_operand:SI 0 "call_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI 31))])] + "" + "{ + rtx callee; + + gcc_assert (MEM_P (operands[0])); + callee = XEXP (operands[0], 0); + if (crtl->profile && arc_profile_call (callee)) + { + emit_call_insn (gen_call_prof (gen_rtx_SYMBOL_REF (Pmode, + \"_mcount_call\"), + operands[1])); + DONE; + } + /* This is to decide if we should generate indirect calls by loading the + 32 bit address of the callee into a register before performing the + branch and link - this exposes cse opportunities. + Also, in weird cases like compile/20010107-1.c, we may get a PLUS. */ + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + } +") + + +; Rcq, which is used in alternative 0, checks for conditional execution. +; At instruction output time, if it doesn't match and we end up with +; alternative 1 ("q"), that means that we can't use the short form. +(define_insn "*call_i" + [(call (mem:SI (match_operand:SI 0 + "call_address_operand" "Rcq,q,c,Cbp,Cbr,L,I,Cal")) + (match_operand 1 "" "")) + (clobber (reg:SI 31))] + "" + "@ + jl%!%* [%0]%& + jl%!%* [%0]%& + jl%!%* [%0] + bl%!%* %P0 + bl%!%* %P0 + jl%!%* %S0 + jl%* %S0 + jl%! 
%S0" + [(set_attr "type" "call,call,call,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,false,*,*,*,*,*,*") + (set_attr "predicable" "no,no,yes,yes,no,yes,no,yes") + (set_attr "length" "*,*,4,4,4,4,4,8")]) + +(define_insn "call_prof" + [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "Cbr,Cal")) + (match_operand 1 "" "")) + (clobber (reg:SI 31)) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + bl%!%* %P0;2 + jl%! %^%S0" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes,yes") + (set_attr "length" "4,8")]) + +(define_expand "call_value" + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + [(parallel [(set (match_operand 0 "dest_reg_operand" "=r") + (call (match_operand:SI 1 "call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI 31))])] + "" + " + { + rtx callee; + + gcc_assert (MEM_P (operands[1])); + callee = XEXP (operands[1], 0); + if (crtl->profile && arc_profile_call (callee)) + { + emit_call_insn (gen_call_value_prof (operands[0], + gen_rtx_SYMBOL_REF (Pmode, + \"_mcount_call\"), + operands[2])); + DONE; + } + /* See the comment in define_expand \"call\". */ + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + }") + + +; Rcq, which is used in alternative 0, checks for conditional execution. +; At instruction output time, if it doesn't match and we end up with +; alternative 1 ("q"), that means that we can't use the short form. +(define_insn "*call_value_i" + [(set (match_operand 0 "dest_reg_operand" "=Rcq,q,w, w, w,w,w, w") + (call (mem:SI (match_operand:SI 1 + "call_address_operand" "Rcq,q,c,Cbp,Cbr,L,I,Cal")) + (match_operand 2 "" ""))) + (clobber (reg:SI 31))] + "" + "@ + jl%!%* [%1]%& + jl%!%* [%1]%& + jl%!%* [%1] + bl%!%* %P1;1 + bl%!%* %P1;1 + jl%!%* %S1 + jl%* %S1 + jl%! %S1" + [(set_attr "type" "call,call,call,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,false,*,*,*,*,*,*") + (set_attr "predicable" "no,no,yes,yes,no,yes,no,yes") + (set_attr "length" "*,*,4,4,4,4,4,8")]) + +; There is a bl_s instruction (16 bit opcode branch-and-link), but we can't +; use it for lack of inter-procedural branch shortening. +; Link-time relaxation would help... + + +(define_insn "call_value_prof" + [(set (match_operand 0 "dest_reg_operand" "=r,r") + (call (mem:SI (match_operand:SI 1 "symbolic_operand" "Cbr,Cal")) + (match_operand 2 "" ""))) + (clobber (reg:SI 31)) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + bl%!%* %P1;1 + jl%! %^%S1" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes,yes") + (set_attr "length" "4,8")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop%?" + [(set_attr "type" "misc") + (set_attr "iscompact" "true") + (set_attr "cond" "canuse") + (set_attr "length" "2")]) + +;; Special pattern to flush the icache. +;; ??? Not sure what to do here. Some ARC's are known to support this. + +(define_insn "flush_icache" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 0)] + "" + "* return \"\";" + [(set_attr "type" "misc")]) + +;; Split up troublesome insns for better scheduling. + +;; Peepholes go at the end. +;;asl followed by add can be replaced by an add{1,2,3} +;; Three define_peepholes have been added for this optimization +;; ??? This used to target non-canonical rtl. Now we use add_n, which +;; can be generated by combine. Check if these peepholes still provide +;; any benefit. 
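+; Concrete sketch of the rewrite these peepholes aim for (assuming the
+; ARC scaled-add semantics, where addN a,b,c computes b + (c << N)):
+;   asl  r0,r1,2         ; r0 = r1 << 2
+;   add  r3,r4,r0        ; r3 = r4 + r0
+; becomes, when r0 is dead afterwards,
+;   add2 r3,r4,r1
+; The schematic patterns follow.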
+ +;; ------------------------------------------------------------- +;; Pattern 1 : r0 = r1 << {i} +;; r3 = r4/INT + r0 ;;and commutative +;; || +;; \/ +;; add{i} r3,r4/INT,r1 +;; ------------------------------------------------------------- +;; ??? This should be covered by combine, alas, at times combine gets +;; too clever for it's own good: when the shifted input is known to be +;; either 0 or 1, the operation will be made into an if-then-else, and +;; thus fail to match the add_n pattern. Example: _mktm_r, line 85 in +;; newlib/libc/time/mktm_r.c . + +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 3 "dest_reg_operand" "") + (plus:SI (match_operand:SI 4 "nonmemory_operand" "") + (match_operand:SI 5 "nonmemory_operand" "")))] + "(INTVAL (operands[2]) == 1 + || INTVAL (operands[2]) == 2 + || INTVAL (operands[2]) == 3) + && (true_regnum (operands[4]) == true_regnum (operands[0]) + || true_regnum (operands[5]) == true_regnum (operands[0])) + && (peep2_reg_dead_p (2, operands[0]) || (true_regnum (operands[3]) == true_regnum (operands[0])))" + ;; the preparation statements take care to put proper operand in operands[4] + ;; operands[4] will always contain the correct operand. This is added to satisfy commutativity + [(set (match_dup 3) + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 4)))] + "if (true_regnum (operands[4]) == true_regnum (operands[0])) + operands[4] = operands[5]; + operands[2] = GEN_INT (1 << INTVAL (operands[2]));" +) + +;; ------------------------------------------------------------- +;; Pattern 1 : r0 = r1 << {i} +;; r3 = r4 - r0 +;; || +;; \/ +;; sub{i} r3,r4,r1 +;; ------------------------------------------------------------- +;; ??? This should be covered by combine, alas, at times combine gets +;; too clever for it's own good: when the shifted input is known to be +;; either 0 or 1, the operation will be made into an if-then-else, and +;; thus fail to match the sub_n pattern. Example: __ieee754_yn, line 239 in +;; newlib/libm/math/e_jn.c . + +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 3 "dest_reg_operand" "") + (minus:SI (match_operand:SI 4 "nonmemory_operand" "") + (match_dup 0)))] + "(INTVAL (operands[2]) == 1 + || INTVAL (operands[2]) == 2 + || INTVAL (operands[2]) == 3) + && (peep2_reg_dead_p (2, operands[0]) + || (true_regnum (operands[3]) == true_regnum (operands[0])))" + [(set (match_dup 3) + (minus:SI (match_dup 4) + (mult:SI (match_dup 1) + (match_dup 2))))] + "operands[2] = GEN_INT (1 << INTVAL (operands[2]));" +) + + + +; When using the high single bit, the result of a multiply is either +; the original number or zero. But MPY costs 4 cycles, which we +; can replace with the 2 cycles for the pair of TST_S and ADD.NE. 
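+; Sketch of the replacement (operand names and exact instruction selection
+; are illustrative): for r4 = (r1 >> 31) * r2 the logical shift yields 0
+; or 1, so the product is either r2 or 0, and a sequence along the lines of
+;   lsr.f  r4,r1,31      ; r4 = 0 or 1; Z set when the sign bit is clear
+;   mov.ne r4,r2         ; sign bit set: take r2; otherwise r4 stays 0
+; gives the same result without the multiplier, subject to the
+; operand-overlap checks in the peepholes below.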
+(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (const_int 31))) + (set (match_operand:SI 4 "register_operand" "") + (mult:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonmemory_operand" "")))] + "TARGET_ARC700 && !TARGET_NOMPY_SET + && (rtx_equal_p (operands[0], operands[2]) + || rtx_equal_p (operands[0], operands[3])) + && peep2_regno_dead_p (0, CC_REG) + && (rtx_equal_p (operands[0], operands[4]) + || (peep2_reg_dead_p (2, operands[0]) + && peep2_reg_dead_p (1, operands[4])))" + [(parallel [(set (reg:CC_Z CC_REG) + (compare:CC_Z (lshiftrt:SI (match_dup 1) (const_int 31)) + (const_int 0))) + (set (match_dup 4) (lshiftrt:SI (match_dup 1) (const_int 31)))]) + (cond_exec + (ne (reg:CC_Z CC_REG) (const_int 0)) + (set (match_dup 4) (match_dup 5)))] +{ + if (!rtx_equal_p (operands[0], operands[2])) + operands[5] = operands[2]; + else if (!rtx_equal_p (operands[0], operands[3])) + operands[5] = operands[3]; + else + operands[5] = operands[4]; /* Actually a no-op... presumably rare. */ +}) + +(define_peephole2 + [(set (match_operand:SI 0 "dest_reg_operand" "") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (const_int 31))) + (set (match_operand:SI 4 "register_operand" "") + (mult:SI (match_operand:SI 2 "register_operand") + (match_operand:SI 3 "nonmemory_operand" "")))] + "TARGET_ARC700 && !TARGET_NOMPY_SET + && (rtx_equal_p (operands[0], operands[2]) + || rtx_equal_p (operands[0], operands[3])) + && peep2_regno_dead_p (2, CC_REG)" + [(parallel [(set (reg:CC_Z CC_REG) + (compare:CC_Z (lshiftrt:SI (match_dup 1) (const_int 31)) + (const_int 0))) + (set (match_dup 0) (lshiftrt:SI (match_dup 1) (const_int 31)))]) + (set (match_dup 4) (match_dup 5)) + (cond_exec + (eq (reg:CC_Z CC_REG) (const_int 0)) + (set (match_dup 4) (const_int 0)))] + "operands[5] = operands[rtx_equal_p (operands[0], operands[2]) ? 
3 : 2];") + +;; Instructions generated through builtins + +(define_insn "clrsbsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal")))] + "TARGET_NORM" + "@ + norm \t%0, %1 + norm \t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_insn "norm_f" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (clrsb:SI (match_operand:SI 1 "general_operand" "cL,Cal"))) + (set (reg:CC_ZN CC_REG) + (compare:CC_ZN (match_dup 1) (const_int 0)))] + "TARGET_NORM" + "@ + norm.f\t%0, %1 + norm.f\t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_insn_and_split "clrsbhi2" + [(set (match_operand:HI 0 "dest_reg_operand" "=w,w") + (clrsb:HI (match_operand:HI 1 "general_operand" "cL,Cal")))] + "TARGET_NORM" + "#" + "reload_completed" + [(set (match_dup 0) (zero_extend:SI (clrsb:HI (match_dup 1))))] + "operands[0] = simplify_gen_subreg (SImode, operands[0], HImode, 0);") + +(define_insn "normw" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") + (zero_extend:SI + (clrsb:HI (match_operand:HI 1 "general_operand" "cL,Cal"))))] + "TARGET_NORM" + "@ + normw \t%0, %1 + normw \t%0, %S1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core,two_cycle_core")]) + +(define_expand "clzsi2" + [(set (match_operand:SI 0 "dest_reg_operand" "") + (clz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NORM" +{ + emit_insn (gen_norm_f (operands[0], operands[1])); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LT (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_GE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], + plus_constant (SImode, operands[0], 1)))); + DONE; +}) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_NORM" +{ + rtx temp = operands[0]; + + if (reg_overlap_mentioned_p (temp, operands[1]) + || (REGNO (temp) < FIRST_PSEUDO_REGISTER + && !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], + REGNO (temp)))) + temp = gen_reg_rtx (SImode); + emit_insn (gen_addsi3 (temp, operands[1], constm1_rtx)); + emit_insn (gen_bic_f_zn (temp, temp, operands[1])); + emit_insn (gen_clrsbsi2 (temp, temp)); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_LT (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], GEN_INT (32)))); + emit_insn + (gen_rtx_COND_EXEC + (VOIDmode, + gen_rtx_GE (VOIDmode, gen_rtx_REG (CC_ZNmode, CC_REG), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_MINUS (SImode, GEN_INT (31), temp)))); + DONE; +}) + + +(define_insn "swap" + [(set (match_operand:SI 0 "dest_reg_operand" "=w,w,w") + (unspec:SI [(match_operand:SI 1 "general_operand" "L,Cal,c")] + UNSPEC_SWAP))] + "TARGET_SWAP" + "@ + swap \t%0, %1 + swap \t%0, %S1 + swap \t%0, %1" + [(set_attr "length" "4,8,4") + (set_attr "type" "two_cycle_core,two_cycle_core,two_cycle_core")]) + +;; FIXME: an intrinsic for multiply is daft. Can we remove this? +(define_insn "mul64" + [(unspec [(match_operand:SI 0 "general_operand" "q,r,r,%r") + (match_operand:SI 1 "general_operand" "q,rL,I,Cal")] + UNSPEC_MUL64)] + "TARGET_MUL64_SET" + "@ + mul64%? \t0, %0, %1%& + mul64%? \t0, %0, %1 + mul64 \t0, %0, %1 + mul64%? 
\t0, %0, %S1" + [(set_attr "length" "2,4,4,8") + (set_attr "iscompact" "true,false,false,false") + (set_attr "type" "binary,binary,binary,binary") + (set_attr "cond" "canuse,canuse, nocond, canuse")]) + +(define_insn "mulu64" + [(unspec [(match_operand:SI 0 "general_operand" "%r,r,r,r") + (match_operand:SI 1 "general_operand" "rL,I,r,Cal")] + UNSPEC_MULU64)] + "TARGET_MUL64_SET" + "@ + mulu64%? \t0, %0, %1 + mulu64 \t0, %0, %1 + mulu64 \t0, %0, %1 + mulu64%? \t0, %0, %S1" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "binary,binary,binary,binary") + (set_attr "cond" "canuse,nocond,nocond,canuse")]) + +(define_insn "divaw" + [(set (match_operand:SI 0 "dest_reg_operand" "=&w,&w,&w") + (unspec:SI [(div:SI (match_operand:SI 1 "general_operand" "r,Cal,r") + (match_operand:SI 2 "general_operand" "r,r,Cal"))] + UNSPEC_DIVAW))] + "TARGET_ARC700 || TARGET_EA_SET" + "@ + divaw \t%0, %1, %2 + divaw \t%0, %S1, %2 + divaw \t%0, %1, %S2" + [(set_attr "length" "4,8,8") + (set_attr "type" "divaw,divaw,divaw")]) + +(define_insn "flag" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "rL,I,Cal")] + VUNSPEC_FLAG)] + "" + "@ + flag%? %0 + flag %0 + flag%? %S0" + [(set_attr "length" "4,4,8") + (set_attr "type" "misc,misc,misc") + (set_attr "predicable" "yes,no,yes") + (set_attr "cond" "clob,clob,clob")]) + +(define_insn "brk" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_BRK)] + "" + "brk" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "rtie" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_RTIE)] + "" + "rtie" + [(set_attr "length" "4") + (set_attr "type" "misc") + (set_attr "cond" "clob")]) + +(define_insn "sync" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_SYNC)] + "" + "sync" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "swi" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_SWI)] + "" + "* +{ + if(TARGET_ARC700) + return \"trap0\"; + else + return \"swi\"; +}" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + + +(define_insn "sleep" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L")] + VUNSPEC_SLEEP)] + "check_if_valid_sleep_operand(operands,0)" + "sleep %0" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +(define_insn "core_read" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (unspec_volatile:SI [(match_operand:SI 1 "general_operand" "Hn,!r")] + VUNSPEC_CORE_READ))] + "" + "* + if (check_if_valid_regno_const (operands, 1)) + return \"mov \t%0, r%1\"; + return \"mov \t%0, r%1\"; + " + [(set_attr "length" "4") + (set_attr "type" "unary")]) + +(define_insn "core_write" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "r,r") + (match_operand:SI 1 "general_operand" "Hn,!r")] + VUNSPEC_CORE_WRITE)] + "" + "* + if (check_if_valid_regno_const (operands, 1)) + return \"mov \tr%1, %0\"; + return \"mov \tr%1, %0\"; + " + [(set_attr "length" "4") + (set_attr "type" "unary")]) + +(define_insn "lr" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r,r") + (unspec_volatile:SI [(match_operand:SI 1 "general_operand" "I,HCal,r,D")] + VUNSPEC_LR))] + "" + "lr\t%0, [%1]" + [(set_attr "length" "4,8,4,8") + (set_attr "type" "lr,lr,lr,lr")]) + +(define_insn "sr" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "Cal,r,r,r") + (match_operand:SI 1 "general_operand" "Ir,I,HCal,r")] + VUNSPEC_SR)] + "" + "sr\t%S0, [%1]" + [(set_attr "length" "8,4,8,4") + (set_attr "type" 
"sr,sr,sr,sr")]) + +(define_insn "trap_s" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L,Cal")] + VUNSPEC_TRAP_S)] + "TARGET_ARC700" +{ + if (which_alternative == 0) + { + arc_toggle_unalign (); + return \"trap_s %0\"; + } + + /* Keep this message in sync with the one in arc.c:arc_expand_builtin, + because *.md files do not get scanned by exgettext. */ + fatal_error (\"operand to trap_s should be an unsigned 6-bit value\"); +} + [(set_attr "length" "2") + (set_attr "type" "misc")]) + +(define_insn "unimp_s" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")] + VUNSPEC_UNIMP_S)] + "TARGET_ARC700" + "unimp_s" + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +;; End of instructions generated through builtins + +; Since the demise of REG_N_SETS as reliable data readily available to the +; target, it is no longer possible to find out +; in the prologue / epilogue expanders how many times blink is set. +; Using df_regs_ever_live_p to decide if blink needs saving means that +; any explicit use of blink will cause it to be saved; hence we cannot +; represent the blink use in return / sibcall instructions themselves, and +; instead have to show it in EPILOGUE_USES and must explicitly +; forbid instructions that change blink in the return / sibcall delay slot. +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (simple_return) + (use (match_operand 2 "" ""))])] + "" + " + { + rtx callee = XEXP (operands[0], 0); + + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + if (crtl->profile && arc_profile_call (callee)) + { + emit_insn (gen_sibcall_prof + (gen_rtx_SYMBOL_REF (Pmode, \"_mcount_call\"), + operands[1], operands[2])); + DONE; + } + if (GET_CODE (callee) != REG + && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee))) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + }" +) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "dest_reg_operand" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (simple_return) + (use (match_operand 3 "" ""))])] + "" + " + { + rtx callee = XEXP (operands[1], 0); + + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + if (crtl->profile && arc_profile_call (XEXP (operands[1], 0))) + { + emit_insn (gen_sibcall_value_prof + (operands[0], gen_rtx_SYMBOL_REF (Pmode, \"_mcount_call\"), + operands[2], operands[3])); + DONE; + } + if (GET_CODE (callee) != REG && arc_is_longcall_p (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + }" +) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" + "Cbp,Cbr,Rs5,Rsc,Cal")) + (match_operand 1 "" "")) + (simple_return) + (use (match_operand 2 "" ""))] + "" + "@ + b%!%* %P0 + b%!%* %P0 + j%!%* [%0]%& + j%!%* [%0] + j%! %P0" + [(set_attr "type" "call,call,call,call,call_no_delay_slot") + (set_attr "predicable" "yes,no,no,yes,yes") + (set_attr "iscompact" "false,false,maybe,false,false") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "dest_reg_operand" "") + (call (mem:SI (match_operand:SI 1 "call_address_operand" + "Cbp,Cbr,Rs5,Rsc,Cal")) + (match_operand 2 "" ""))) + (simple_return) + (use (match_operand 3 "" ""))] + "" + "@ + b%!%* %P1 + b%!%* %P1 + j%!%* [%1]%& + j%!%* [%1] + j%! 
%P1" + [(set_attr "type" "call,call,call,call,call_no_delay_slot") + (set_attr "predicable" "yes,no,no,yes,yes") + (set_attr "iscompact" "false,false,maybe,false,false") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "sibcall_prof" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Cbr,Cal")) + (match_operand 1 "" "")) + (simple_return) + (use (match_operand 2 "" "")) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + b%!%* %P0;2 + j%! %^%S0;2" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes") + (set_attr "is_SIBCALL" "yes")] +) + +(define_insn "sibcall_value_prof" + [(set (match_operand 0 "dest_reg_operand" "") + (call (mem:SI (match_operand:SI 1 "call_address_operand" "Cbr,Cal")) + (match_operand 2 "" ""))) + (simple_return) + (use (match_operand 3 "" "")) + (use (reg:SI 8)) + (use (reg:SI 9))] + "" + "@ + b%!%* %P1;1 + j%! %^%S1;1" + [(set_attr "type" "call,call_no_delay_slot") + (set_attr "predicable" "yes") + (set_attr "is_SIBCALL" "yes")] +) + +(define_expand "prologue" + [(pc)] + "" +{ + arc_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" + [(pc)] + "" +{ + arc_expand_epilogue (0); + DONE; +}) + +(define_expand "sibcall_epilogue" + [(pc)] + "" +{ + arc_expand_epilogue (1); + DONE; +}) + +; Since the demise of REG_N_SETS, it is no longer possible to find out +; in the prologue / epilogue expanders how many times blink is set. +; Using df_regs_ever_live_p to decide if blink needs saving means that +; any explicit use of blink will cause it to be saved; hence we cannot +; represent the blink use in return / sibcall instructions themselves, and +; instead have to show it in EPILOGUE_USES and must explicitly +; forbid instructions that change blink in the return / sibcall delay slot. +(define_insn "simple_return" + [(simple_return)] + "reload_completed" +{ + rtx reg + = gen_rtx_REG (Pmode, + arc_return_address_regs[arc_compute_function_type (cfun)]); + + if (TARGET_PAD_RETURN) + arc_pad_return (); + output_asm_insn (\"j%!%* [%0]%&\", ®); + return \"\"; +} + [(set_attr "type" "return") + ; predicable won't help here since the canonical rtl looks different + ; for branches. + (set_attr "cond" "canuse") + (set (attr "iscompact") + (cond [(eq (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_string "maybe")] + (const_string "false"))) + (set (attr "length") + (cond [(ne (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_int 4)] + (const_int 2)))]) + +(define_insn "p_return_i" + [(set (pc) + (if_then_else (match_operator 0 "proper_comparison_operator" + [(reg CC_REG) (const_int 0)]) + (simple_return) (pc)))] + "reload_completed" +{ + rtx xop[2]; + xop[0] = operands[0]; + xop[1] + = gen_rtx_REG (Pmode, + arc_return_address_regs[arc_compute_function_type (cfun)]); + + if (TARGET_PAD_RETURN) + arc_pad_return (); + output_asm_insn (\"j%d0%!%# [%1]%&\", xop); + /* record the condition in case there is a delay insn. 
*/ + arc_ccfsm_record_condition (xop[0], false, insn, 0); + return \"\"; +} + [(set_attr "type" "return") + (set_attr "cond" "use") + (set (attr "iscompact") + (cond [(eq (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_string "maybe")] + (const_string "false"))) + (set (attr "length") + (cond [(ne (symbol_ref "arc_compute_function_type (cfun)") + (symbol_ref "ARC_FUNCTION_NORMAL")) + (const_int 4) + (not (match_operand 0 "equality_comparison_operator" "")) + (const_int 4) + (eq_attr "delay_slot_filled" "yes") + (const_int 4)] + (const_int 2)))]) + +(define_insn_and_split "eh_return" + [(eh_return) + (use (match_operand:SI 0 "move_src_operand" "rC32,mCalCpc")) + (clobber (match_scratch:SI 1 "=X,r")) + (clobber (match_scratch:SI 2 "=&r,r"))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 0))] +{ + int offs = arc_return_slot_offset (); + + if (offs < 0) + operands[2] = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); + else + { + if (!register_operand (operands[0], Pmode) + && !satisfies_constraint_C32 (operands[0])) + { + emit_move_insn (operands[1], operands[0]); + operands[0] = operands[1]; + } + rtx addr = plus_constant (Pmode, stack_pointer_rtx, offs); + if (!strict_memory_address_p (Pmode, addr)) + { + emit_move_insn (operands[2], addr); + addr = operands[2]; + } + operands[2] = gen_frame_mem (Pmode, addr); + } +} + [(set_attr "length" "12")]) + +;; ??? #ifdefs in function.c require the presence of this pattern, with a +;; non-constant predicate. +(define_expand "return" + [(return)] + "optimize < 0") + + ;; Comment in final.c (insn_current_reference_address) says + ;; forward branch addresses are calculated from the next insn after branch + ;; and for backward branches, it is calculated from the branch insn start. + ;; The shortening logic here is tuned to accomodate this behaviour +;; ??? This should be grokked by the ccfsm machinery. +(define_insn "cbranchsi4_scratch" + [(set (pc) + (if_then_else (match_operator 0 "proper_comparison_operator" + [(match_operand:SI 1 "register_operand" "c,c, c") + (match_operand:SI 2 "nonmemory_operand" "L,c,?Cal")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_operand 4 "cc_register" ""))] + "(reload_completed + || (TARGET_EARLY_CBRANCHSI + && brcc_nolimm_operator (operands[0], VOIDmode))) + && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "* + switch (get_attr_length (insn)) + { + case 2: return \"br%d0%? %1, %2, %^%l3%&\"; + case 4: return \"br%d0%* %1, %B2, %^%l3\"; + case 8: if (!brcc_nolimm_operator (operands[0], VOIDmode)) + return \"br%d0%* %1, %B2, %^%l3\"; + case 6: case 10: + case 12:return \"cmp%? %1, %B2\\n\\tb%d0%* %^%l3%&;br%d0 out of range\"; + default: fprintf (stderr, \"unexpected length %d\\n\", get_attr_length (insn)); fflush (stderr); gcc_unreachable (); + } + " + [(set_attr "cond" "clob, clob, clob") + (set (attr "type") + (if_then_else + (match_test "valid_brcc_with_delay_p (operands)") + (const_string "brcc") + (const_string "brcc_no_delay_slot"))) + ; For forward branches, we need to account not only for the distance to + ; the target, but also the difference between pcl and pc, the instruction + ; length, and any delay insn, if present. + (set + (attr "length") + (cond ; the outer cond does a test independent of branch shortening. 
+ [(match_operand 0 "brcc_nolimm_operator" "") + (cond + [(and (match_operand:CC_Z 4 "cc_register") + (eq_attr "delay_slot_filled" "no") + (ge (minus (match_dup 3) (pc)) (const_int -128)) + (le (minus (match_dup 3) (pc)) + (minus (const_int 122) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 2) + (and (ge (minus (match_dup 3) (pc)) (const_int -256)) + (le (minus (match_dup 3) (pc)) + (minus (const_int 244) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4) + (match_operand:SI 1 "compact_register_operand" "") + (const_int 6)] + (const_int 8))] + (cond [(and (ge (minus (match_dup 3) (pc)) (const_int -256)) + (le (minus (match_dup 3) (pc)) (const_int 244))) + (const_int 8) + (match_operand:SI 1 "compact_register_operand" "") + (const_int 10)] + (const_int 12)))) + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) & 2") + (const_string "true") (const_string "false")))]) + +; combiner pattern observed for unwind-dw2-fde.c:linear_search_fdes. +(define_insn "*bbit" + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(zero_extract:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand:SI 2 "nonmemory_operand" "L,Lc")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC_ZN CC_REG))] + "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" +{ + switch (get_attr_length (insn)) + { + case 4: return (GET_CODE (operands[3]) == EQ + ? \"bbit0%* %1,%2,%0\" : \"bbit1%* %1,%2,%0\"); + case 6: + case 8: return \"btst%? %1,%2\n\tb%d3%* %0; bbit out of range\"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "brcc") + (set_attr "cond" "clob") + (set (attr "length") + (cond [(and (ge (minus (match_dup 0) (pc)) (const_int -254)) + (le (minus (match_dup 0) (pc)) + (minus (const_int 248) + (symbol_ref "get_attr_delay_slot_length (insn)")))) + (const_int 4) + (eq (symbol_ref "which_alternative") (const_int 0)) + (const_int 6)] + (const_int 8))) + (set (attr "iscompact") + (if_then_else (match_test "get_attr_length (insn) == 6") + (const_string "true") (const_string "false")))]) + +; ??? When testing a bit from a DImode register, combine creates a +; zero_extract in DImode. This goes via an AND with a DImode constant, +; so can only be observed on 64 bit hosts. +(define_insn_and_split "*bbit_di" + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(zero_extract:DI (match_operand:SI 1 "register_operand" "Rcqq,c") + (const_int 1) + (match_operand 2 "immediate_operand" "L,L")) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc))) + (clobber (reg:CC_ZN CC_REG))] + "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" + "#" + "" + [(parallel + [(set (pc) (if_then_else (match_dup 3) (label_ref (match_dup 0)) (pc))) + (clobber (reg:CC_ZN CC_REG))])] +{ + rtx xtr; + + xtr = gen_rtx_ZERO_EXTRACT (SImode, operands[1], const1_rtx, operands[2]); + operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), + xtr, const0_rtx); +}) + +; operand 0 is the loop count pseudo register +; operand 1 is the loop end pattern +(define_expand "doloop_begin" + [(use (match_operand 0 "register_operand" "")) + (use (match_operand 1 "" ""))] + "" +{ + /* Using the INSN_UID of the loop end pattern to identify it causes + trouble with -fcompare-debug, so allocate a debug-independent + id instead. 
We use negative numbers so that we can use the same + slot in doloop_end_i where we later store a CODE_LABEL_NUMBER, and + still be able to tell what kind of number this is. */ + static HOST_WIDE_INT loop_end_id = 0; + + rtx id = GEN_INT (--loop_end_id); + XEXP (XVECEXP (PATTERN (operands[1]), 0, 4), 0) = id; + emit_insn (gen_doloop_begin_i (operands[0], const0_rtx, id, + const0_rtx, const0_rtx)); + DONE; +}) + +; ??? can't describe the insn properly as then the optimizers try to +; hoist the SETs. +;(define_insn "doloop_begin_i" +; [(set (reg:SI LP_START) (pc)) +; (set (reg:SI LP_END) (unspec:SI [(pc)] UNSPEC_LP)) +; (use (match_operand 0 "const_int_operand" "n"))] +; "" +; "lp .L__GCC__LP%0" +;) + +; The operands of doloop_end_i are also read / written by arc_reorg with +; XVECEXP (PATTERN (lp, 0, N), so if you want to change the pattern, you +; might have to adjust arc_reorg. +; operands 0 / 2 are supplied by the expander, 1, 3 and 4 are filled in +; by arc_reorg. arc_reorg might also alter operand 0. +; +; N in XVECEXP PATTERN (lp, 0 N) +; V rtl purpose +; 0 unspec UNSPEC_LP identify pattern +; 1 clobber LP_START show LP_START is set +; 2 clobber LP_END show LP_END is set +; 3 use operand0 loop count pseudo register +; 4 use operand1 before arc_reorg: -id +; after : CODE_LABEL_NUMBER of loop top label +; 5 use operand2 INSN_UID of loop end insn +; 6 use operand3 loop setup not at start (1 above, 2 below) +; 7 use operand4 LABEL_REF of top label, if not +; immediately following +; If operand1 is still zero after arc_reorg, this is an orphaned loop +; instruction that was not at the start of the loop. +; There is no point is reloading this insn - then lp_count would still not +; be available for the loop end. +(define_insn "doloop_begin_i" + [(unspec:SI [(pc)] UNSPEC_LP) + (clobber (reg:SI LP_START)) + (clobber (reg:SI LP_END)) + (use (match_operand:SI 0 "register_operand" "l,l,????*X")) + (use (match_operand 1 "const_int_operand" "n,n,C_0")) + (use (match_operand 2 "const_int_operand" "n,n,X")) + (use (match_operand 3 "const_int_operand" "C_0,n,X")) + (use (match_operand 4 "const_int_operand" "C_0,X,X"))] + "" +{ + rtx scan; + int len, size = 0; + int n_insns = 0; + rtx loop_start = operands[4]; + + if (CONST_INT_P (loop_start)) + loop_start = NULL_RTX; + /* Size implications of the alignment will be taken care of by the + alignment inserted at the loop start. */ + if (LOOP_ALIGN (0) && INTVAL (operands[1])) + { + asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0)); + arc_clear_unalign (); + } + if (!INTVAL (operands[1])) + return "; LITTLE LOST LOOP"; + if (loop_start && flag_pic) + { + /* ??? Can do better for when a scratch register + is known. But that would require extra testing. */ + return "push_s r0\;add r0,pcl,%4-(.&-4)\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-(.&-4)\;sr r0,[3]; LP_END\;pop_s r0"; + } + /* Check if the loop end is in range to be set by the lp instruction. */ + size = INTVAL (operands[3]) < 2 ? 0 : 2048; + for (scan = insn; scan && size < 2048; scan = NEXT_INSN (scan)) + { + if (!INSN_P (scan)) + continue; + if (recog_memoized (scan) == CODE_FOR_doloop_end_i + && (XEXP (XVECEXP (PATTERN (scan), 0, 4), 0) + == XEXP (XVECEXP (PATTERN (insn), 0, 4), 0))) + break; + len = get_attr_length (scan); + size += len; + } + /* Try to verify that there are at least three instruction fetches + between the loop setup and the first encounter of the loop end. 
*/ + for (scan = NEXT_INSN (insn); scan && n_insns < 3; scan = NEXT_INSN (scan)) + { + if (!INSN_P (scan)) + continue; + if (GET_CODE (PATTERN (scan)) == SEQUENCE) + scan = XVECEXP (PATTERN (scan), 0, 0); + if (JUMP_P (scan)) + { + if (recog_memoized (scan) != CODE_FOR_doloop_end_i) + { + n_insns += 2; + if (simplejump_p (scan)) + { + scan = XEXP (SET_SRC (PATTERN (scan)), 0); + continue; + } + if (JUMP_LABEL (scan) + /* JUMP_LABEL might be simple_return instead if an insn. */ + && (!INSN_P (JUMP_LABEL (scan)) + || (!next_active_insn (JUMP_LABEL (scan)) + || (recog_memoized (next_active_insn (JUMP_LABEL (scan))) + != CODE_FOR_doloop_begin_i))) + && (!next_active_insn (NEXT_INSN (PREV_INSN (scan))) + || (recog_memoized + (next_active_insn (NEXT_INSN (PREV_INSN (scan)))) + != CODE_FOR_doloop_begin_i))) + n_insns++; + } + break; + } + len = get_attr_length (scan); + /* Size estimation of asms assumes that each line which is nonempty + codes an insn, and that each has a long immediate. For minimum insn + count, assume merely that a nonempty asm has at least one insn. */ + if (GET_CODE (PATTERN (scan)) == ASM_INPUT + || asm_noperands (PATTERN (scan)) >= 0) + n_insns += (len != 0); + else + n_insns += (len > 4 ? 2 : (len ? 1 : 0)); + } + if (LOOP_ALIGN (0)) + { + asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0)); + arc_clear_unalign (); + } + gcc_assert (n_insns || GET_CODE (next_nonnote_insn (insn)) == CODE_LABEL); + if (size >= 2048 || (TARGET_ARC600 && n_insns == 1) || loop_start) + { + if (flag_pic) + { + /* ??? Can do better for when a scratch register + is known. But that would require extra testing. */ + arc_clear_unalign (); + return ".p2align 2\;push_s r0\;add r0,pcl,24\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-.+2\;sr r0,[3]; LP_END\;pop_s r0"; + } + output_asm_insn ((size < 2048 + ? "lp .L__GCC__LP%1" : "sr .L__GCC__LP%1,[3]; LP_END"), + operands); + output_asm_insn (loop_start + ? "sr %4,[2]; LP_START" : "sr 0f,[2]; LP_START", + operands); + if (TARGET_ARC600 && n_insns < 1) + output_asm_insn ("nop", operands); + return (TARGET_ARC600 && n_insns < 3) ? "nop_s\;nop_s\;0:" : "0:"; + } + else if (TARGET_ARC600 && n_insns < 3) + { + /* At least four instructions are needed between the setting of LP_COUNT + and the loop end - but the lp instruction qualifies as one. */ + rtx prev = prev_nonnote_insn (insn); + + if (!INSN_P (prev) || dead_or_set_regno_p (prev, LP_COUNT)) + output_asm_insn ("nop", operands); + } + return "lp .L__GCC__LP%1"; +} + [(set_attr "type" "loop_setup") + (set_attr_alternative "length" +; FIXME: length is usually 4, but we need branch shortening +; to get this right. +; [(if_then_else (match_test "TARGET_ARC600") (const_int 16) (const_int 4)) + [(if_then_else (match_test "flag_pic") (const_int 24) (const_int 16)) + (if_then_else (match_test "flag_pic") (const_int 28) (const_int 16)) + (const_int 0)])] + ;; ??? we should really branch shorten this insn, but then we'd + ;; need a proper label first. N.B. the end label can not only go out + ;; of range when it is far away, but also when it precedes the loop - + ;; which, unfortunately, it sometimes does, when the loop "optimizer" + ;; messes things up. +) + +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +; Use this for the ARC600 and ARC700. For ARCtangent-A5, this is unsafe +; without further checking for nearby branches etc., and without proper +; annotation of shift patterns that clobber lp_count +; ??? 
ARC600 might want to check if the loop has few iteration and only a +; single insn - loop setup is expensive then. +(define_expand "doloop_end" + [(use (match_operand 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_ARC600 || TARGET_ARC700" +{ + /* We could do smaller bivs with biv widening, and wider bivs by having + a high-word counter in an outer loop - but punt on this for now. */ + if (GET_MODE (operands[0]) != SImode) + FAIL; + emit_jump_insn (gen_doloop_end_i (operands[0], operands[1], const0_rtx)); + DONE; +}) + +(define_insn_and_split "doloop_end_i" + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "shouldbe_register_operand" "+l,*c,*m") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) + (use (reg:SI LP_START)) + (use (reg:SI LP_END)) + (use (match_operand 2 "const_int_operand" "n,???Cn0,???X")) + (clobber (match_scratch:SI 3 "=X,X,&????r"))] + "" + "* +{ + rtx prev = prev_nonnote_insn (insn); + + /* If there is an immediately preceding label, we must output a nop, + lest a branch to that label will fall out of the loop. + ??? We could try to avoid this by claiming to have a delay slot if there + is a preceding label, and outputting the delay slot insn instead, if + present. + Or we could have some optimization that changes the source edge to update + the loop count and jump to the loop start instead. */ + /* For ARC600, we must also prevent jumps inside the loop and jumps where + the loop counter value is live at the target from being directly at the + loop end. Being sure that the loop counter is dead at the target is + too much hair - we can't rely on data flow information at this point - + so insert a nop for all branches. + The ARC600 also can't read the loop counter in the last insn of a loop. */ + if (LABEL_P (prev)) + output_asm_insn (\"nop%?\", operands); + return \"\\n.L__GCC__LP%2: ; loop end, start is %1\"; +}" + "&& memory_operand (operands[0], SImode)" + [(pc)] +{ + emit_move_insn (operands[3], operands[0]); + emit_jump_insn (gen_doloop_fallback_m (operands[3], operands[1], operands[0])); + DONE; +} + [(set_attr "type" "loop_end") + (set (attr "length") + (if_then_else (match_test "LABEL_P (prev_nonnote_insn (insn))") + (const_int 4) (const_int 0)))] +) + +; This pattern is generated by arc_reorg when there is no recognizable +; loop start. +(define_insn "*doloop_fallback" + [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+r,!w") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))] + ; avoid fooling the loop optimizer into assuming this is a special insn. + "reload_completed" + "*return get_attr_length (insn) == 8 + ? \"brne.d %0,1,%1\;sub %0,%0,1\" + : \"breq %0,1,0f\;b.d %1\;sub %0,%0,1\\n0:\";" + [(set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -256)) + (le (minus (match_dup 1) (pc)) (const_int 244))) + (const_int 8) (const_int 12))) + (set_attr "type" "brcc_no_delay_slot") + (set_attr "cond" "nocond")] +) + +; reload can't make output reloads for jump insns, so we have to do this by hand. 
+(define_insn "doloop_fallback_m" + [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+&r") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))) + (set (match_operand:SI 2 "memory_operand" "=m") + (plus:SI (match_dup 0) (const_int -1)))] + ; avoid fooling the loop optimizer into assuming this is a special insn. + "reload_completed" + "*return get_attr_length (insn) == 12 + ? \"sub %0,%0,1\;brne.d %0,0,%1\;st%U2%V2 %0,%2\" + : \"sub %0,%0,1\;breq %0,0,0f\;b.d %1\\n0:\tst%U2%V2 %0,%2\";" + [(set (attr "length") + (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -252)) + (le (minus (match_dup 1) (pc)) (const_int 244))) + (const_int 12) (const_int 16))) + (set_attr "type" "brcc_no_delay_slot") + (set_attr "cond" "nocond")] +) + +(define_expand "movmemsi" + [(match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "") + (match_operand:SI 2 "nonmemory_operand" "") + (match_operand 3 "immediate_operand" "")] + "" + "if (arc_expand_movmem (operands)) DONE; else FAIL;") + +;; Close http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35803 if this works +;; to the point that we can generate cmove instructions. +(define_expand "cbranch4" + [(set (reg:CC CC_REG) + (compare:CC (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "comparison_operator" [(reg CC_REG) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + + "TARGET_OPTFPE" +{ + gcc_assert (XEXP (operands[0], 0) == operands[1]); + gcc_assert (XEXP (operands[0], 1) == operands[2]); + operands[0] = gen_compare_reg (operands[0], VOIDmode); + emit_jump_insn (gen_branch_insn (operands[3], operands[0])); + DONE; +}) + +(define_expand "cmp_float" + [(parallel [(set (match_operand 0 "") (match_operand 1 "")) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))])] + "" + "") + +(define_mode_iterator OPTFPE_CMP [CC_Z CC_FP_GT CC_FP_GE CC_FP_UNEQ CC_FP_ORD]) +(define_mode_attr cmp [(CC_Z "eq") (CC_FP_GT "gt") (CC_FP_GE "ge") + (CC_FP_UNEQ "uneq") (CC_FP_ORD "ord")]) + +(define_insn "*cmpsf_" + [(set (reg:OPTFPE_CMP CC_REG) (compare:OPTFPE_CMP (reg:SF 0) (reg:SF 1))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))] + "TARGET_OPTFPE && (!TARGET_ARGONAUT_SET || !TARGET_SPFP) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__sf2\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +;; N.B. for "*cmpdf_ord": +;; double precision fpx sets bit 31 for NaNs. We need bit 51 set +;; for the floating point emulation to recognize the NaN. +(define_insn "*cmpdf_" + [(set (reg:OPTFPE_CMP CC_REG) (compare:OPTFPE_CMP (reg:DF 0) (reg:DF 2))) + (clobber (reg:SI RETURN_ADDR_REGNUM)) + (clobber (reg:SI R12_REG))] + "TARGET_OPTFPE && (!TARGET_ARGONAUT_SET || !TARGET_DPFP) + && SFUNC_CHECK_PREDICABLE" + "*return arc_output_libcall (\"__df2\");" + [(set_attr "is_sfunc" "yes") + (set_attr "predicable" "yes")]) + +(define_insn "abssf2" + [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,Rcw,w") + (abs:SF (match_operand:SF 1 "register_operand" "0, 0,c")))] + "" + "bclr%? %0,%1,31%&" + [(set_attr "type" "unary") + (set_attr "iscompact" "maybe,false,false") + (set_attr "length" "2,4,4") + (set_attr "predicable" "no,yes,no")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "dest_reg_operand" "=Rcw,w") + (neg:SF (match_operand:SF 1 "register_operand" "0,c")))] + "" + "bxor%? 
%0,%1,31" + [(set_attr "type" "unary") + (set_attr "predicable" "yes,no")]) + +;; ??? Should this use arc_output_libcall and set is_sfunc? +(define_insn "*millicode_thunk_st" + [(match_parallel 0 "millicode_store_operation" + [(set (mem:SI (reg:SI SP_REG)) (reg:SI 13))])] + "" +{ + output_asm_insn ("bl%* __st_r13_to_%0", + &SET_SRC (XVECEXP (operands[0], 0, + XVECLEN (operands[0], 0) - 2))); + return ""; +} + [(set_attr "type" "call")]) + +(define_insn "*millicode_thunk_ld" + [(match_parallel 0 "millicode_load_clob_operation" + [(set (reg:SI 13) (mem:SI (reg:SI SP_REG)))])] + "" +{ + output_asm_insn ("bl%* __ld_r13_to_%0", + &SET_DEST (XVECEXP (operands[0], 0, + XVECLEN (operands[0], 0) - 2))); + return ""; +} + [(set_attr "type" "call")]) + +; the sibthunk restores blink, so we use the return rtx. +(define_insn "*millicode_sibthunk_ld" + [(match_parallel 0 "millicode_load_operation" + [(return) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (reg:SI 12))) + (set (reg:SI 13) (mem:SI (reg:SI SP_REG)))])] + "" +{ + output_asm_insn ("b%* __ld_r13_to_%0_ret", + &SET_DEST (XVECEXP (operands[0], 0, + XVECLEN (operands[0], 0) - 1))); + return ""; +} + [(set_attr "type" "call") + (set_attr "is_SIBCALL" "yes")]) + +;; If hardware floating point is available, don't define a negdf pattern; +;; it would be something like: +;;(define_insn "negdf2" +;; [(set (match_operand:DF 0 "register_operand" "=w,w,D,?r") +;; (neg:DF (match_operand:DF 1 "register_operand" "0,c,D,D"))) +;; (clobber (match_scratch:DF 2 "=X,X,X,X,D1"))] +;; "" +;; "@ +;; bxor%? %H0,%H1,31 +;; bxor %H0,%H1,31 ` mov %L0,%L1 +;; drsubh%F0%F1 0,0,0 +;; drsubh%F2%F1 %H0,0,0 ` dexcl%F2 %L0,%H0,%L0" +;; [(set_attr "type" "unary,unary,dpfp_addsub,dpfp_addsub") +;; (set_attr "iscompact" "false,false,false,false") +;; (set_attr "length" "4,4,8,12") +;; (set_attr "cond" "canuse,nocond,nocond,nocond")]) +;; and this suffers from always requiring a long immediate when using +;; the floating point hardware. +;; We then want the sub[sd]f patterns to be used, so that we can load the +;; constant zero efficiently into a register when we want to do the +;; computation using the floating point hardware. There should be a special +;; subdf alternative that matches a zero operand 1, which then can allow +;; to use bxor to flip the high bit of an integer register. +;; ??? we actually can't use the floating point hardware for neg, because +;; this would not work right for -0. OTOH optabs.c has already code +;; to synthesyze negate by flipping the sign bit. + + +;; include the arc-FPX instructions +(include "fpx.md") + +(include "simdext.md") diff --git a/gcc-4.9/gcc/config/arc/arc.opt b/gcc-4.9/gcc/config/arc/arc.opt new file mode 100644 index 000000000..2deb9e77e --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc.opt @@ -0,0 +1,390 @@ +; Options for the Synopsys DesignWare ARC port of the compiler +; +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. 
If not see +; <http://www.gnu.org/licenses/>. + +HeaderInclude +config/arc/arc-opts.h + +mbig-endian +Target Report RejectNegative Mask(BIG_ENDIAN) +Compile code for big endian mode + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_ENDIAN) +Compile code for little endian mode. This is the default + +mno-cond-exec +Target Report RejectNegative Mask(NO_COND_EXEC) +Disable ARCompact specific pass to generate conditional execution instructions + +mA5 +Target Report +Generate ARCompact 32-bit code for ARCtangent-A5 processor + +mA6 +Target Report +Generate ARCompact 32-bit code for ARC600 processor + +mARC600 +Target Report +Same as -mA6 + +mARC601 +Target Report +Generate ARCompact 32-bit code for ARC601 processor + +mA7 +Target Report +Generate ARCompact 32-bit code for ARC700 processor + +mARC700 +Target Report +Same as -mA7 + +mmixed-code +Target Report Mask(MIXED_CODE_SET) +Tweak register allocation to help 16-bit instruction generation +; originally this was: +;Generate ARCompact 16-bit instructions intermixed with 32-bit instructions for ARCtangent-A5 and higher processors +; but we do that without -mmixed-code, too, it's just a different instruction +; count / size tradeoff. + +; We use an explicit definition for the negative form because that is the +; actually interesting option, and we want that to have its own comment. +mvolatile-cache +Target Report RejectNegative Mask(VOLATILE_CACHE_SET) +Use ordinarily cached memory accesses for volatile references + +mno-volatile-cache +Target Report RejectNegative InverseMask(VOLATILE_CACHE_SET) +Enable cache bypass for volatile references + +mbarrel-shifter +Target Report Mask(BARREL_SHIFTER) +Generate instructions supported by barrel shifter + +mnorm +Target Report Mask(NORM_SET) +Generate norm instruction + +mswap +Target Report Mask(SWAP_SET) +Generate swap instruction + +mmul64 +Target Report Mask(MUL64_SET) +Generate mul64 and mulu64 instructions + +mno-mpy +Target Report Mask(NOMPY_SET) +Do not generate mpy instructions for ARC700 + +mea +Target Report Mask(EA_SET) +Generate Extended arithmetic instructions. Currently only divaw, adds, subs and sat16 are supported + +msoft-float +Target Report Mask(0) +Dummy flag. This is the default unless FPX switches are provided explicitly + +mlong-calls +Target Report Mask(LONG_CALLS_SET) +Generate call insns as register indirect calls + +mno-brcc +Target Report Mask(NO_BRCC_SET) +Do not generate BRcc instructions in arc_reorg. + +msdata +Target Report InverseMask(NO_SDATA_SET) +Generate sdata references. This is the default, unless you compile for PIC. + +mno-millicode +Target Report Mask(NO_MILLICODE_THUNK_SET) +Do not generate millicode thunks (needed only with -Os) + +mspfp +Target Report Mask(SPFP_COMPACT_SET) +FPX: Generate Single Precision FPX (compact) instructions. + +mspfp-compact +Target Report Mask(SPFP_COMPACT_SET) MaskExists +FPX: Generate Single Precision FPX (compact) instructions. + +mspfp-fast +Target Report Mask(SPFP_FAST_SET) +FPX: Generate Single Precision FPX (fast) instructions. + +margonaut +Target Report Mask(ARGONAUT_SET) +FPX: Enable Argonaut ARC CPU Double Precision Floating Point extensions. + +mdpfp +Target Report Mask(DPFP_COMPACT_SET) +FPX: Generate Double Precision FPX (compact) instructions. + +mdpfp-compact +Target Report Mask(DPFP_COMPACT_SET) MaskExists +FPX: Generate Double Precision FPX (compact) instructions. + +mdpfp-fast +Target Report Mask(DPFP_FAST_SET) +FPX: Generate Double Precision FPX (fast) instructions.
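A minimal C sketch, assuming the standard GCC option machinery, of what the Mask() entries above turn into: each Mask(FOO) becomes a bit in target_flags plus a generated TARGET_FOO test macro, and the TARGET_SPFP / TARGET_DPFP conditions used later in fpx.md are presumably the union of the fast and compact variants. The bit numbers and macro bodies below are illustrative, not the generated ones.

/* Illustrative sketch only; real bits and macros come from the generated
   options.h and from arc.h.  */
#include <stdio.h>

static unsigned int target_flags;

#define MASK_SPFP_COMPACT_SET (1u << 0)   /* hypothetical bit assignments */
#define MASK_SPFP_FAST_SET    (1u << 1)
#define MASK_DPFP_COMPACT_SET (1u << 2)
#define MASK_DPFP_FAST_SET    (1u << 3)

#define TARGET_SPFP_COMPACT_SET ((target_flags & MASK_SPFP_COMPACT_SET) != 0)
#define TARGET_SPFP_FAST_SET    ((target_flags & MASK_SPFP_FAST_SET) != 0)
#define TARGET_DPFP_COMPACT_SET ((target_flags & MASK_DPFP_COMPACT_SET) != 0)
#define TARGET_DPFP_FAST_SET    ((target_flags & MASK_DPFP_FAST_SET) != 0)

/* fpx.md tests TARGET_SPFP / TARGET_DPFP; presumably defined as the union
   of the fast and compact sets, roughly:  */
#define TARGET_SPFP (TARGET_SPFP_FAST_SET || TARGET_SPFP_COMPACT_SET)
#define TARGET_DPFP (TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET)

int main (void)
{
  target_flags |= MASK_DPFP_FAST_SET;   /* as if -mdpfp-fast had been given */
  printf ("TARGET_DPFP=%d TARGET_SPFP=%d\n", TARGET_DPFP, TARGET_SPFP);
  return 0;
}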
+ +mno-dpfp-lrsr +Target Report Mask(DPFP_DISABLE_LRSR) +Disable LR and SR instructions from using FPX extension aux registers. + +msimd +Target Report Mask(SIMD_SET) +Enable generation of ARC SIMD instructions via target-specific builtins. + +mcpu= +Target RejectNegative Joined Var(arc_cpu) Enum(processor_type) Init(PROCESSOR_NONE) +-mcpu=CPU Compile code for ARC variant CPU + +Enum +Name(processor_type) Type(enum processor_type) + +EnumValue +Enum(processor_type) String(A5) Value(PROCESSOR_A5) + +EnumValue +Enum(processor_type) String(ARC600) Value(PROCESSOR_ARC600) + +EnumValue +Enum(processor_type) String(ARC601) Value(PROCESSOR_ARC601) + +EnumValue +Enum(processor_type) String(ARC700) Value(PROCESSOR_ARC700) + +msize-level= +Target RejectNegative Joined UInteger Var(arc_size_opt_level) Init(-1) +size optimization level: 0:none 1:opportunistic 2: regalloc 3:drop align, -Os + +misize +Target Report PchIgnore Var(TARGET_DUMPISIZE) +Annotate assembler instructions with estimated addresses + +mmultcost= +Target RejectNegative Joined UInteger Var(arc_multcost) Init(-1) +Cost to assume for a multiply instruction, with 4 being equal to a normal insn. + +mtune=ARC600 +Target RejectNegative Var(arc_tune, TUNE_ARC600) +Tune for ARC600 cpu. + +mtune=ARC601 +Target RejectNegative Var(arc_tune, TUNE_ARC600) +Tune for ARC601 cpu. + +mtune=ARC700 +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_STD) +Tune for ARC700 R4.2 Cpu with standard multiplier block. + +mtune=ARC700-xmac +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mtune=ARC725D +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mtune=ARC750D +Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC) +Tune for ARC700 R4.2 Cpu with XMAC block. + +mindexed-loads +Target Var(TARGET_INDEXED_LOADS) +Enable the use of indexed loads + +mauto-modify-reg +Target Var(TARGET_AUTO_MODIFY_REG) +Enable the use of pre/post modify with register displacement. + +mmul32x16 +Target Report Mask(MULMAC_32BY16_SET) +Generate 32x16 multiply and mac instructions + +; the initializer is supposed to be: Init(REG_BR_PROB_BASE/2) , +; alas, basic-block.h is not included in options.c . +munalign-prob-threshold= +Target RejectNegative Joined UInteger Var(arc_unalign_prob_threshold) Init(10000/2) +Set probability threshold for unaligning branches + +mmedium-calls +Target Var(TARGET_MEDIUM_CALLS) Init(TARGET_MMEDIUM_CALLS_DEFAULT) +Don't use less than 25 bit addressing range for calls. + +mannotate-align +Target Var(TARGET_ANNOTATE_ALIGN) +Explain what alignment considerations lead to the decision to make an insn short or long. + +malign-call +Target Var(TARGET_ALIGN_CALL) +Do alignment optimizations for call instructions. + +mRcq +Target Var(TARGET_Rcq) +Enable Rcq constraint handling - most short code generation depends on this. + +mRcw +Target Var(TARGET_Rcw) +Enable Rcw constraint handling - ccfsm condexec mostly depends on this. + +mearly-cbranchsi +Target Var(TARGET_EARLY_CBRANCHSI) +Enable pre-reload use of cbranchsi pattern + +mbbit-peephole +Target Var(TARGET_BBIT_PEEPHOLE) +Enable bbit peephole2 + +mcase-vector-pcrel +Target Var(TARGET_CASE_VECTOR_PC_RELATIVE) +Use pc-relative switch case tables - this enables case table shortening. + +mcompact-casesi +Target Var(TARGET_COMPACT_CASESI) +Enable compact casesi pattern + +mq-class +Target Var(TARGET_Q_CLASS) +Enable 'q' instruction alternatives. 
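The mtune= entries above all write to the single variable named in their Var() clauses, arc_tune, each storing its own TUNE_* value. A small self-contained sketch of that selector behaviour; the enum values mirror the Var() arguments, while the handler function and the rest of the scaffolding are invented for illustration.

#include <stdio.h>
#include <string.h>

enum arc_tune_t {
  TUNE_NONE,
  TUNE_ARC600,
  TUNE_ARC700_4_2_STD,
  TUNE_ARC700_4_2_XMAC
};

static enum arc_tune_t arc_tune = TUNE_NONE;

/* What the option table above amounts to for -mtune=<cpu>.  */
static void handle_mtune (const char *cpu)
{
  if (!strcmp (cpu, "ARC600") || !strcmp (cpu, "ARC601"))
    arc_tune = TUNE_ARC600;
  else if (!strcmp (cpu, "ARC700"))
    arc_tune = TUNE_ARC700_4_2_STD;
  else if (!strcmp (cpu, "ARC700-xmac")
           || !strcmp (cpu, "ARC725D") || !strcmp (cpu, "ARC750D"))
    arc_tune = TUNE_ARC700_4_2_XMAC;
}

int main (void)
{
  handle_mtune ("ARC725D");
  printf ("arc_tune = %d\n", (int) arc_tune);
  return 0;
}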
+ +mexpand-adddi +Target Var(TARGET_EXPAND_ADDDI) +Expand adddi3 and subdi3 at rtl generation time into add.f / adc etc. + + +; Flags used by the assembler, but for which we define preprocessor +; macro symbols as well. +mcrc +Target Report +Enable variable polynomial CRC extension + +mdsp-packa +Target Report +Enable DSP 3.1 Pack A extensions + +mdvbf +Target Report +Enable dual viterbi butterfly extension + +mmac-d16 +Target Report Undocumented + +mmac-24 +Target Report Undocumented + +mtelephony +Target Report RejectNegative +Enable Dual and Single Operand Instructions for Telephony + +mxy +Target Report +Enable XY Memory extension (DSP version 3) + +; ARC700 4.10 extension instructions +mlock +Target Report +Enable Locked Load/Store Conditional extension + +mswape +Target Report +Enable swap byte ordering extension instruction + +mrtsc +Target Report +Enable 64-bit Time-Stamp Counter extension instruction + +mno-epilogue-cfi +Target Report RejectNegative InverseMask(EPILOGUE_CFI) +Disable generation of cfi for epilogues. + +mepilogue-cfi +Target RejectNegative Mask(EPILOGUE_CFI) +Enable generation of cfi for epilogues. + +EB +Target +Pass -EB option through to linker. + +EL +Target +Pass -EL option through to linker. + +marclinux +target +Pass -marclinux option through to linker. + +marclinux_prof +target +Pass -marclinux_prof option through to linker. + +;; lra is still unproven for ARC, so allow to fall back to reload with -mno-lra. +;Target InverseMask(NO_LRA) +mlra +; lra still won't allow to configure libgcc; see PR rtl-optimization/55464. +; so don't enable by default. +Target Mask(LRA) +Enable lra + +mlra-priority-none +Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_NONE) +Don't indicate any priority with TARGET_REGISTER_PRIORITY + +mlra-priority-compact +Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_COMPACT) +Indicate priority for r0..r3 / r12..r15 with TARGET_REGISTER_PRIORITY + +mlra-priority-noncompact +Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_NONCOMPACT) +Reduce priority for r0..r3 / r12..r15 with TARGET_REGISTER_PRIORITY + +mucb-mcount +Target Report Var(TARGET_UCB_MCOUNT) +instrument with mcount calls as in the ucb code + +; backward-compatibility aliases, translated by DRIVER_SELF_SPECS + +mEA +Target + +multcost= +Target RejectNegative Joined + +; Unfortunately, listing the full option name gives us clashes +; with OPT_opt_name being claimed for both opt_name and opt-name, +; so we leave out the last character or more. +mbarrel_shifte +Target Joined + +mspfp_ +Target Joined + +mdpfp_ +Target Joined + +mdsp_pack +Target Joined + +mmac_ +Target Joined + diff --git a/gcc-4.9/gcc/config/arc/arc600.md b/gcc-4.9/gcc/config/arc/arc600.md new file mode 100644 index 000000000..8255e244d --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc600.md @@ -0,0 +1,63 @@ +;; DFA scheduling description of the Synopsys DesignWare ARC600 cpu +;; for GNU C compiler +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributor: Joern Rennecke <joern.rennecke@embecosm.com> +;; on behalf of Synopsys Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "ARC600") + +(define_cpu_unit "issue_600" "ARC600") +(define_cpu_unit "mul64_600" "ARC600") + +; latency from flag-setting insns to branches is 3. +(define_insn_reservation "compare_600" 3 + (and (eq_attr "tune" "arc600") + (eq_attr "type" "compare")) + "issue_600") + +(define_insn_reservation "load_DI_600" 4 + (and (eq_attr "tune" "arc600") + (eq_attr "type" "load") + (match_operand:DI 0 "" "")) + "issue_600") + +(define_insn_reservation "load_600" 3 + (and (eq_attr "tune" "arc600") + (eq_attr "type" "load") + (not (match_operand:DI 0 "" ""))) + "issue_600") + +(define_insn_reservation "mul_600_fast" 3 + (and (eq_attr "tune" "arc600") + (match_test "arc_multcost < COSTS_N_INSNS (7)") + (eq_attr "type" "multi,umulti")) + "mul64_600*3") + +(define_insn_reservation "mul_600_slow" 8 + (and (eq_attr "tune" "arc600") + (match_test "arc_multcost >= COSTS_N_INSNS (7)") + (eq_attr "type" "multi,umulti")) + "mul64_600*8") + +(define_insn_reservation "mul_mac_600" 3 + (and (eq_attr "tune" "arc600") + (eq_attr "type" "mulmac_600")) + "nothing*3") + +(define_bypass 1 "mul_mac_600" "mul_mac_600") diff --git a/gcc-4.9/gcc/config/arc/arc700.md b/gcc-4.9/gcc/config/arc/arc700.md new file mode 100644 index 000000000..8e80b4f7c --- /dev/null +++ b/gcc-4.9/gcc/config/arc/arc700.md @@ -0,0 +1,170 @@ +;; DFA scheduling description of the Synopsys DesignWare ARC700 cpu +;; for GNU C compiler +;; Comments and Support For ARC700 instructions added by +;; Saurabh Verma (saurabh.verma@codito.com) +;; Ramana Radhakrishnan(ramana.radhakrishnan@codito.com) +;; Factoring out and improvement of ARC700 Scheduling by +;; Joern Rennecke (joern.rennecke@embecosm.com) +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
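The mul_600_fast / mul_600_slow reservations in arc600.md above key arc_multcost (set by -mmultcost=, where a cost of 4 equals one normal insn) against COSTS_N_INSNS (7). A standalone sketch of that threshold, using a local stand-in for GCC's cost scaling macro.

#include <stdio.h>

/* Local stand-in for GCC's cost scaling macro.  */
#define COSTS_N_INSNS(N) ((N) * 4)

/* Mirrors the match_test in mul_600_fast / mul_600_slow above:
   arc_multcost < COSTS_N_INSNS (7) selects the 3-cycle reservation,
   anything else the 8-cycle one.  */
static int arc600_mul_latency (int arc_multcost)
{
  return arc_multcost < COSTS_N_INSNS (7) ? 3 : 8;
}

int main (void)
{
  printf ("-mmultcost=%d -> %d cycles\n", COSTS_N_INSNS (3),
          arc600_mul_latency (COSTS_N_INSNS (3)));
  printf ("-mmultcost=%d -> %d cycles\n", COSTS_N_INSNS (9),
          arc600_mul_latency (COSTS_N_INSNS (9)));
  return 0;
}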
+ +(define_automaton "ARC700") + +;; aux to be added here +(define_cpu_unit "core, dmp, write_port, dmp_write_port, multiplier, issue, blockage, simd_unit" "ARC700") + +(define_insn_reservation "core_insn_DI" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "unary, move, cmove, binary") + (match_operand:DI 0 "" "")) + "issue+core, issue+core+write_port, write_port") + +(define_insn_reservation "lr" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "lr")) + "issue+blockage, blockage*2, write_port") + +(define_insn_reservation "sr" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "sr")) + "issue+dmp_write_port+blockage, blockage*9") + +(define_insn_reservation "core_insn" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "unary, move, binary")) + "issue+core, nothing, write_port") + +(define_insn_reservation "cmove" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "cmove")) + "issue+core, nothing, write_port") + +(define_insn_reservation "cc_arith" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "cc_arith")) + "issue+core, nothing, write_port") + +(define_insn_reservation "two_cycle_core_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "two_cycle_core")) + "issue+core, nothing, write_port") + +(define_insn_reservation "divaw_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "divaw")) + "issue+core, nothing, write_port") + +(define_insn_reservation "shift_insn" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "shift")) + "issue+core, nothing, write_port") + +; Latency from flag setters to arithmetic with carry is 3. +(define_insn_reservation "compare_700" 3 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "compare")) + "issue+core, nothing, write_port") + +; Assume here the branch is predicted correctly and has a delay slot insn +; or is properly unaligned. +(define_insn_reservation "branch_700" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "compare")) + "issue+core, nothing, write_port") + +; TODOs: is this correct ?? 
+(define_insn_reservation "multi_DI" 10 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "multi") + (match_operand:DI 0 "" "")) + "issue+multiplier, multiplier*2,issue+multiplier, multiplier*2, + nothing,write_port,nothing*2, write_port") + +(define_insn_reservation "umulti_DI" 9 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "umulti") + (match_operand:DI 0 "" "")) + "issue+multiplier, multiplier,issue+multiplier, multiplier*2, + write_port,nothing*3, write_port") + +(define_insn_reservation "umulti_xmac" 5 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "umulti")) + "issue+multiplier, multiplier, nothing*3, write_port") + +; latency of mpyu is lower than mpy / mpyh / mpyhu +(define_insn_reservation "umulti_std" 6 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "umulti")) + "issue+multiplier, multiplier*3, nothing*2, write_port") + +;; arc700 xmac multiplier +(define_insn_reservation "multi_xmac" 5 + (and (eq_attr "tune" "arc700_4_2_xmac") + (eq_attr "type" "multi")) + "issue+multiplier,multiplier,nothing*3,write_port") + +; arc700 standard multiplier +(define_insn_reservation "multi_std" 7 + (and (eq_attr "tune" "arc700_4_2_std") + (eq_attr "type" "multi")) + "issue+multiplier,multiplier*4,nothing*2,write_port") + +;(define_insn_reservation "multi_SI" 7 +; (eq_attr "type" "multi") +; "issue+multiplier, multiplier*2, nothing*4, write_port") + +; There is no multiplier -> multiplier bypass except for the +; mac -> mac dependency on the accumulator. + +; divaw -> divaw latency is 1 cycle +(define_bypass 1 "divaw_insn" "divaw_insn") + +(define_bypass 1 "compare_700" "branch_700,core_insn,data_store,data_load") + +; we could shedule the cmove immediately after the compare, but then +; the cmove would have higher latency... so just keep the cmove apart +; from the compare. +(define_bypass 2 "compare_700" "cmove") + +; no functional unit runs when blockage is reserved +(exclusion_set "blockage" "core, multiplier") + +(define_insn_reservation "data_load_DI" 4 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "load") + (match_operand:DI 0 "" "")) + "issue+dmp, issue+dmp, dmp_write_port, dmp_write_port") + +(define_insn_reservation "data_load" 3 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "load") + (not (match_operand:DI 0 "" ""))) + "issue+dmp, nothing, dmp_write_port") + +(define_insn_reservation "data_store_DI" 2 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "store") + (match_operand:DI 0 "" "")) + "issue+dmp_write_port, issue+dmp_write_port") + +(define_insn_reservation "data_store" 1 + (and (eq_attr "tune_arc700" "true") + (eq_attr "type" "store") + (not (match_operand:DI 0 "" ""))) + "issue+dmp_write_port") diff --git a/gcc-4.9/gcc/config/arc/constraints.md b/gcc-4.9/gcc/config/arc/constraints.md new file mode 100644 index 000000000..d01e156c6 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/constraints.md @@ -0,0 +1,399 @@ +;; Constraint definitions for Synopsys DesignWare ARC. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints + +; Most instructions accept arbitrary core registers for their inputs, even +; if the core register in question cannot be written to, like the multiply +; result registers of the ARCtangent-A5 and ARC600 . +; First, define a class for core registers that can be read cheaply. This +; is most or all core registers for ARC600, but only r0-r31 for ARC700 +(define_register_constraint "c" "CHEAP_CORE_REGS" + "core register @code{r0}-@code{r31}, @code{ap},@code{pcl}") + +; All core regs - e.g. for when we must have a way to reload a register. +(define_register_constraint "Rac" "ALL_CORE_REGS" + "core register @code{r0}-@code{r60}, @code{ap},@code{pcl}") + +; Some core registers (.e.g lp_count) aren't general registers because they +; can't be used as the destination of a multi-cycle operation like +; load and/or multiply, yet they are still writable in the sense that +; register-register moves and single-cycle arithmetic (e.g "add", "and", +; but not "mpy") can write to them. +(define_register_constraint "w" "WRITABLE_CORE_REGS" + "writable core register: @code{r0}-@code{r31}, @code{r60}, nonfixed core register") + +(define_register_constraint "W" "MPY_WRITABLE_CORE_REGS" + "writable core register except @code{LP_COUNT} (@code{r60}): @code{r0}-@code{r31}, nonfixed core register") + +(define_register_constraint "l" "LPCOUNT_REG" + "@internal + Loop count register @code{r60}") + +(define_register_constraint "x" "R0_REGS" + "@code{R0} register.") + +(define_register_constraint "Rgp" "GP_REG" + "@internal + Global Pointer register @code{r26}") + +(define_register_constraint "f" "FP_REG" + "@internal + Frame Pointer register @code{r27}") + +(define_register_constraint "b" "SP_REGS" + "@internal + Stack Pointer register @code{r28}") + +(define_register_constraint "k" "LINK_REGS" + "@internal + Link Registers @code{ilink1}:@code{r29}, @code{ilink2}:@code{r30}, + @code{blink}:@code{r31},") + +(define_register_constraint "q" "ARCOMPACT16_REGS" + "Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3}, + @code{r12}-@code{r15}") + +(define_register_constraint "e" "AC16_BASE_REGS" + "Registers usable as base-regs of memory addresses in ARCompact 16-bit memory + instructions: @code{r0}-@code{r3}, @code{r12}-@code{r15}, @code{sp}") + +(define_register_constraint "D" "DOUBLE_REGS" + "ARC FPX (dpfp) 64-bit registers. @code{D0}, @code{D1}") + +(define_register_constraint "d" "SIMD_DMA_CONFIG_REGS" + "@internal + ARC SIMD DMA configuration registers @code{di0}-@code{di7}, + @code{do0}-@code{do7}") + +(define_register_constraint "v" "SIMD_VR_REGS" + "ARC SIMD 128-bit registers @code{VR0}-@code{VR23}") + +; We could allow call-saved registers for sibling calls if we restored them +; in the delay slot of the call. However, that would not allow to adjust the +; stack pointer afterwards, so the call-saved register would have to be +; restored from a call-used register that was just loaded with the value +; before. So sticking to call-used registers for sibcalls will likely +; generate better code overall. +(define_register_constraint "Rsc" "SIBCALL_REGS" + "@internal + Sibling call register") + +;; Integer constraints + +(define_constraint "I" + "@internal + A signed 12-bit integer constant." 
+ (and (match_code "const_int") + (match_test "SIGNED_INT12 (ival)"))) + +(define_constraint "K" + "@internal + A 3-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT3 (ival)"))) + +(define_constraint "L" + "@internal + A 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (ival)"))) + +(define_constraint "CnL" + "@internal + One's complement of a 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (~ival)"))) + +(define_constraint "CmL" + "@internal + Two's complement of a 6-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (-ival)"))) + +(define_constraint "M" + "@internal + A 5-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT5 (ival)"))) + +(define_constraint "N" + "@internal + Integer constant 1" + (and (match_code "const_int") + (match_test "IS_ONE (ival)"))) + +(define_constraint "O" + "@internal + A 7-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT7 (ival)"))) + +(define_constraint "P" + "@internal + An 8-bit unsigned integer constant" + (and (match_code "const_int") + (match_test "UNSIGNED_INT8 (ival)"))) + +(define_constraint "C_0" + "@internal + Zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "Cn0" + "@internal + Negative or zero" + (and (match_code "const_int") + (match_test "ival <= 0"))) + +(define_constraint "Cca" + "@internal + Conditional or three-address add / sub constant" + (and (match_code "const_int") + (match_test "ival == -1 << 31 + || (ival >= -0x1f8 && ival <= 0x1f8 + && ((ival >= 0 ? ival : -ival) + <= 0x3f * (ival & -ival)))"))) + +; intersection of "O" and "Cca". +(define_constraint "CL2" + "@internal + A 6-bit unsigned integer constant times 2" + (and (match_code "const_int") + (match_test "!(ival & ~126)"))) + +(define_constraint "CM4" + "@internal + A 5-bit unsigned integer constant times 4" + (and (match_code "const_int") + (match_test "!(ival & ~124)"))) + +(define_constraint "Csp" + "@internal + A valid stack pointer offset for a short add" + (and (match_code "const_int") + (match_test "!(ival & ~124) || !(-ival & ~124)"))) + +(define_constraint "C2a" + "@internal + Unconditional two-address add / sub constant" + (and (match_code "const_int") + (match_test "ival == -1 << 31 + || (ival >= -0x4000 && ival <= 0x4000 + && ((ival >= 0 ? 
ival : -ival) + <= 0x7ff * (ival & -ival)))"))) + +(define_constraint "C0p" + "@internal + power of two" + (and (match_code "const_int") + (match_test "IS_POWEROF2_P (ival)"))) + +(define_constraint "C1p" + "@internal + constant such that x+1 is a power of two, and x != 0" + (and (match_code "const_int") + (match_test "ival && IS_POWEROF2_P (ival + 1)"))) + +(define_constraint "Ccp" + "@internal + constant such that ~x (one's Complement) is a power of two" + (and (match_code "const_int") + (match_test "IS_POWEROF2_P (~ival)"))) + +(define_constraint "Cux" + "@internal + constant such that AND gives an unsigned extension" + (and (match_code "const_int") + (match_test "ival == 0xff || ival == 0xffff"))) + +(define_constraint "Crr" + "@internal + constant that can be loaded with ror b,u6" + (and (match_code "const_int") + (match_test "(ival & ~0x8000001f) == 0 && !arc_ccfsm_cond_exec_p ()"))) + +;; Floating-point constraints + +(define_constraint "G" + "@internal + A 32-bit constant double value" + (and (match_code "const_double") + (match_test "arc_double_limm_p (op)"))) + +(define_constraint "H" + "@internal + All const_double values (including 64-bit values)" + (and (match_code "const_double") + (match_test "1"))) + +;; Memory constraints +(define_memory_constraint "T" + "@internal + A valid memory operand for ARCompact load instructions" + (and (match_code "mem") + (match_test "compact_load_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "S" + "@internal + A valid memory operand for ARCompact store instructions" + (and (match_code "mem") + (match_test "compact_store_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "Usd" + "@internal + A valid _small-data_ memory operand for ARCompact instructions" + (and (match_code "mem") + (match_test "compact_sda_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "Usc" + "@internal + A valid memory operand for storing constants" + (and (match_code "mem") + (match_test "!CONSTANT_P (XEXP (op,0))") +;; ??? the assembler rejects stores of immediates to small data. 
+ (match_test "!compact_sda_memory_operand (op, VOIDmode)"))) + +(define_memory_constraint "Us<" + "@internal + Stack pre-decrement" + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) == PRE_DEC") + (match_test "REG_P (XEXP (XEXP (op, 0), 0))") + (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG"))) + +(define_memory_constraint "Us>" + "@internal + Stack post-increment" + (and (match_code "mem") + (match_test "GET_CODE (XEXP (op, 0)) == POST_INC") + (match_test "REG_P (XEXP (XEXP (op, 0), 0))") + (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG"))) + +;; General constraints + +(define_constraint "Cbr" + "Branch destination" + (ior (and (match_code "symbol_ref") + (match_test "!arc_is_longcall_p (op)")) + (match_code "label_ref"))) + +(define_constraint "Cbp" + "predicable branch/call destination" + (ior (and (match_code "symbol_ref") + (match_test "arc_is_shortcall_p (op)")) + (match_code "label_ref"))) + +(define_constraint "Cpc" + "pc-relative constant" + (match_test "arc_legitimate_pc_offset_p (op)")) + +(define_constraint "Clb" + "label" + (and (match_code "label_ref") + (match_test "arc_text_label (XEXP (op, 0))"))) + +(define_constraint "Cal" + "constant for arithmetic/logical operations" + (match_test "immediate_operand (op, VOIDmode) && !arc_legitimate_pc_offset_p (op)")) + +(define_constraint "C32" + "32 bit constant for arithmetic/logical operations" + (match_test "immediate_operand (op, VOIDmode) + && !arc_legitimate_pc_offset_p (op) + && !satisfies_constraint_I (op)")) + +; Note that the 'cryptic' register constraints will not make reload use the +; associated class to reload into, but this will not penalize reloading of any +; other operands, or using an alternate part of the same alternative. + +; Rcq is different in three important ways from a register class constraint: +; - It does not imply a register class, hence reload will not use it to drive +; reloads. +; - It matches even when there is no register class to describe its accepted +; set; not having such a set again lessens the impact on register allocation. +; - It won't match when the instruction is conditionalized by the ccfsm. +(define_constraint "Rcq" + "@internal + Cryptic q - for short insn generation while not affecting register allocation + Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3}, + @code{r12}-@code{r15}" + (and (match_code "REG") + (match_test "TARGET_Rcq + && !arc_ccfsm_cond_exec_p () + && IN_RANGE (REGNO (op) ^ 4, 4, 11)"))) + +; If we need a reload, we generally want to steer reload to use three-address +; alternatives in preference of two-address alternatives, unless the +; three-address alternative introduces a LIMM that is unnecessary for the +; two-address alternative. 
+(define_constraint "Rcw" + "@internal + Cryptic w - for use in early alternatives with matching constraint" + (and (match_code "REG") + (match_test + "TARGET_Rcw + && REGNO (op) < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], + REGNO (op))"))) + +(define_constraint "Rcr" + "@internal + Cryptic r - for use in early alternatives with matching constraint" + (and (match_code "REG") + (match_test + "TARGET_Rcw + && REGNO (op) < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], + REGNO (op))"))) + +(define_constraint "Rcb" + "@internal + Stack Pointer register @code{r28} - do not reload into its class" + (and (match_code "REG") + (match_test "REGNO (op) == 28"))) + +(define_constraint "Rck" + "@internal + blink (usful for push_s / pop_s)" + (and (match_code "REG") + (match_test "REGNO (op) == 31"))) + +(define_constraint "Rs5" + "@internal + sibcall register - only allow one of the five available 16 bit isnsn. + Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3}, + @code{r12}" + (and (match_code "REG") + (match_test "!arc_ccfsm_cond_exec_p ()") + (ior (match_test "(unsigned) REGNO (op) <= 3") + (match_test "REGNO (op) == 12")))) + +(define_constraint "Rcc" + "@internal + Condition Codes" + (and (match_code "REG") (match_test "cc_register (op, VOIDmode)"))) + + +(define_constraint "Q" + "@internal + Integer constant zero" + (and (match_code "const_int") + (match_test "IS_ZERO (ival)"))) diff --git a/gcc-4.9/gcc/config/arc/fpx.md b/gcc-4.9/gcc/config/arc/fpx.md new file mode 100644 index 000000000..4eee6aff9 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/fpx.md @@ -0,0 +1,674 @@ +;; Machine description of the Synopsys DesignWare ARC cpu Floating Point +;; extensions for GNU C compiler +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TODOs: +;; dpfp blocks? 
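The Rcq constraint in constraints.md above admits the ARCompact 16-bit register set through the test IN_RANGE (REGNO (op) ^ 4, 4, 11). A standalone sketch, with a simplified local IN_RANGE, showing that the XOR trick selects exactly r0-r3 and r12-r15.

#include <stdio.h>

/* Simplified local version of GCC's IN_RANGE.  */
#define IN_RANGE(VALUE, LOWER, UPPER) \
  ((VALUE) >= (LOWER) && (VALUE) <= (UPPER))

int main (void)
{
  /* r0-r3 map to 4-7 and r12-r15 map to 8-11 under XOR with 4;
     every other core register number falls outside [4, 11].  */
  for (int regno = 0; regno < 32; regno++)
    if (IN_RANGE (regno ^ 4, 4, 11))
      printf ("r%d is accepted by Rcq\n", regno);
  return 0;
}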
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Scheduler descriptions for the fpx instructions +(define_insn_reservation "spfp_compact" 3 + (and (match_test "TARGET_SPFP_COMPACT_SET") + (eq_attr "type" "spfp")) + "issue+core, nothing*2, write_port") + +(define_insn_reservation "spfp_fast" 6 + (and (match_test "TARGET_SPFP_FAST_SET") + (eq_attr "type" "spfp")) + "issue+core, nothing*5, write_port") + +(define_insn_reservation "dpfp_compact_mult" 7 + (and (match_test "TARGET_DPFP_COMPACT_SET") + (eq_attr "type" "dpfp_mult")) + "issue+core, nothing*6, write_port") + +(define_insn_reservation "dpfp_compact_addsub" 5 + (and (match_test "TARGET_DPFP_COMPACT_SET") + (eq_attr "type" "dpfp_addsub")) + "issue+core, nothing*4, write_port") + +(define_insn_reservation "dpfp_fast" 5 + (and (match_test "TARGET_DPFP_FAST_SET") + (eq_attr "type" "dpfp_mult,dpfp_addsub")) + "issue+core, nothing*4, write_port") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (plus:SF (match_operand:SF 1 "nonmemory_operand" "0,r,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "I,rL,r,GCal,LrCal")))] +; "(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET";Add flag for float + "TARGET_SPFP" + "@ + fadd %0,%1,%2 + fadd %0,%1,%2 + fadd %0,%S1,%2 + fadd %0,%1,%S2 + fadd%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (minus:SF (match_operand:SF 1 "nonmemory_operand" "r,0,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "rL,I,r,GCal,LrCal")))] + ;"(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET";Add flag for float + "TARGET_SPFP" + "@ + fsub %0,%1,%2 + fsub %0,%1,%2 + fsub %0,%S1,%2 + fsub %0,%1,%S2 + fsub%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ") + (mult:SF (match_operand:SF 1 "nonmemory_operand" "r,0,GCal,r,0") + (match_operand:SF 2 "nonmemory_operand" "rL,I,r,GCal,LrCal")))] +; "(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET" ;Add flag for float + "TARGET_SPFP" + "@ + fmul %0,%1,%2 + fmul %0,%1,%2 + fmul %0,%S1,%2 + fmul %0,%1,%S2 + fmul%? %0,%1,%S2" + [(set_attr "type" "spfp") + (set_attr "length" "4,4,8,8,8")]) + + +;; For comparisons, we can avoid storing the top half of the result into +;; a register since '.f' lets us set the Z bit for the conditional +;; branch insns. + +;; ??? FIXME (x-y)==0 is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "cmpsfpx_raw" + [(set (reg:CC_FPX 61) + (compare:CC_FPX (match_operand:SF 0 "register_operand" "r") + (match_operand:SF 1 "register_operand" "r")))] + "TARGET_ARGONAUT_SET && TARGET_SPFP" + "fsub.f 0,%0,%1" + [(set_attr "type" "spfp") + (set_attr "length" "4")]) + +;; ??? FIXME (x-y)==0 is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +;; ??? FIXME we claim to clobber operand 2, yet the two numbers appended +;; to the actual instructions are incorrect. The result of the d*subh +;; insn is stored in the Dx register specified by that first number. 
+(define_insn "cmpdfpx_raw" + [(set (reg:CC_FPX 61) + (compare:CC_FPX (match_operand:DF 0 "nonmemory_operand" "D,r") + (match_operand:DF 1 "nonmemory_operand" "r,D"))) + (clobber (match_scratch:DF 2 "=D,D"))] + "TARGET_ARGONAUT_SET && TARGET_DPFP" + "@ + dsubh%F0%F1.f 0,%H2,%L2 + drsubh%F0%F2.f 0,%H1,%L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4")]) + +;; ??? FIXME subtraction is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "*cmpfpx_gt" + [(set (reg:CC_FP_GT 61) (compare:CC_FP_GT (reg:CC_FPX 61) (const_int 0)))] + "TARGET_ARGONAUT_SET" + "cmp.ls pcl,pcl" + [(set_attr "type" "compare") + (set_attr "length" "4")]) + +;; ??? FIXME subtraction is not a correct comparison for floats: +;; http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm +(define_insn "*cmpfpx_ge" + [(set (reg:CC_FP_GE 61) (compare:CC_FP_GE (reg:CC_FPX 61) (const_int 0)))] + "TARGET_ARGONAUT_SET" + "rcmp.pnz pcl,0" + [(set_attr "type" "compare") + (set_attr "length" "4")]) + +;; DPFP instructions begin... + +;; op0_reg = D1_reg.low +(define_insn "*lr_double_lower" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "D")] VUNSPEC_LR ))] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" +"lr %0, [%1l] ; *lr_double_lower" +[(set_attr "length" "8") +(set_attr "type" "lr")] +) + +(define_insn "*lr_double_higher" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "D")] VUNSPEC_LR_HIGH ))] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" +"lr %0, [%1h] ; *lr_double_higher" +[(set_attr "length" "8") +(set_attr "type" "lr")] +) + + +(define_insn "*dexcl_3op_peep2_insn" + [(set (match_operand:SI 0 "dest_reg_operand" "=r") ; not register_operand, to accept SUBREG + (unspec_volatile:SI [ + (match_operand:DF 1 "arc_double_register_operand" "D") + (match_operand:SI 2 "shouldbe_register_operand" "r") ; r1 + (match_operand:SI 3 "shouldbe_register_operand" "r") ; r0 + ] VUNSPEC_DEXCL )) + ] + "TARGET_DPFP" + "dexcl%F1 %0, %2, %3" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + +;; version which will not overwrite operand0 +(define_insn "*dexcl_3op_peep2_insn_nores" + [ (unspec_volatile:SI [ + (match_operand:DF 0 "arc_double_register_operand" "D") + (match_operand:SI 1 "shouldbe_register_operand" "r") ; r1 + (match_operand:SI 2 "shouldbe_register_operand" "r") ; r0 + ] VUNSPEC_DEXCL_NORES ) + ] + "TARGET_DPFP" + "dexcl%F0 0, %1, %2" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + +;; dexcl a,b,c pattern generated by the peephole2 above +(define_insn "*dexcl_3op_peep2_insn_lr" + [(parallel [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "=D")] VUNSPEC_LR )) + (set (match_dup 1) (match_operand:DF 2 "register_operand" "r"))] + ) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + "dexcl%F1 %0, %H2, %L2" + [(set_attr "type" "move") + (set_attr "length" "4")] +) + + +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; doubles support for ARC +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; D0 = D1+{reg_pair}2 +;; (define_expand "adddf3" +;; [(set (match_operand:DF 0 "arc_double_register_operand" "") +;; (plus:DF (match_operand:DF 1 "arc_double_register_operand" "") +;; (match_operand:DF 2 "nonmemory_operand" 
"")))] +;; "TARGET_DPFP" +;; " " +;; ) +;; daddh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; daddh{0}{1} 0, reg3, limm2.lo +(define_expand "adddf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "") + (match_operand:DF 2 "nonmemory_operand" ""))) + ] + "TARGET_DPFP" + " if (GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + split_double (operands[2], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_adddf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_adddf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + DONE; + " +) + +;; daddh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1*/ +;; OR +;; daddh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0 */ +;; +(define_insn "adddf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "!r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use (match_operand:SI 4 "" "N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2)) + ] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + daddh%F0%F1 0,%H2,%L2 + daddh%F0%F1 0,%3,%L2" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8")]) + +;; dmulh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; dmulh{0}{1} 0, reg3, limm2.lo +(define_expand "muldf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "") + (match_operand:DF 2 "nonmemory_operand" "")))] +"TARGET_DPFP" +" if (GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + split_double (operands[2], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_muldf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_muldf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + + DONE; + ") + + +;; dmulh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1*/ +;; OR +;; dmulh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0*/ +(define_insn "muldf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "!r,G"))) + (use (match_operand:SI 3 "" "N,!r")) + (use (match_operand:SI 4 "" "N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2)) + ] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dmulh%F0%F1 0,%H2,%L2 + dmulh%F0%F1 0,%3, %L2" + [(set_attr "type" "dpfp_mult") + (set_attr "length" "4,8")]) + +;; dsubh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo +;; OR +;; dsubh{0}{1} 0, reg3, limm2.lo +;; OR +;; drsubh{0}{2} 0, {reg_pair}1.hi, {reg_pair}1.lo +;; OR +;; drsubh{0}{2} 0, reg3, limm1.lo +(define_expand "subdf3" + [(set (match_operand:DF 0 "arc_double_register_operand" "") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "") + (match_operand:DF 2 "nonmemory_operand" "")))] +"TARGET_DPFP" +" if (GET_CODE (operands[1]) == CONST_DOUBLE || GET_CODE (operands[2]) == CONST_DOUBLE) + { + rtx high, low, tmp; + int const_index = ((GET_CODE (operands[1]) == CONST_DOUBLE) ? 
1: 2); + split_double (operands[const_index], &low, &high); + tmp = force_reg (SImode, high); + emit_insn(gen_subdf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx)); + } + else + emit_insn(gen_subdf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx)); + + DONE; + " +) + +;; dsubh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1 */ +;; OR +;; dsubh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0*/ +;; OR +;; drsubh{0}{2} 0, {reg_pair}1.hi, {reg_pair}1.lo /* operand 4 = 1 */ +;; OR +;; drsubh{0}{2} 0, reg3, limm1.lo /* operand 4 = 0*/ +(define_insn "subdf3_insn" + [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D,D,D") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "D,D,!r,G") + (match_operand:DF 2 "nonmemory_operand" "!r,G,D,D"))) + (use (match_operand:SI 3 "" "N,r,N,r")) + (use (match_operand:SI 4 "" "N,Q,N,Q")) + ; Prevent can_combine_p from combining muldf3_insn patterns with + ; different USE pairs. + (use (match_dup 2))] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT) && + !(GET_CODE(operands[1]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dsubh%F0%F1 0,%H2,%L2 + dsubh%F0%F1 0,%3,%L2 + drsubh%F0%F2 0,%H1,%L1 + drsubh%F0%F2 0,%3,%L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8,4,8")]) + +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; Peephole for following conversion +;; ;; D0 = D2{reg_pair}3 +;; ;; {reg_pair}5 = D0 +;; ;; D0 = {reg_pair}6 +;; ;; | +;; ;; V +;; ;; _________________________________________________________ +;; ;; / D0 = D2 {regpair3_or_limmreg34} +;; ;; ---- + {reg_pair}5.hi = ( D2{regpair3_or_limmreg34} ).hi +;; ;; | \_________________________________________________________ +;; ;; | +;; ;; | ________________________________________________________ +;; ;; | / {reg_pair}5.lo = ( D2{regpair3_or_limmreg34} ).lo +;; ;; +-----+ D0 = {reg_pair}6 +;; ;; \ _________________________________________________________ +;; ;; || +;; ;; || +;; ;; \/ +;; ;; d{0}{2}h {reg_pair}5.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; ;; dexcl{0} {reg_pair}5.lo, {reg_pair}6.lo, {reg_pair}6.hi +;; ;; ----------------------------------------------------------------------------------------- +;; ;; where is one of {+,*,-} +;; ;; is {add,mult,sub} +;; ;; +;; ;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; ;; {regpair2_or_limmreg24} and D3 +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; (define_peephole2 +;; [(parallel [(set (match_operand:DF 0 "register_operand" "") +;; (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") +;; (match_operand:DF 3 "nonmemory_operand" "")])) +;; (use (match_operand:SI 4 "" ""))]) +;; (set (match_operand:DF 5 "register_operand" "") +;; (match_dup 0)) +;; (set (match_dup 0) +;; (match_operand:DF 6 "register_operand" "")) +;; ] +;; "TARGET_DPFP" +;; [ +;; (parallel [(set (match_dup 0) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)])) +;; (use (match_dup 4)) +;; (set (match_dup 5) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)]))]) +;; (parallel [ +;; ;; (set (subreg:SI (match_dup 5) 0) +;; (set (match_dup 7) +;; (unspec_volatile [(match_dup 0)] VUNSPEC_LR )) +;; (set (match_dup 0) (match_dup 6))] +;; ) +;; ] +;; "operands[7] = simplify_gen_subreg(SImode,operands[5],DFmode,0);" +;; ) +;; 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Peephole for following conversion +;; D0 = D2{reg_pair}3 +;; {reg_pair}6 = D0 +;; D0 = {reg_pair}7 +;; | +;; V +;; _________________________________________________________ +;; / D0 = D2 {regpair3_or_limmreg34} +;; ---- + {reg_pair}6.hi = ( D2{regpair3_or_limmreg34} ).hi +;; | \_________________________________________________________ +;; | +;; | ________________________________________________________ +;; | / {reg_pair}6.lo = ( D2{regpair3_or_limmreg34} ).lo +;; +-----+ D0 = {reg_pair}7 +;; \ _________________________________________________________ +;; || +;; || +;; \/ +;; d{0}{2}h {reg_pair}6.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; dexcl{0} {reg_pair}6.lo, {reg_pair}7.lo, {reg_pair}7.hi +;; ----------------------------------------------------------------------------------------- +;; where is one of {+,*,-} +;; is {add,mult,sub} +;; +;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; {regpair2_or_limmreg24} and D3 +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_peephole2 + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") + (match_operand:DF 3 "nonmemory_operand" "")])) + (use (match_operand:SI 4 "" "")) + (use (match_operand:SI 5 "" "")) + (use (match_operand:SI 6 "" ""))]) + (set (match_operand:DF 7 "register_operand" "") + (match_dup 0)) + (set (match_dup 0) + (match_operand:DF 8 "register_operand" "")) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + [ + (parallel [(set (match_dup 0) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)])) + (use (match_dup 4)) + (use (match_dup 5)) + (set (match_dup 7) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)]))]) + (parallel [ +;; (set (subreg:SI (match_dup 7) 0) + (set (match_dup 9) + (unspec_volatile:SI [(match_dup 0)] VUNSPEC_LR )) + (set (match_dup 0) (match_dup 8))] + ) + ] + "operands[9] = simplify_gen_subreg(SImode,operands[7],DFmode,0);" + ) + +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ;; Peephole to generate d{ij}h a,b,c instructions +;; ;; D0 = D2{reg_pair}3 +;; ;; {reg_pair}5 = D0 +;; ;; | +;; ;; V +;; ;; __________________________________________ +;; ;; / D0 = D2 {regpair3_or_limmreg34} +;; ;; ---- + {reg_pair}5.hi = ( D2{regpair3_or_limmreg34} ).hi +;; ;; | \__________________________________________ +;; ;; | +;; ;; + --- {reg_pair}5.lo = ( D2{regpair3_or_limmreg34} ).lo +;; ;; || +;; ;; || +;; ;; \/ +;; ;; d{0}{2}h {reg_pair}4.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; ;; lr {reg_pair}4.lo, {D2l} +;; ;; ---------------------------------------------------------------------------------------- +;; ;; where is one of {+,*,-} +;; ;; is {add,mult,sub} +;; ;; +;; ;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; ;; {regpair2_or_limmreg24} and D3 +;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; (define_peephole2 +;; [(parallel [(set (match_operand:DF 0 "register_operand" "") +;; (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") +;; (match_operand:DF 3 "nonmemory_operand" "")])) +;; (use (match_operand:SI 4 "" ""))]) +;; (set (match_operand:DF 5 "register_operand" "") +;; (match_dup 0)) +;; ] +;; "TARGET_DPFP" +;; [ +;; (parallel [(set (match_dup 0) 
+;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)])) +;; (use (match_dup 4)) +;; (set (match_dup 5) +;; (match_op_dup:DF 1 [(match_dup 2) +;; (match_dup 3)]))]) +;; ; (set (subreg:SI (match_dup 5) 0) +;; (set (match_dup 6) +;; (unspec_volatile [(match_dup 0)] VUNSPEC_LR )) +;; ] +;; "operands[6] = simplify_gen_subreg(SImode,operands[5],DFmode,0);" +;; ) +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Peephole to generate d{ij}h a,b,c instructions +;; D0 = D2{reg_pair}3 +;; {reg_pair}6 = D0 +;; | +;; V +;; __________________________________________ +;; / D0 = D2 {regpair3_or_limmreg34} +;; ---- + {reg_pair}6.hi = ( D2{regpair3_or_limmreg34} ).hi +;; | \__________________________________________ +;; | +;; + --- {reg_pair}6.lo = ( D2{regpair3_or_limmreg34} ).lo +;; || +;; || +;; \/ +;; d{0}{2}h {reg_pair}4.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi +;; lr {reg_pair}4.lo, {D2l} +;; ---------------------------------------------------------------------------------------- +;; where is one of {+,*,-} +;; is {add,mult,sub} +;; +;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as +;; {regpair2_or_limmreg24} and D3 +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_peephole2 + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "") + (match_operand:DF 3 "nonmemory_operand" "")])) + (use (match_operand:SI 4 "" "")) + (use (match_operand:SI 5 "" "")) + (use (match_operand:SI 6 "" ""))]) + (set (match_operand:DF 7 "register_operand" "") + (match_dup 0)) + ] + "TARGET_DPFP && !TARGET_DPFP_DISABLE_LRSR" + [ + (parallel [(set (match_dup 0) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)])) + (use (match_dup 4)) + (use (match_dup 5)) + (set (match_dup 7) + (match_op_dup:DF 1 [(match_dup 2) + (match_dup 3)]))]) +; (set (subreg:SI (match_dup 7) 0) + (set (match_dup 8) + (unspec_volatile:SI [(match_dup 0)] VUNSPEC_LR )) + ] + "operands[8] = simplify_gen_subreg(SImode,operands[7],DFmode,0);" + ) + +;; ;; _______________________________________________________ +;; ;; / D0 = D1 + {regpair2_or_limmreg23} +;; ;; + {reg_pair}4.hi = ( D1 + {regpair2_or_limmreg23} ).hi +;; ;; \_______________________________________________________ +;; (define_insn "*daddh_peep2_insn" +;; [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") +;; (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") +;; (match_operand:DF 2 "nonmemory_operand" "r,G"))) +;; (use (match_operand:SI 3 "" "N,r")) +;; (set (match_operand:DF 4 "register_operand" "=r,r") +;; (plus:DF (match_dup 1) +;; (match_dup 2)))])] +;; "TARGET_DPFP" +;; "@ +;; daddh%F0%F1 %H4, %H2, %L2 +;; daddh%F0%F1 %H4, %3, %L2" +;; [(set_attr "type" "dpfp_addsub") +;; (set_attr "length" "4,8")] +;; ) +;; _______________________________________________________ +;; / D0 = D1 + {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 + {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +(define_insn "*daddh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use (match_operand:SI 4 "" "N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r") + (plus:DF 
(match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + daddh%F0%F1 %H6, %H2, %L2 + daddh%F0%F1 %H6, %3, %L2" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8")] +) + +;; _______________________________________________________ +;; / D0 = D1 * {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 * {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +(define_insn "*dmulh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D") + (mult:DF (match_operand:DF 1 "arc_double_register_operand" "D,D") + (match_operand:DF 2 "nonmemory_operand" "r,G"))) + (use (match_operand:SI 3 "" "N,r")) + (use (match_operand:SI 4 "" "N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r") + (mult:DF (match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dmulh%F0%F1 %H6, %H2, %L2 + dmulh%F0%F1 %H6, %3, %L2" + [(set_attr "type" "dpfp_mult") + (set_attr "length" "4,8")] +) + +;; _______________________________________________________ +;; / D0 = D1 - {regpair2_or_limmreg23} +;; + {reg_pair}5.hi = ( D1 - {regpair2_or_limmreg23} ).hi +;; \_______________________________________________________ +;; OR +;; _______________________________________________________ +;; / D0 = {regpair1_or_limmreg13} - D2 +;; + {reg_pair}5.hi = ( {regpair1_or_limmreg13} ).hi - D2 +;; \_______________________________________________________ +(define_insn "*dsubh_peep2_insn" + [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D,D,D") + (minus:DF (match_operand:DF 1 "nonmemory_operand" "D,D,r,G") + (match_operand:DF 2 "nonmemory_operand" "r,G,D,D"))) + (use (match_operand:SI 3 "" "N,r,N,r")) + (use (match_operand:SI 4 "" "N,Q,N,Q")) + (use (match_operand:SI 5 "" "")) + (set (match_operand:DF 6 "register_operand" "=r,r,r,r") + (minus:DF (match_dup 1) + (match_dup 2)))])] + "TARGET_DPFP && + !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT) && + !(GET_CODE(operands[1]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)" + "@ + dsubh%F0%F1 %H6, %H2, %L2 + dsubh%F0%F1 %H6, %3, %L2 + drsubh%F0%F2 %H6, %H1, %L1 + drsubh%F0%F2 %H6, %3, %L1" + [(set_attr "type" "dpfp_addsub") + (set_attr "length" "4,8,4,8")] +) diff --git a/gcc-4.9/gcc/config/arc/predicates.md b/gcc-4.9/gcc/config/arc/predicates.md new file mode 100644 index 000000000..81bf6277e --- /dev/null +++ b/gcc-4.9/gcc/config/arc/predicates.md @@ -0,0 +1,811 @@ +;; Predicate definitions for Synopsys DesignWare ARC. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
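+
+;; The two predicates below validate instruction destinations.  They defer
+;; to register_operand, but reject hard registers that belong to
+;; ALL_CORE_REGS without also being in WRITABLE_CORE_REGS (respectively
+;; MPY_WRITABLE_CORE_REGS, which in addition excludes LP_COUNT as a
+;; multiply destination).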
+ +(define_predicate "dest_reg_operand" + (match_code "reg,subreg") +{ + rtx op0 = op; + + if (GET_CODE (op0) == SUBREG) + op0 = SUBREG_REG (op0); + if (REG_P (op0) && REGNO (op0) < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (reg_class_contents[ALL_CORE_REGS], + REGNO (op0)) + && !TEST_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], + REGNO (op0))) + return 0; + return register_operand (op, mode); +}) + +(define_predicate "mpy_dest_reg_operand" + (match_code "reg,subreg") +{ + rtx op0 = op; + + if (GET_CODE (op0) == SUBREG) + op0 = SUBREG_REG (op0); + if (REG_P (op0) && REGNO (op0) < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (reg_class_contents[ALL_CORE_REGS], + REGNO (op0)) + /* Make sure the destination register is not LP_COUNT. */ + && !TEST_HARD_REG_BIT (reg_class_contents[MPY_WRITABLE_CORE_REGS], + REGNO (op0))) + return 0; + return register_operand (op, mode); +}) + + +;; Returns 1 if OP is a symbol reference. +(define_predicate "symbolic_operand" + (match_code "symbol_ref, label_ref, const") +) + +;; Acceptable arguments to the call insn. +(define_predicate "call_address_operand" + (ior (match_code "const_int, reg") + (match_operand 0 "symbolic_operand") + (match_test "CONSTANT_P (op) + && arc_legitimate_constant_p (VOIDmode, op)")) +) + +(define_predicate "call_operand" + (and (match_code "mem") + (match_test "call_address_operand (XEXP (op, 0), mode)")) +) + +;; Return true if OP is a unsigned 6-bit immediate (u6) value. +(define_predicate "u6_immediate_operand" + (and (match_code "const_int") + (match_test "UNSIGNED_INT6 (INTVAL (op))")) +) + +;; Return true if OP is a short immediate (shimm) value. +(define_predicate "short_immediate_operand" + (and (match_code "const_int") + (match_test "SMALL_INT (INTVAL (op))")) +) + +(define_predicate "p2_immediate_operand" + (and (match_code "const_int") + (match_test "((INTVAL (op) - 1) & INTVAL (op)) == 0") + (match_test "INTVAL (op)")) +) + +;; Return true if OP will require a long immediate (limm) value. +;; This is currently only used when calculating length attributes. +(define_predicate "long_immediate_operand" + (match_code "symbol_ref, label_ref, const, const_double, const_int") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + return !SIGNED_INT12 (INTVAL (op)); + case CONST_DOUBLE : + /* These can happen because large unsigned 32 bit constants are + represented this way (the multiplication patterns can cause these + to be generated). They also occur for SFmode values. */ + return 1; + default: + break; + } + return 0; +} +) + +;; Return true if OP is a MEM that when used as a load or store address will +;; require an 8 byte insn. +;; Load and store instructions don't allow the same possibilities but they're +;; similar enough that this one function will do. +;; This is currently only used when calculating length attributes. */ +(define_predicate "long_immediate_loadstore_operand" + (match_code "mem") +{ + int size = GET_MODE_SIZE (GET_MODE (op)); + + op = XEXP (op, 0); + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + /* This must be handled as "st c,[limm]". Ditto for load. + Technically, the assembler could translate some possibilities to + "st c,[limm/2 + limm/2]" if limm/2 will fit in a shimm, but we don't + assume that it does. 
*/ + return 1; + case CONST_DOUBLE : + /* These can happen because large unsigned 32 bit constants are + represented this way (the multiplication patterns can cause these + to be generated). They also occur for SFmode values. */ + return 1; + case REG : + return 0; + case PLUS : + { + rtx x = XEXP (op, 1); + + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS) + x = XEXP (x, 0); + } + if (CONST_INT_P (x)) + return (!SMALL_INT (INTVAL (x)) + && (size <= 1 || size > 4 + || (INTVAL (x) & (size - 1)) != 0 + || !SMALL_INT (INTVAL (x) / size))); + else if (GET_CODE (x) == SYMBOL_REF) + return TARGET_NO_SDATA_SET || !SYMBOL_REF_SMALL_P (x); + return 0; + } + default: + break; + } + return 0; +} +) + +;; Return true if OP is any of R0-R3,R12-R15 for ARCompact 16-bit +;; instructions +(define_predicate "compact_register_operand" + (match_code "reg, subreg") + { + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG) + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || COMPACT_GP_REG_P (REGNO (op))) ; + } +) + +;; Return true if OP is an acceptable memory operand for ARCompact +;; 16-bit load instructions. +(define_predicate "compact_load_memory_operand" + (match_code "mem") +{ + rtx addr, plus0, plus1; + int size, off; + + /* Eliminate non-memory operations. */ + if (GET_CODE (op) != MEM) + return 0; + + /* .di instructions have no 16-bit form. */ + if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET) + return 0; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + size = GET_MODE_SIZE (mode); + + /* dword operations really put out 2 instructions, so eliminate them. */ + if (size > UNITS_PER_WORD) + return 0; + + /* Decode the address now. */ + addr = XEXP (op, 0); + switch (GET_CODE (addr)) + { + case REG: + return (REGNO (addr) >= FIRST_PSEUDO_REGISTER + || COMPACT_GP_REG_P (REGNO (addr)) + || (SP_REG_P (REGNO (addr)) && (size != 2))); + /* Reverting for the moment since ldw_s does not have sp as a valid + parameter. */ + case PLUS: + plus0 = XEXP (addr, 0); + plus1 = XEXP (addr, 1); + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus0))) + && ((GET_CODE (plus1) == REG) + && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus1))))) + { + return 1; + } + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + + /* Negative offset is not supported in 16-bit load/store insns. */ + if (off < 0) + return 0; + + switch (size) + { + case 1: + return (off < 32); + case 2: + return ((off < 64) && (off % 2 == 0)); + case 4: + return ((off < 128) && (off % 4 == 0)); + } + } + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || SP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0)); + } + default: + break ; + /* TODO: 'gp' and 'pcl' are to supported as base address operand + for 16-bit load instructions. */ + } + return 0; + +} +) + +;; Return true if OP is an acceptable memory operand for ARCompact +;; 16-bit store instructions +(define_predicate "compact_store_memory_operand" + (match_code "mem") +{ + rtx addr, plus0, plus1; + int size, off; + + if (mode == VOIDmode) + mode = GET_MODE (op); + + /* .di instructions have no 16-bit form. 
*/ + if (MEM_VOLATILE_P (op) && !TARGET_VOLATILE_CACHE_SET) + return 0; + + size = GET_MODE_SIZE (mode); + + /* dword operations really put out 2 instructions, so eliminate them. */ + if (size > UNITS_PER_WORD) + return 0; + + /* Decode the address now. */ + addr = XEXP (op, 0); + switch (GET_CODE (addr)) + { + case REG: + return (REGNO (addr) >= FIRST_PSEUDO_REGISTER + || COMPACT_GP_REG_P (REGNO (addr)) + || (SP_REG_P (REGNO (addr)) && (size != 2))); + /* stw_s does not support SP as a parameter. */ + case PLUS: + plus0 = XEXP (addr, 0); + plus1 = XEXP (addr, 1); + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || COMPACT_GP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + + /* Negative offset is not supported in 16-bit load/store insns. */ + if (off < 0) + return 0; + + switch (size) + { + case 1: + return (off < 32); + case 2: + return ((off < 64) && (off % 2 == 0)); + case 4: + return ((off < 128) && (off % 4 == 0)); + } + } + + if ((GET_CODE (plus0) == REG) + && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER) + || SP_REG_P (REGNO (plus0))) + && (GET_CODE (plus1) == CONST_INT)) + { + off = INTVAL (plus1); + + return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0)); + } + default: + break; + } + return 0; + } +) + +;; Return true if OP is an acceptable argument for a single word +;; move source. +(define_predicate "move_src_operand" + (match_code "symbol_ref, label_ref, const, const_int, const_double, reg, subreg, mem") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return (!flag_pic || arc_legitimate_pic_operand_p(op)); + case CONST_INT : + return (LARGE_INT (INTVAL (op))); + case CONST_DOUBLE : + /* We can handle DImode integer constants in SImode if the value + (signed or unsigned) will fit in 32 bits. This is needed because + large unsigned 32 bit constants are represented as CONST_DOUBLEs. */ + if (mode == SImode) + return arc_double_limm_p (op); + /* We can handle 32 bit floating point constants. */ + if (mode == SFmode) + return GET_MODE (op) == SFmode; + return 0; + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + default : + return 0; + } +} +) + +;; Return true if OP is an acceptable argument for a double word +;; move source. +(define_predicate "move_double_src_operand" + (match_code "reg, subreg, mem, const_int, const_double") +{ + switch (GET_CODE (op)) + { + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return move_double_src_operand (SUBREG_REG (op), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + case CONST_INT : + case CONST_DOUBLE : + return 1; + default : + return 0; + } +} +) + +;; Return true if OP is an acceptable argument for a move destination. +(define_predicate "move_dest_operand" + (match_code "reg, subreg, mem") +{ + switch (GET_CODE (op)) + { + case REG : + /* Program Counter register cannot be the target of a move. It is + a readonly register. 
*/ + if (REGNO (op) == PROGRAM_COUNTER_REGNO) + return 0; + else if (TARGET_MULMAC_32BY16_SET + && (REGNO (op) == 56 || REGNO(op) == 57)) + return 0; + else if (TARGET_MUL64_SET + && (REGNO (op) == 57 || REGNO(op) == 58 || REGNO(op) == 59 )) + return 0; + else + return dest_reg_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return dest_reg_operand (op, mode); + case MEM : + { + rtx addr = XEXP (op, 0); + + if (GET_CODE (addr) == PLUS + && (GET_CODE (XEXP (addr, 0)) == MULT + || (!CONST_INT_P (XEXP (addr, 1)) + && (TARGET_NO_SDATA_SET + || GET_CODE (XEXP (addr, 1)) != SYMBOL_REF + || !SYMBOL_REF_SMALL_P (XEXP (addr, 1)))))) + return 0; + if ((GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY) + && (GET_CODE (XEXP (addr, 1)) != PLUS + || !CONST_INT_P (XEXP (XEXP (addr, 1), 1)))) + return 0; + return address_operand (addr, mode); + } + default : + return 0; + } + +} +) + +;; Return true if OP is valid load with update operand. +(define_predicate "load_update_operand" + (match_code "mem") +{ + if (GET_CODE (op) != MEM + || GET_MODE (op) != mode) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_MODE (op) != Pmode + || !register_operand (XEXP (op, 0), Pmode) + || !nonmemory_operand (XEXP (op, 1), Pmode)) + return 0; + return 1; + +} +) + +;; Return true if OP is valid store with update operand. +(define_predicate "store_update_operand" + (match_code "mem") +{ + if (GET_CODE (op) != MEM + || GET_MODE (op) != mode) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_MODE (op) != Pmode + || !register_operand (XEXP (op, 0), Pmode) + || !(GET_CODE (XEXP (op, 1)) == CONST_INT + && SMALL_INT (INTVAL (XEXP (op, 1))))) + return 0; + return 1; +} +) + +;; Return true if OP is a non-volatile non-immediate operand. +;; Volatile memory refs require a special "cache-bypass" instruction +;; and only the standard movXX patterns are set up to handle them. +(define_predicate "nonvol_nonimm_operand" + (and (match_code "subreg, reg, mem") + (match_test "(GET_CODE (op) != MEM || !MEM_VOLATILE_P (op)) && nonimmediate_operand (op, mode)")) +) + +;; Return 1 if OP is a comparison operator valid for the mode of CC. +;; This allows the use of MATCH_OPERATOR to recognize all the branch insns. + +(define_predicate "proper_comparison_operator" + (match_code "eq, ne, le, lt, ge, gt, leu, ltu, geu, gtu, unordered, ordered, uneq, unge, ungt, unle, unlt, ltgt") +{ + enum rtx_code code = GET_CODE (op); + + if (!COMPARISON_P (op)) + return 0; + + /* After generic flag-setting insns, we can use eq / ne / pl / mi / pnz . + There are some creative uses for hi / ls after shifts, but these are + hard to understand for the compiler and could be at best the target of + a peephole. 
*/ + switch (GET_MODE (XEXP (op, 0))) + { + case CC_ZNmode: + return (code == EQ || code == NE || code == GE || code == LT + || code == GT); + case CC_Zmode: + return code == EQ || code == NE; + case CC_Cmode: + return code == LTU || code == GEU; + case CC_FP_GTmode: + return code == GT || code == UNLE; + case CC_FP_GEmode: + return code == GE || code == UNLT; + case CC_FP_ORDmode: + return code == ORDERED || code == UNORDERED; + case CC_FP_UNEQmode: + return code == UNEQ || code == LTGT; + case CC_FPXmode: + return (code == EQ || code == NE || code == UNEQ || code == LTGT + || code == ORDERED || code == UNORDERED); + + case CCmode: + case SImode: /* Used for BRcc. */ + return 1; + /* From combiner. */ + case QImode: case HImode: case DImode: case SFmode: case DFmode: + return 0; + default: + gcc_unreachable (); + } +}) + +(define_predicate "equality_comparison_operator" + (match_code "eq, ne")) + +(define_predicate "brcc_nolimm_operator" + (ior (match_test "REG_P (XEXP (op, 1))") + (and (match_code "eq, ne, lt, ge, ltu, geu") + (match_test "u6_immediate_operand (XEXP (op, 1), SImode)")) + (and (match_code "le, gt, leu, gtu") + (match_test "UNSIGNED_INT6 (INTVAL (XEXP (op, 1)) + 1)")))) + +;; Return TRUE if this is the condition code register, if we aren't given +;; a mode, accept any CCmode register +(define_special_predicate "cc_register" + (match_code "reg") +{ + if (mode == VOIDmode) + { + mode = GET_MODE (op); + if (GET_MODE_CLASS (mode) != MODE_CC) + return FALSE; + } + + if (mode == GET_MODE (op) && GET_CODE (op) == REG && REGNO (op) == CC_REG) + return TRUE; + + return FALSE; +}) + +;; Return TRUE if this is the condition code register; if we aren't given +;; a mode, accept any CCmode register. If we are given a mode, accept +;; modes that set a subset of flags. +(define_special_predicate "cc_set_register" + (match_code "reg") +{ + enum machine_mode rmode = GET_MODE (op); + + if (mode == VOIDmode) + { + mode = rmode; + if (GET_MODE_CLASS (mode) != MODE_CC) + return FALSE; + } + + if (REGNO (op) != 61) + return FALSE; + if (mode == rmode + || (mode == CC_ZNmode && rmode == CC_Zmode) + || (mode == CCmode && rmode == CC_Zmode) + || (mode == CCmode && rmode == CC_ZNmode) + || (mode == CCmode && rmode == CC_Cmode)) + return TRUE; + + return FALSE; +}) + +; Accept CC_REG in modes which provide the flags needed for MODE. */ +(define_special_predicate "cc_use_register" + (match_code "reg") +{ + if (REGNO (op) != CC_REG) + return 0; + if (GET_MODE (op) == mode) + return 1; + switch (mode) + { + case CC_Zmode: + if (GET_MODE (op) == CC_ZNmode) + return 1; + /* Fall through. */ + case CC_ZNmode: case CC_Cmode: + return GET_MODE (op) == CCmode; + default: + gcc_unreachable (); + } +}) + +(define_special_predicate "zn_compare_operator" + (match_code "compare") +{ + return GET_MODE (op) == CC_ZNmode || GET_MODE (op) == CC_Zmode; +}) + +;; Return true if OP is a shift operator. +(define_predicate "shift_operator" + (match_code "ashiftrt, lshiftrt, ashift") +) + +;; Return true if OP is a left shift operator that can be implemented in +;; four insn words or less without a barrel shifter or multiplier. 
+(define_predicate "shiftl4_operator" + (and (match_code "ashift") + (match_test "const_int_operand (XEXP (op, 1), VOIDmode) ") + (match_test "UINTVAL (XEXP (op, 1)) <= 9U + || INTVAL (XEXP (op, 1)) == 29 + || INTVAL (XEXP (op, 1)) == 30 + || INTVAL (XEXP (op, 1)) == 31"))) + +;; Return true if OP is a right shift operator that can be implemented in +;; four insn words or less without a barrel shifter or multiplier. +(define_predicate "shiftr4_operator" + (and (match_code "ashiftrt, lshiftrt") + (match_test "const_int_operand (XEXP (op, 1), VOIDmode) ") + (match_test "UINTVAL (XEXP (op, 1)) <= 4U + || INTVAL (XEXP (op, 1)) == 30 + || INTVAL (XEXP (op, 1)) == 31"))) + +;; Return true if OP is a shift operator that can be implemented in +;; four insn words or less without a barrel shifter or multiplier. +(define_predicate "shift4_operator" + (ior (match_operand 0 "shiftl4_operator") + (match_operand 0 "shiftr4_operator"))) + +(define_predicate "mult_operator" + (and (match_code "mult") (match_test "TARGET_ARC700 && !TARGET_NOMPY_SET")) +) + +(define_predicate "commutative_operator" + (ior (match_code "plus,ior,xor,and") + (match_operand 0 "mult_operator") + (and (match_code "ss_plus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "commutative_operator_sans_mult" + (ior (match_code "plus,ior,xor,and") + (and (match_code "ss_plus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "noncommutative_operator" + (ior (match_code "minus,ashift,ashiftrt,lshiftrt,rotatert") + (and (match_code "ss_minus") + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "unary_operator" + (ior (match_code "abs,neg,not,sign_extend,zero_extend") + (and (ior (match_code "ss_neg") + (and (match_code "ss_truncate") + (match_test "GET_MODE (XEXP (op, 0)) == HImode"))) + (match_test "TARGET_ARC700 || TARGET_EA_SET"))) +) + +(define_predicate "_2_4_8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8")) +) + +(define_predicate "arc_double_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == DOUBLE_REGS)); +}) + +(define_predicate "shouldbe_register_operand" + (match_code "reg,subreg,mem") +{ + return ((reload_in_progress || reload_completed) + ? 
general_operand : register_operand) (op, mode); +}) + +(define_predicate "vector_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_VR_REGS)); +}) + +(define_predicate "vector_register_or_memory_operand" + ( ior (match_code "reg") + (match_code "mem")) +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + if ((GET_CODE (op) == MEM) + && (mode == V8HImode) + && GET_CODE (XEXP (op,0)) == REG) + return 1; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_VR_REGS)); +}) + +(define_predicate "arc_dpfp_operator" + (match_code "plus, mult,minus") +) + +(define_predicate "arc_simd_dma_register_operand" + (match_code "reg") +{ + if ((GET_MODE (op) != mode) && (mode != VOIDmode)) + return 0; + + return (GET_CODE (op) == REG + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == SIMD_DMA_CONFIG_REGS)); +}) + +(define_predicate "acc1_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 56 : 57)"))) + +(define_predicate "acc2_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 57 : 56)"))) + +(define_predicate "mlo_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 59 : 58)"))) + +(define_predicate "mhi_operand" + (and (match_code "reg") + (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 58 : 59)"))) + +; Unfortunately, we can not allow a const_int_operand before reload, because +; reload needs a non-void mode to guide it how to reload the inside of a +; {sign_}extend. +(define_predicate "extend_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "immediate_operand") + (ior (not (match_operand 0 "const_int_operand")) + (match_test "reload_in_progress || reload_completed"))))) + +(define_predicate "millicode_store_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 0, 0); +}) + +(define_predicate "millicode_load_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 2, 2); +}) + +(define_predicate "millicode_load_clob_operation" + (match_code "parallel") +{ + return arc_check_millicode (op, 0, 1); +}) + +(define_special_predicate "immediate_usidi_operand" + (if_then_else + (match_code "const_int") + (match_test "INTVAL (op) >= 0") + (and (match_test "const_double_operand (op, mode)") + (match_test "CONST_DOUBLE_HIGH (op) == 0")))) diff --git a/gcc-4.9/gcc/config/arc/simdext.md b/gcc-4.9/gcc/config/arc/simdext.md new file mode 100644 index 000000000..13e268c11 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/simdext.md @@ -0,0 +1,1292 @@ +;; Machine description of the Synopsys DesignWare ARC cpu for GNU C compiler +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+ +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_constants + [ + ;; Va, Vb, Vc builtins + (UNSPEC_ARC_SIMD_VADDAW 1000) + (UNSPEC_ARC_SIMD_VADDW 1001) + (UNSPEC_ARC_SIMD_VAVB 1002) + (UNSPEC_ARC_SIMD_VAVRB 1003) + (UNSPEC_ARC_SIMD_VDIFAW 1004) + (UNSPEC_ARC_SIMD_VDIFW 1005) + (UNSPEC_ARC_SIMD_VMAXAW 1006) + (UNSPEC_ARC_SIMD_VMAXW 1007) + (UNSPEC_ARC_SIMD_VMINAW 1008) + (UNSPEC_ARC_SIMD_VMINW 1009) + (UNSPEC_ARC_SIMD_VMULAW 1010) + (UNSPEC_ARC_SIMD_VMULFAW 1011) + (UNSPEC_ARC_SIMD_VMULFW 1012) + (UNSPEC_ARC_SIMD_VMULW 1013) + (UNSPEC_ARC_SIMD_VSUBAW 1014) + (UNSPEC_ARC_SIMD_VSUBW 1015) + (UNSPEC_ARC_SIMD_VSUMMW 1016) + (UNSPEC_ARC_SIMD_VAND 1017) + (UNSPEC_ARC_SIMD_VANDAW 1018) + (UNSPEC_ARC_SIMD_VBIC 1019) + (UNSPEC_ARC_SIMD_VBICAW 1020) + (UNSPEC_ARC_SIMD_VOR 1021) + (UNSPEC_ARC_SIMD_VXOR 1022) + (UNSPEC_ARC_SIMD_VXORAW 1023) + (UNSPEC_ARC_SIMD_VEQW 1024) + (UNSPEC_ARC_SIMD_VLEW 1025) + (UNSPEC_ARC_SIMD_VLTW 1026) + (UNSPEC_ARC_SIMD_VNEW 1027) + (UNSPEC_ARC_SIMD_VMR1AW 1028) + (UNSPEC_ARC_SIMD_VMR1W 1029) + (UNSPEC_ARC_SIMD_VMR2AW 1030) + (UNSPEC_ARC_SIMD_VMR2W 1031) + (UNSPEC_ARC_SIMD_VMR3AW 1032) + (UNSPEC_ARC_SIMD_VMR3W 1033) + (UNSPEC_ARC_SIMD_VMR4AW 1034) + (UNSPEC_ARC_SIMD_VMR4W 1035) + (UNSPEC_ARC_SIMD_VMR5AW 1036) + (UNSPEC_ARC_SIMD_VMR5W 1037) + (UNSPEC_ARC_SIMD_VMR6AW 1038) + (UNSPEC_ARC_SIMD_VMR6W 1039) + (UNSPEC_ARC_SIMD_VMR7AW 1040) + (UNSPEC_ARC_SIMD_VMR7W 1041) + (UNSPEC_ARC_SIMD_VMRB 1042) + (UNSPEC_ARC_SIMD_VH264F 1043) + (UNSPEC_ARC_SIMD_VH264FT 1044) + (UNSPEC_ARC_SIMD_VH264FW 1045) + (UNSPEC_ARC_SIMD_VVC1F 1046) + (UNSPEC_ARC_SIMD_VVC1FT 1047) + ;; Va, Vb, rc/limm builtins + (UNSPEC_ARC_SIMD_VBADDW 1050) + (UNSPEC_ARC_SIMD_VBMAXW 1051) + (UNSPEC_ARC_SIMD_VBMINW 1052) + (UNSPEC_ARC_SIMD_VBMULAW 1053) + (UNSPEC_ARC_SIMD_VBMULFW 1054) + (UNSPEC_ARC_SIMD_VBMULW 1055) + (UNSPEC_ARC_SIMD_VBRSUBW 1056) + (UNSPEC_ARC_SIMD_VBSUBW 1057) + + ;; Va, Vb, Ic builtins + (UNSPEC_ARC_SIMD_VASRW 1060) + (UNSPEC_ARC_SIMD_VSR8 1061) + (UNSPEC_ARC_SIMD_VSR8AW 1062) + + ;; Va, Vb, Ic builtins + (UNSPEC_ARC_SIMD_VASRRWi 1065) + (UNSPEC_ARC_SIMD_VASRSRWi 1066) + (UNSPEC_ARC_SIMD_VASRWi 1067) + (UNSPEC_ARC_SIMD_VASRPWBi 1068) + (UNSPEC_ARC_SIMD_VASRRPWBi 1069) + (UNSPEC_ARC_SIMD_VSR8AWi 1070) + (UNSPEC_ARC_SIMD_VSR8i 1071) + + ;; Va, Vb, u8 (simm) builtins + (UNSPEC_ARC_SIMD_VMVAW 1075) + (UNSPEC_ARC_SIMD_VMVW 1076) + (UNSPEC_ARC_SIMD_VMVZW 1077) + (UNSPEC_ARC_SIMD_VD6TAPF 1078) + + ;; Va, rlimm, u8 (simm) builtins + (UNSPEC_ARC_SIMD_VMOVAW 1080) + (UNSPEC_ARC_SIMD_VMOVW 1081) + (UNSPEC_ARC_SIMD_VMOVZW 1082) + + ;; Va, Vb builtins + (UNSPEC_ARC_SIMD_VABSAW 1085) + (UNSPEC_ARC_SIMD_VABSW 1086) + (UNSPEC_ARC_SIMD_VADDSUW 1087) + (UNSPEC_ARC_SIMD_VSIGNW 1088) + (UNSPEC_ARC_SIMD_VEXCH1 1089) + (UNSPEC_ARC_SIMD_VEXCH2 1090) + (UNSPEC_ARC_SIMD_VEXCH4 1091) + (UNSPEC_ARC_SIMD_VUPBAW 1092) + (UNSPEC_ARC_SIMD_VUPBW 1093) + (UNSPEC_ARC_SIMD_VUPSBAW 1094) + (UNSPEC_ARC_SIMD_VUPSBW 1095) + + (UNSPEC_ARC_SIMD_VDIRUN 1100) + (UNSPEC_ARC_SIMD_VDORUN 1101) + (UNSPEC_ARC_SIMD_VDIWR 1102) + (UNSPEC_ARC_SIMD_VDOWR 1103) + + (UNSPEC_ARC_SIMD_VREC 1105) + (UNSPEC_ARC_SIMD_VRUN 1106) + (UNSPEC_ARC_SIMD_VRECRUN 1107) + (UNSPEC_ARC_SIMD_VENDREC 1108) + + (UNSPEC_ARC_SIMD_VCAST 1200) + (UNSPEC_ARC_SIMD_VINTI 1201) + ] +) + +;; Scheduler descriptions for the simd instructions +(define_insn_reservation "simd_lat_0_insn" 1 + (eq_attr "type" "simd_dma, simd_vstore, simd_vcontrol") + "issue+simd_unit") + +(define_insn_reservation 
"simd_lat_1_insn" 2 + (eq_attr "type" "simd_vcompare, simd_vlogic, + simd_vmove_else_zero, simd_varith_1cycle") + "issue+simd_unit, nothing") + +(define_insn_reservation "simd_lat_2_insn" 3 + (eq_attr "type" "simd_valign, simd_vpermute, + simd_vpack, simd_varith_2cycle") + "issue+simd_unit, nothing*2") + +(define_insn_reservation "simd_lat_3_insn" 4 + (eq_attr "type" "simd_valign_with_acc, simd_vpack_with_acc, + simd_vlogic_with_acc, simd_vload128, + simd_vmove_with_acc, simd_vspecial_3cycle, + simd_varith_with_acc") + "issue+simd_unit, nothing*3") + +(define_insn_reservation "simd_lat_4_insn" 5 + (eq_attr "type" "simd_vload, simd_vmove, simd_vspecial_4cycle") + "issue+simd_unit, nothing*4") + +(define_expand "movv8hi" + [(set (match_operand:V8HI 0 "general_operand" "") + (match_operand:V8HI 1 "general_operand" ""))] + "" + " +{ + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM && GET_CODE(operands[1]) == MEM) + operands[1] = force_reg (V8HImode, operands[1]); +}") + +;; This pattern should appear before the movv8hi_insn pattern +(define_insn "vld128_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (mem:V8HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))) + (match_operand:SI 3 "immediate_operand" "P"))))] + "TARGET_SIMD_SET" + "vld128 %0, [i%2, %3]" + [(set_attr "type" "simd_vload128") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vst128_insn" + [(set (mem:V8HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 0 "vector_register_operand" "v") + (parallel [(match_operand:SI 1 "immediate_operand" "L")]))) + (match_operand:SI 2 "immediate_operand" "P"))) + (match_operand:V8HI 3 "vector_register_operand" "=v"))] + "TARGET_SIMD_SET" + "vst128 %3, [i%1, %2]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vst64_insn" + [(set (mem:V4HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 0 "vector_register_operand" "v") + (parallel [(match_operand:SI 1 "immediate_operand" "L")]))) + (match_operand:SI 2 "immediate_operand" "P"))) + (vec_select:V4HI (match_operand:V8HI 3 "vector_register_operand" "=v") + (parallel [(const_int 0)])))] + "TARGET_SIMD_SET" + "vst64 %3, [i%1, %2]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "movv8hi_insn" + [(set (match_operand:V8HI 0 "vector_register_or_memory_operand" "=v,m,v") + (match_operand:V8HI 1 "vector_register_or_memory_operand" "m,v,v"))] + "TARGET_SIMD_SET && !(GET_CODE (operands[0]) == MEM && GET_CODE(operands[1]) == MEM)" + "@ + vld128r %0, %1 + vst128r %1, %0 + vmvzw %0,%1,0xffff" + [(set_attr "type" "simd_vload128,simd_vstore,simd_vmove_else_zero") + (set_attr "length" "8,8,4") + (set_attr "cond" "nocond, nocond, nocond")]) + +(define_insn "movti_insn" + [(set (match_operand:TI 0 "vector_register_or_memory_operand" "=v,m,v") + (match_operand:TI 1 "vector_register_or_memory_operand" "m,v,v"))] + "" + "@ + vld128r %0, %1 + vst128r %1, %0 + vmvzw %0,%1,0xffff" + [(set_attr "type" "simd_vload128,simd_vstore,simd_vmove_else_zero") + (set_attr "length" "8,8,4") + (set_attr "cond" "nocond, nocond, nocond")]) + +;; (define_insn "*movv8hi_insn_rr" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (match_operand:V8HI 1 "vector_register_operand" "v"))] +;; "" +;; "mov reg,reg" +;; 
[(set_attr "length" "8") +;; (set_attr "type" "move")]) + +;; (define_insn "*movv8_out" +;; [(set (match_operand:V8HI 0 "memory_operand" "=m") +;; (match_operand:V8HI 1 "vector_register_operand" "v"))] +;; "" +;; "mov out" +;; [(set_attr "length" "8") +;; (set_attr "type" "move")]) + + +;; (define_insn "addv8hi3" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (plus:V8HI (match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:V8HI 2 "vector_register_operand" "v")))] +;; "TARGET_SIMD_SET" +;; "vaddw %0, %1, %2" +;; [(set_attr "length" "8") +;; (set_attr "cond" "nocond")]) + +;; (define_insn "vaddw_insn" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (unspec [(match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDW))] +;; "TARGET_SIMD_SET" +;; "vaddw %0, %1, %2" +;; [(set_attr "length" "8") +;; (set_attr "cond" "nocond")]) + +;; V V V Insns +(define_insn "vaddaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDAW))] + "TARGET_SIMD_SET" + "vaddaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vaddw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDW))] + "TARGET_SIMD_SET" + "vaddw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vavb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAVB))] + "TARGET_SIMD_SET" + "vavb %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vavrb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAVRB))] + "TARGET_SIMD_SET" + "vavrb %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdifaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VDIFAW))] + "TARGET_SIMD_SET" + "vdifaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdifw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VDIFW))] + "TARGET_SIMD_SET" + "vdifw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmaxaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMAXAW))] + "TARGET_SIMD_SET" + "vmaxaw %0, 
%1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmaxw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMAXW))] + "TARGET_SIMD_SET" + "vmaxw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vminaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMINAW))] + "TARGET_SIMD_SET" + "vminaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vminw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMINW))] + "TARGET_SIMD_SET" + "vminw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULAW))] + "TARGET_SIMD_SET" + "vmulaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulfaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULFAW))] + "TARGET_SIMD_SET" + "vmulfaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulfw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULFW))] + "TARGET_SIMD_SET" + "vmulfw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmulw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULW))] + "TARGET_SIMD_SET" + "vmulw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsubaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUBAW))] + "TARGET_SIMD_SET" + "vsubaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUBW))] + "TARGET_SIMD_SET" + "vsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" 
"nocond")]) + +(define_insn "vsummw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUMMW))] + "TARGET_SIMD_SET" + "vsummw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vand_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAND))] + "TARGET_SIMD_SET" + "vand %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vandaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VANDAW))] + "TARGET_SIMD_SET" + "vandaw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbic_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VBIC))] + "TARGET_SIMD_SET" + "vbic %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbicaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VBICAW))] + "TARGET_SIMD_SET" + "vbicaw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vor_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VOR))] + "TARGET_SIMD_SET" + "vor %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vxor_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VXOR))] + "TARGET_SIMD_SET" + "vxor %0, %1, %2" + [(set_attr "type" "simd_vlogic") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vxoraw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VXORAW))] + "TARGET_SIMD_SET" + "vxoraw %0, %1, %2" + [(set_attr "type" "simd_vlogic_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "veqw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEQW))] + "TARGET_SIMD_SET" + "veqw %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vlew_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 
"vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VLEW))] + "TARGET_SIMD_SET" + "vlew %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vltw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VLTW))] + "TARGET_SIMD_SET" + "vltw %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vnew_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VNEW))] + "TARGET_SIMD_SET" + "vnew %0, %1, %2" + [(set_attr "type" "simd_vcompare") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr1aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR1AW))] + "TARGET_SIMD_SET" + "vmr1aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr1w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR1W))] + "TARGET_SIMD_SET" + "vmr1w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr2aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR2AW))] + "TARGET_SIMD_SET" + "vmr2aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr2w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR2W))] + "TARGET_SIMD_SET" + "vmr2w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr3aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR3AW))] + "TARGET_SIMD_SET" + "vmr3aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr3w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR3W))] + "TARGET_SIMD_SET" + "vmr3w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr4aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR4AW))] + "TARGET_SIMD_SET" + "vmr4aw %0, %1, 
%2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr4w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR4W))] + "TARGET_SIMD_SET" + "vmr4w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr5aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR5AW))] + "TARGET_SIMD_SET" + "vmr5aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr5w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR5W))] + "TARGET_SIMD_SET" + "vmr5w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr6aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR6AW))] + "TARGET_SIMD_SET" + "vmr6aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr6w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR6W))] + "TARGET_SIMD_SET" + "vmr6w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr7aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR7AW))] + "TARGET_SIMD_SET" + "vmr7aw %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmr7w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR7W))] + "TARGET_SIMD_SET" + "vmr7w %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmrb_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMRB))] + "TARGET_SIMD_SET" + "vmrb %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264f_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264F))] + "TARGET_SIMD_SET" + "vh264f %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264ft_insn" + [(set 
(match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264FT))] + "TARGET_SIMD_SET" + "vh264ft %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vh264fw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264FW))] + "TARGET_SIMD_SET" + "vh264fw %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vvc1f_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VVC1F))] + "TARGET_SIMD_SET" + "vvc1f %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vvc1ft_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VVC1FT))] + "TARGET_SIMD_SET" + "vvc1ft %0, %1, %2" + [(set_attr "type" "simd_vspecial_3cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + + + +;;--- +;; V V r/limm Insns + +;; (define_insn "vbaddw_insn" +;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v") +;; (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") +;; (match_operand:SI 2 "nonmemory_operand" "rCal")] UNSPEC_ARC_SIMD_VBADDW))] +;; "TARGET_SIMD_SET" +;; "vbaddw %0, %1, %2" +;; [(set_attr "length" "4") +;; (set_attr "cond" "nocond")]) + +(define_insn "vbaddw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBADDW))] + "TARGET_SIMD_SET" + "vbaddw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmaxw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMAXW))] + "TARGET_SIMD_SET" + "vbmaxw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbminw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMINW))] + "TARGET_SIMD_SET" + "vbminw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULAW))] + "TARGET_SIMD_SET" + "vbmulaw %0, %1, %2" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulfw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 
"vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULFW))] + "TARGET_SIMD_SET" + "vbmulfw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbmulw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULW))] + "TARGET_SIMD_SET" + "vbmulw %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbrsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBRSUBW))] + "TARGET_SIMD_SET" + "vbrsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vbsubw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBSUBW))] + "TARGET_SIMD_SET" + "vbsubw %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) +; Va, Vb, Ic instructions + +; Va, Vb, u6 instructions +(define_insn "vasrrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRRWi))] + "TARGET_SIMD_SET" + "vasrrwi %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrsrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRSRWi))] + "TARGET_SIMD_SET" + "vasrsrwi %0, %1, %2" + [(set_attr "type" "simd_varith_2cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrwi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRWi))] + "TARGET_SIMD_SET" + "vasrwi %0, %1, %2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrpwbi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRPWBi))] + "TARGET_SIMD_SET" + "vasrpwbi %0, %1, %2" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrrpwbi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRRPWBi))] + "TARGET_SIMD_SET" + "vasrrpwbi %0, %1, %2" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8awi_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] 
UNSPEC_ARC_SIMD_VSR8AWi))] + "TARGET_SIMD_SET" + "vsr8awi %0, %1, %2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8i_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VSR8i))] + "TARGET_SIMD_SET" + "vsr8i %0, %1, %2" + [(set_attr "type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, Vb, u8 (simm) insns + +(define_insn "vmvaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVAW))] + "TARGET_SIMD_SET" + "vmvaw %0, %1, %2" + [(set_attr "type" "simd_vmove_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmvw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVW))] + "TARGET_SIMD_SET" + "vmvw %0, %1, %2" + [(set_attr "type" "simd_vmove") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmvzw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVZW))] + "TARGET_SIMD_SET" + "vmvzw %0, %1, %2" + [(set_attr "type" "simd_vmove_else_zero") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vd6tapf_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VD6TAPF))] + "TARGET_SIMD_SET" + "vd6tapf %0, %1, %2" + [(set_attr "type" "simd_vspecial_4cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, rlimm, u8 (simm) insns +(define_insn "vmovaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVAW))] + "TARGET_SIMD_SET" + "vmovaw %0, %1, %2" + [(set_attr "type" "simd_vmove_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmovw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVW))] + "TARGET_SIMD_SET" + "vmovw %0, %1, %2" + [(set_attr "type" "simd_vmove") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vmovzw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVZW))] + "TARGET_SIMD_SET" + "vmovzw %0, %1, %2" + [(set_attr "type" "simd_vmove_else_zero") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, rlimm, Ic insns +(define_insn "vsr8_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSR8))] + "TARGET_SIMD_SET" + "vsr8 %0, %1, i%2" + [(set_attr 
"type" "simd_valign") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vasrw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VASRW))] + "TARGET_SIMD_SET" + "vasrw %0, %1, i%2" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsr8aw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v") + (match_operand:SI 2 "immediate_operand" "K") + (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSR8AW))] + "TARGET_SIMD_SET" + "vsr8aw %0, %1, i%2" + [(set_attr "type" "simd_valign_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; Va, Vb insns +(define_insn "vabsaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VABSAW))] + "TARGET_SIMD_SET" + "vabsaw %0, %1" + [(set_attr "type" "simd_varith_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vabsw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VABSW))] + "TARGET_SIMD_SET" + "vabsw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vaddsuw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDSUW))] + "TARGET_SIMD_SET" + "vaddsuw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vsignw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSIGNW))] + "TARGET_SIMD_SET" + "vsignw %0, %1" + [(set_attr "type" "simd_varith_1cycle") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch1_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH1))] + "TARGET_SIMD_SET" + "vexch1 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch2_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH2))] + "TARGET_SIMD_SET" + "vexch2 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vexch4_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH4))] + "TARGET_SIMD_SET" + "vexch4 %0, %1" + [(set_attr "type" "simd_vpermute") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupbaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPBAW))] + "TARGET_SIMD_SET" + "vupbaw %0, %1" + [(set_attr "type" "simd_vpack_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + 
+(define_insn "vupbw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPBW))] + "TARGET_SIMD_SET" + "vupbw %0, %1" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupsbaw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPSBAW))] + "TARGET_SIMD_SET" + "vupsbaw %0, %1" + [(set_attr "type" "simd_vpack_with_acc") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vupsbw_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPSBW))] + "TARGET_SIMD_SET" + "vupsbw %0, %1" + [(set_attr "type" "simd_vpack") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +; DMA setup instructions +(define_insn "vdirun_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VDIRUN))] + "TARGET_SIMD_SET" + "vdirun %1, %2" + [(set_attr "type" "simd_dma") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdorun_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r") + (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VDORUN))] + "TARGET_SIMD_SET" + "vdorun %1, %2" + [(set_attr "type" "simd_dma") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vdiwr_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d,d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r,Cal")] UNSPEC_ARC_SIMD_VDIWR))] + "TARGET_SIMD_SET" + "vdiwr %0, %1" + [(set_attr "type" "simd_dma") + (set_attr "length" "4,8") + (set_attr "cond" "nocond,nocond")]) + +(define_insn "vdowr_insn" + [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d,d") + (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r,Cal")] UNSPEC_ARC_SIMD_VDOWR))] + "TARGET_SIMD_SET" + "vdowr %0, %1" + [(set_attr "type" "simd_dma") + (set_attr "length" "4,8") + (set_attr "cond" "nocond,nocond")]) + +;; vector record and run instructions +(define_insn "vrec_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VREC)] + "TARGET_SIMD_SET" + "vrec %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vrun_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VRUN)] + "TARGET_SIMD_SET" + "vrun %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vrecrun_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VRECRUN)] + "TARGET_SIMD_SET" + "vrecrun %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vendrec_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VENDREC)] + "TARGET_SIMD_SET" + "vendrec %S0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld32wh_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (zero_extend:V4HI (mem:V4QI 
(plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) + (vec_select:V4HI (match_dup 0) + (parallel [(const_int 0)]))))] + "TARGET_SIMD_SET" + "vld32wh %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld32wl_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (zero_extend:V4HI (mem:V4QI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) ))] + "TARGET_SIMD_SET" + "vld32wl %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld64w_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (zero_extend:V8HI (mem:V4HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))) + (match_operand:SI 3 "immediate_operand" "P")))))] + "TARGET_SIMD_SET" + "vld64w %0, [i%2, %3]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")] +) + +(define_insn "vld64_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (mem:V4HI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")]))))) ))] + "TARGET_SIMD_SET" + "vld64 %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vld32_insn" + [(set (match_operand:V8HI 0 "vector_register_operand" "=v") + (vec_concat:V8HI (vec_select:V4HI (match_dup 0) + (parallel [(const_int 1)])) + (vec_concat:V4HI (vec_select:V2HI (match_dup 0) + (parallel [(const_int 1)])) + (mem:V2HI (plus:SI (match_operand:SI 1 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v") + (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) ))] + "TARGET_SIMD_SET" + "vld32 %0, [i%3,%1]" + [(set_attr "type" "simd_vload") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vst16_n_insn" + [(set (mem:HI (plus:SI (match_operand:SI 0 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))))) + (vec_select:HI (match_operand:V8HI 3 "vector_register_operand" "v") + (parallel [(match_operand:SI 4 "immediate_operand" "L")])))] + "TARGET_SIMD_SET" + "vst16_%4 %3,[i%2, %0]" + [(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +(define_insn "vst32_n_insn" + [(set (mem:SI (plus:SI (match_operand:SI 0 "immediate_operand" "P") + (zero_extend: SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v") + (parallel [(match_operand:SI 2 "immediate_operand" "L")]))))) + (vec_select:SI (unspec:V4SI [(match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VCAST) + (parallel [(match_operand:SI 4 "immediate_operand" "L")])))] + "TARGET_SIMD_SET" + "vst32_%4 %3,[i%2, %0]" + 
[(set_attr "type" "simd_vstore") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) + +;; SIMD unit interrupt +(define_insn "vinti_insn" + [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "L")] UNSPEC_ARC_SIMD_VINTI)] + "TARGET_SIMD_SET" + "vinti %0" + [(set_attr "type" "simd_vcontrol") + (set_attr "length" "4") + (set_attr "cond" "nocond")]) diff --git a/gcc-4.9/gcc/config/arc/t-arc-newlib b/gcc-4.9/gcc/config/arc/t-arc-newlib new file mode 100644 index 000000000..5c1cb26b6 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/t-arc-newlib @@ -0,0 +1,38 @@ +# GCC Makefile fragment for Synopsys DesignWare ARC with newlib. + +# Copyright (C) 2007-2014 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3, or (at your option) any later version. + +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along +# with GCC; see the file COPYING3. If not see +# . + +# Selecting -mA5 uses the same functional multilib files/libraries +# as get used for -mARC600 aka -mA6. +MULTILIB_OPTIONS=mcpu=ARC600/mcpu=ARC601 mmul64/mmul32x16 mnorm +MULTILIB_DIRNAMES=arc600 arc601 mul64 mul32x16 norm +# +# Aliases: +MULTILIB_MATCHES = mcpu?ARC600=mcpu?arc600 +MULTILIB_MATCHES += mcpu?ARC600=mARC600 +MULTILIB_MATCHES += mcpu?ARC600=mA6 +MULTILIB_MATCHES += mcpu?ARC600=mA5 +MULTILIB_MATCHES += mcpu?ARC600=mno-mpy +MULTILIB_MATCHES += mcpu?ARC601=mcpu?arc601 +MULTILIB_MATCHES += EL=mlittle-endian +MULTILIB_MATCHES += EB=mbig-endian +# +# These don't make sense for the ARC700 default target: +MULTILIB_EXCEPTIONS=mmul64* mmul32x16* mnorm* +# And neither of the -mmul* options make sense without -mnorm: +MULTILIB_EXCLUSIONS=mARC600/mmul64/!mnorm mcpu=ARC601/mmul64/!mnorm mARC600/mmul32x16/!mnorm diff --git a/gcc-4.9/gcc/config/arc/t-arc-uClibc b/gcc-4.9/gcc/config/arc/t-arc-uClibc new file mode 100644 index 000000000..704a3aa67 --- /dev/null +++ b/gcc-4.9/gcc/config/arc/t-arc-uClibc @@ -0,0 +1,20 @@ +# GCC Makefile fragment for Synopsys DesignWare ARC with uClibc + +# Copyright (C) 2007-2014 Free Software Foundation, Inc. + +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3, or (at your option) any later version. + +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. + +# You should have received a copy of the GNU General Public License along +# with GCC; see the file COPYING3. If not see +# . 
+ +MULTILIB_EXTRA_OPTS = mno-sdata diff --git a/gcc-4.9/gcc/config/arm/README-interworking b/gcc-4.9/gcc/config/arm/README-interworking new file mode 100644 index 000000000..3e36f12a8 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/README-interworking @@ -0,0 +1,749 @@ + Arm / Thumb Interworking + ======================== + +The Cygnus GNU Pro Toolkit for the ARM7T processor supports function +calls between code compiled for the ARM instruction set and code +compiled for the Thumb instruction set and vice versa. This document +describes how that interworking support operates and explains the +command line switches that should be used in order to produce working +programs. + +Note: The Cygnus GNU Pro Toolkit does not support switching between +compiling for the ARM instruction set and the Thumb instruction set +on anything other than a per file basis. There are in fact two +completely separate compilers, one that produces ARM assembler +instructions and one that produces Thumb assembler instructions. The +two compilers share the same assembler, linker and so on. + + +1. Explicit interworking support for C and C++ files +==================================================== + +By default if a file is compiled without any special command line +switches then the code produced will not support interworking. +Provided that a program is made up entirely from object files and +libraries produced in this way and which contain either exclusively +ARM instructions or exclusively Thumb instructions then this will not +matter and a working executable will be created. If an attempt is +made to link together mixed ARM and Thumb object files and libraries, +then warning messages will be produced by the linker and a non-working +executable will be created. + +In order to produce code which does support interworking it should be +compiled with the + + -mthumb-interwork + +command line option. Provided that a program is made up entirely from +object files and libraries built with this command line switch a +working executable will be produced, even if both ARM and Thumb +instructions are used by the various components of the program. (No +warning messages will be produced by the linker either). + +Note that specifying -mthumb-interwork does result in slightly larger, +slower code being produced. This is why interworking support must be +specifically enabled by a switch. + + +2. Explicit interworking support for assembler files +==================================================== + +If assembler files are to be included into an interworking program +then the following rules must be obeyed: + + * Any externally visible functions must return by using the BX + instruction. + + * Normal function calls can just use the BL instruction. The + linker will automatically insert code to switch between ARM + and Thumb modes as necessary. + + * Calls via function pointers should use the BX instruction if + the call is made in ARM mode: + + .code 32 + mov lr, pc + bx rX + + This code sequence will not work in Thumb mode however, since + the mov instruction will not set the bottom bit of the lr + register. Instead a branch-and-link to the _call_via_rX + functions should be used instead: + + .code 16 + bl _call_via_rX + + where rX is replaced by the name of the register containing + the function address. + + * All externally visible functions which should be entered in + Thumb mode must have the .thumb_func pseudo op specified just + before their entry point. 
e.g.: + + .code 16 + .global function + .thumb_func + function: + ...start of function.... + + * All assembler files must be assembled with the switch + -mthumb-interwork specified on the command line. (If the file + is assembled by calling gcc it will automatically pass on the + -mthumb-interwork switch to the assembler, provided that it + was specified on the gcc command line in the first place.) + + +3. Support for old, non-interworking aware code. +================================================ + +If it is necessary to link together code produced by an older, +non-interworking aware compiler, or code produced by the new compiler +but without the -mthumb-interwork command line switch specified, then +there are two command line switches that can be used to support this. + +The switch + + -mcaller-super-interworking + +will allow calls via function pointers in Thumb mode to work, +regardless of whether the function pointer points to old, +non-interworking aware code or not. Specifying this switch does +produce slightly slower code however. + +Note: There is no switch to allow calls via function pointers in ARM +mode to be handled specially. Calls via function pointers from +interworking aware ARM code to non-interworking aware ARM code work +without any special considerations by the compiler. Calls via +function pointers from interworking aware ARM code to non-interworking +aware Thumb code however will not work. (Actually under some +circumstances they may work, but there are no guarantees). This is +because only the new compiler is able to produce Thumb code, and this +compiler already has a command line switch to produce interworking +aware code. + + +The switch + + -mcallee-super-interworking + +will allow non-interworking aware ARM or Thumb code to call Thumb +functions, either directly or via function pointers. Specifying this +switch does produce slightly larger, slower code however. + +Note: There is no switch to allow non-interworking aware ARM or Thumb +code to call ARM functions. There is no need for any special handling +of calls from non-interworking aware ARM code to interworking aware +ARM functions, they just work normally. Calls from non-interworking +aware Thumb functions to ARM code however, will not work. There is no +option to support this, since it is always possible to recompile the +Thumb code to be interworking aware. + +As an alternative to the command line switch +-mcallee-super-interworking, which affects all externally visible +functions in a file, it is possible to specify an attribute or +declspec for individual functions, indicating that that particular +function should support being called by non-interworking aware code. +The function should be defined like this: + + int __attribute__((interfacearm)) function + { + ... body of function ... + } + +or + + int __declspec(interfacearm) function + { + ... body of function ... + } + + + +4. Interworking support in dlltool +================================== + +It is possible to create DLLs containing mixed ARM and Thumb code. It +is also possible to call Thumb code in a DLL from an ARM program and +vice versa. It is even possible to call ARM DLLs that have been compiled +without interworking support (say by an older version of the compiler), +from Thumb programs and still have things work properly. 
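Before the dlltool-specific rules, a source-level picture of the call that the stub machinery protects may help. The C fragment below is not part of the original toolkit documentation; it is a minimal, hedged sketch of an indirect call, the case handled by the _call_via_rX and _interwork_call_via_rX functions described in the sections that follow.

    /* Sketch only: a callback dispatcher built as Thumb code.  The Thumb
       compiler emits the indirect call as a branch-and-link to a
       _call_via_rX stub; with -mcaller-super-interworking it uses
       _interwork_call_via_rX instead, which also copes with old,
       non-interworking aware ARM code behind the pointer.  */
    typedef int (*handler_fn) (int);

    int dispatch (handler_fn fn, int value)
    {
      return fn (value);   /* the instruction set of FN is not known here */
    }
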
+ + A version of the `dlltool' program which supports the `--interwork' +command line switch is needed, as well as the following special +considerations when building programs and DLLs: + +*Use `-mthumb-interwork'* + When compiling files for a DLL or a program the `-mthumb-interwork' + command line switch should be specified if calling between ARM and + Thumb code can happen. If a program is being compiled and the + mode of the DLLs that it uses is not known, then it should be + assumed that interworking might occur and the switch used. + +*Use `-m thumb'* + If the exported functions from a DLL are all Thumb encoded then the + `-m thumb' command line switch should be given to dlltool when + building the stubs. This will make dlltool create Thumb encoded + stubs, rather than its default of ARM encoded stubs. + + If the DLL consists of both exported Thumb functions and exported + ARM functions then the `-m thumb' switch should not be used. + Instead the Thumb functions in the DLL should be compiled with the + `-mcallee-super-interworking' switch, or with the `interfacearm' + attribute specified on their prototypes. In this way they will be + given ARM encoded prologues, which will work with the ARM encoded + stubs produced by dlltool. + +*Use `-mcaller-super-interworking'* + If it is possible for Thumb functions in a DLL to call + non-interworking aware code via a function pointer, then the Thumb + code must be compiled with the `-mcaller-super-interworking' + command line switch. This will force the function pointer calls + to use the _interwork_call_via_rX stub functions which will + correctly restore Thumb mode upon return from the called function. + +*Link with `libgcc.a'* + When the dll is built it may have to be linked with the GCC + library (`libgcc.a') in order to extract the _call_via_rX functions + or the _interwork_call_via_rX functions. This represents a partial + redundancy since the same functions *may* be present in the + application itself, but since they only take up 372 bytes this + should not be too much of a consideration. + +*Use `--support-old-code'* + When linking a program with an old DLL which does not support + interworking, the `--support-old-code' command line switch to the + linker should be used. This causes the linker to generate special + interworking stubs which can cope with old, non-interworking aware + ARM code, at the cost of generating bulkier code. The linker will + still generate a warning message along the lines of: + "Warning: input file XXX does not support interworking, whereas YYY does." + but this can now be ignored because the --support-old-code switch + has been used. + + + +5. How interworking support works +================================= + +Switching between the ARM and Thumb instruction sets is accomplished +via the BX instruction which takes as an argument a register name. +Control is transferred to the address held in this register (with the +bottom bit masked out), and if the bottom bit is set, then Thumb +instruction processing is enabled, otherwise ARM instruction +processing is enabled. + +When the -mthumb-interwork command line switch is specified, gcc +arranges for all functions to return to their caller by using the BX +instruction. Thus provided that the return address has the bottom bit +correctly initialized to indicate the instruction set of the caller, +correct operation will ensue. + +When a function is called explicitly (rather than via a function +pointer), the compiler generates a BL instruction to do this. 
The +Thumb version of the BL instruction has the special property of +setting the bottom bit of the LR register after it has stored the +return address into it, so that a future BX instruction will correctly +return the instruction after the BL instruction, in Thumb mode. + +The BL instruction does not change modes itself however, so if an ARM +function is calling a Thumb function, or vice versa, it is necessary +to generate some extra instructions to handle this. This is done in +the linker when it is storing the address of the referenced function +into the BL instruction. If the BL instruction is an ARM style BL +instruction, but the referenced function is a Thumb function, then the +linker automatically generates a calling stub that converts from ARM +mode to Thumb mode, puts the address of this stub into the BL +instruction, and puts the address of the referenced function into the +stub. Similarly if the BL instruction is a Thumb BL instruction, and +the referenced function is an ARM function, the linker generates a +stub which converts from Thumb to ARM mode, puts the address of this +stub into the BL instruction, and the address of the referenced +function into the stub. + +This is why it is necessary to mark Thumb functions with the +.thumb_func pseudo op when creating assembler files. This pseudo op +allows the assembler to distinguish between ARM functions and Thumb +functions. (The Thumb version of GCC automatically generates these +pseudo ops for any Thumb functions that it generates). + +Calls via function pointers work differently. Whenever the address of +a function is taken, the linker examines the type of the function +being referenced. If the function is a Thumb function, then it sets +the bottom bit of the address. Technically this makes the address +incorrect, since it is now one byte into the start of the function, +but this is never a problem because: + + a. with interworking enabled all calls via function pointer + are done using the BX instruction and this ignores the + bottom bit when computing where to go to. + + b. the linker will always set the bottom bit when the address + of the function is taken, so it is never possible to take + the address of the function in two different places and + then compare them and find that they are not equal. + +As already mentioned any call via a function pointer will use the BX +instruction (provided that interworking is enabled). The only problem +with this is computing the return address for the return from the +called function. For ARM code this can easily be done by the code +sequence: + + mov lr, pc + bx rX + +(where rX is the name of the register containing the function +pointer). This code does not work for the Thumb instruction set, +since the MOV instruction will not set the bottom bit of the LR +register, so that when the called function returns, it will return in +ARM mode not Thumb mode. Instead the compiler generates this +sequence: + + bl _call_via_rX + +(again where rX is the name if the register containing the function +pointer). The special call_via_rX functions look like this: + + .thumb_func +_call_via_r0: + bx r0 + nop + +The BL instruction ensures that the correct return address is stored +in the LR register and then the BX instruction jumps to the address +stored in the function pointer, switch modes if necessary. + + +6. 
How caller-super-interworking support works +============================================== + +When the -mcaller-super-interworking command line switch is specified +it changes the code produced by the Thumb compiler so that all calls +via function pointers (including virtual function calls) now go via a +different stub function. The code to call via a function pointer now +looks like this: + + bl _interwork_call_via_r0 + +Note: The compiler does not insist that r0 be used to hold the +function address. Any register will do, and there are a suite of stub +functions, one for each possible register. The stub functions look +like this: + + .code 16 + .thumb_func +_interwork_call_via_r0 + bx pc + nop + + .code 32 + tst r0, #1 + stmeqdb r13!, {lr} + adreq lr, _arm_return + bx r0 + +The stub first switches to ARM mode, since it is a lot easier to +perform the necessary operations using ARM instructions. It then +tests the bottom bit of the register containing the address of the +function to be called. If this bottom bit is set then the function +being called uses Thumb instructions and the BX instruction to come +will switch back into Thumb mode before calling this function. (Note +that it does not matter how this called function chooses to return to +its caller, since the both the caller and callee are Thumb functions, +and mode switching is necessary). If the function being called is an +ARM mode function however, the stub pushes the return address (with +its bottom bit set) onto the stack, replaces the return address with +the address of the a piece of code called '_arm_return' and then +performs a BX instruction to call the function. + +The '_arm_return' code looks like this: + + .code 32 +_arm_return: + ldmia r13!, {r12} + bx r12 + .code 16 + + +It simply retrieves the return address from the stack, and then +performs a BX operation to return to the caller and switch back into +Thumb mode. + + +7. How callee-super-interworking support works +============================================== + +When -mcallee-super-interworking is specified on the command line the +Thumb compiler behaves as if every externally visible function that it +compiles has had the (interfacearm) attribute specified for it. What +this attribute does is to put a special, ARM mode header onto the +function which forces a switch into Thumb mode: + + without __attribute__((interfacearm)): + + .code 16 + .thumb_func + function: + ... start of function ... + + with __attribute__((interfacearm)): + + .code 32 + function: + orr r12, pc, #1 + bx r12 + + .code 16 + .thumb_func + .real_start_of_function: + + ... start of function ... + +Note that since the function now expects to be entered in ARM mode, it +no longer has the .thumb_func pseudo op specified for its name. +Instead the pseudo op is attached to a new label .real_start_of_ +(where is the name of the function) which indicates the start +of the Thumb code. This does have the interesting side effect in that +if this function is now called from a Thumb mode piece of code +outside of the current file, the linker will generate a calling stub +to switch from Thumb mode into ARM mode, and then this is immediately +overridden by the function's header which switches back into Thumb +mode. + +In addition the (interfacearm) attribute also forces the function to +return by using the BX instruction, even if has not been compiled with +the -mthumb-interwork command line flag, so that the correct mode will +be restored upon exit from the function. + + +8. 
Some examples +================ + + Given these two test files: + + int arm (void) { return 1 + thumb (); } + + int thumb (void) { return 2 + arm (); } + + The following pieces of assembler are produced by the ARM and Thumb +version of GCC depending upon the command line options used: + + `-O2': + .code 32 .code 16 + .global _arm .global _thumb + .thumb_func + _arm: _thumb: + mov ip, sp + stmfd sp!, {fp, ip, lr, pc} push {lr} + sub fp, ip, #4 + bl _thumb bl _arm + add r0, r0, #1 add r0, r0, #2 + ldmea fp, {fp, sp, pc} pop {pc} + + Note how the functions return without using the BX instruction. If +these files were assembled and linked together they would fail to work +because they do not change mode when returning to their caller. + + `-O2 -mthumb-interwork': + + .code 32 .code 16 + .global _arm .global _thumb + .thumb_func + _arm: _thumb: + mov ip, sp + stmfd sp!, {fp, ip, lr, pc} push {lr} + sub fp, ip, #4 + bl _thumb bl _arm + add r0, r0, #1 add r0, r0, #2 + ldmea fp, {fp, sp, lr} pop {r1} + bx lr bx r1 + + Now the functions use BX to return their caller. They have grown by +4 and 2 bytes respectively, but they can now successfully be linked +together and be expect to work. The linker will replace the +destinations of the two BL instructions with the addresses of calling +stubs which convert to the correct mode before jumping to the called +function. + + `-O2 -mcallee-super-interworking': + + .code 32 .code 32 + .global _arm .global _thumb + _arm: _thumb: + orr r12, pc, #1 + bx r12 + mov ip, sp .code 16 + stmfd sp!, {fp, ip, lr, pc} push {lr} + sub fp, ip, #4 + bl _thumb bl _arm + add r0, r0, #1 add r0, r0, #2 + ldmea fp, {fp, sp, lr} pop {r1} + bx lr bx r1 + + The thumb function now has an ARM encoded prologue, and it no longer +has the `.thumb-func' pseudo op attached to it. The linker will not +generate a calling stub for the call from arm() to thumb(), but it will +still have to generate a stub for the call from thumb() to arm(). Also +note how specifying `--mcallee-super-interworking' automatically +implies `-mthumb-interworking'. + + +9. Some Function Pointer Examples +================================= + + Given this test file: + + int func (void) { return 1; } + + int call (int (* ptr)(void)) { return ptr (); } + + The following varying pieces of assembler are produced by the Thumb +version of GCC depending upon the command line options used: + + `-O2': + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .thumb_func + _call: + push {lr} + bl __call_via_r0 + pop {pc} + + Note how the two functions have different exit sequences. In +particular call() uses pop {pc} to return, which would not work if the +caller was in ARM mode. func() however, uses the BX instruction, even +though `-mthumb-interwork' has not been specified, as this is the most +efficient way to exit a function when the return address is held in the +link register. + + `-O2 -mthumb-interwork': + + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .thumb_func + _call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + This time both functions return by using the BX instruction. This +means that call() is now two bytes longer and several cycles slower +than the previous version. 
+ + `-O2 -mcaller-super-interworking': + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .thumb_func + _call: + push {lr} + bl __interwork_call_via_r0 + pop {pc} + + Very similar to the first (non-interworking) version, except that a +different stub is used to call via the function pointer. This new stub +will work even if the called function is not interworking aware, and +tries to return to call() in ARM mode. Note that the assembly code for +call() is still not interworking aware itself, and so should not be +called from ARM code. + + `-O2 -mcallee-super-interworking': + + .code 32 + .globl _func + _func: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_func + .thumb_func + .real_start_of_func: + mov r0, #1 + bx lr + + .code 32 + .globl _call + _call: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_call + .thumb_func + .real_start_of_call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + Now both functions have an ARM coded prologue, and both functions +return by using the BX instruction. These functions are interworking +aware therefore and can safely be called from ARM code. The code for +the call() function is now 10 bytes longer than the original, non +interworking aware version, an increase of over 200%. + + If a prototype for call() is added to the source code, and this +prototype includes the `interfacearm' attribute: + + int __attribute__((interfacearm)) call (int (* ptr)(void)); + + then this code is produced (with only -O2 specified on the command +line): + + .code 16 + .globl _func + .thumb_func + _func: + mov r0, #1 + bx lr + + .globl _call + .code 32 + _call: + orr r12, pc, #1 + bx r12 + + .code 16 + .globl .real_start_of_call + .thumb_func + .real_start_of_call: + push {lr} + bl __call_via_r0 + pop {r1} + bx r1 + + So now both call() and func() can be safely called via +non-interworking aware ARM code. If, when such a file is assembled, +the assembler detects the fact that call() is being called by another +function in the same file, it will automatically adjust the target of +the BL instruction to point to .real_start_of_call. In this way there +is no need for the linker to generate a Thumb-to-ARM calling stub so +that call can be entered in ARM mode. + + +10. How to use dlltool to build ARM/Thumb DLLs +============================================== + Given a program (`prog.c') like this: + + extern int func_in_dll (void); + + int main (void) { return func_in_dll(); } + + And a DLL source file (`dll.c') like this: + + int func_in_dll (void) { return 1; } + + Here is how to build the DLL and the program for a purely ARM based +environment: + +*Step One + Build a `.def' file describing the DLL: + + ; example.def + ; This file describes the contents of the DLL + LIBRARY example + HEAPSIZE 0x40000, 0x2000 + EXPORTS + func_in_dll 1 + +*Step Two + Compile the DLL source code: + + arm-pe-gcc -O2 -c dll.c + +*Step Three + Use `dlltool' to create an exports file and a library file: + + dlltool --def example.def --output-exp example.o --output-lib example.a + +*Step Four + Link together the complete DLL: + + arm-pe-ld dll.o example.o -o example.dll + +*Step Five + Compile the program's source code: + + arm-pe-gcc -O2 -c prog.c + +*Step Six + Link together the program and the DLL's library file: + + arm-pe-gcc prog.o example.a -o prog + + If instead this was a Thumb DLL being called from an ARM program, the +steps would look like this. 
(To save space only those steps that are +different from the previous version are shown): + +*Step Two + Compile the DLL source code (using the Thumb compiler): + + thumb-pe-gcc -O2 -c dll.c -mthumb-interwork + +*Step Three + Build the exports and library files (and support interworking): + + dlltool -d example.def -z example.o -l example.a --interwork -m thumb + +*Step Five + Compile the program's source code (and support interworking): + + arm-pe-gcc -O2 -c prog.c -mthumb-interwork + + If instead, the DLL was an old, ARM DLL which does not support +interworking, and which cannot be rebuilt, then these steps would be +used. + +*Step One + Skip. If you do not have access to the sources of a DLL, there is + no point in building a `.def' file for it. + +*Step Two + Skip. With no DLL sources there is nothing to compile. + +*Step Three + Skip. Without a `.def' file you cannot use dlltool to build an + exports file or a library file. + +*Step Four + Skip. Without a set of DLL object files you cannot build the DLL. + Besides it has already been built for you by somebody else. + +*Step Five + Compile the program's source code, this is the same as before: + + arm-pe-gcc -O2 -c prog.c + +*Step Six + Link together the program and the DLL's library file, passing the + `--support-old-code' option to the linker: + + arm-pe-gcc prog.o example.a -Wl,--support-old-code -o prog + + Ignore the warning message about the input file not supporting + interworking as the --support-old-code switch has taken care if this. + + +Copyright (C) 1998-2014 Free Software Foundation, Inc. + +Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. diff --git a/gcc-4.9/gcc/config/arm/aarch-common-protos.h b/gcc-4.9/gcc/config/arm/aarch-common-protos.h new file mode 100644 index 000000000..a5ff6b4f9 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/aarch-common-protos.h @@ -0,0 +1,134 @@ +/* Functions and structures shared between arm and aarch64. + + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#ifndef GCC_AARCH_COMMON_PROTOS_H +#define GCC_AARCH_COMMON_PROTOS_H + +extern int arm_early_load_addr_dep (rtx, rtx); +extern int arm_early_store_addr_dep (rtx, rtx); +extern int arm_mac_accumulator_is_mul_result (rtx, rtx); +extern int arm_mac_accumulator_is_result (rtx, rtx); +extern int arm_no_early_alu_shift_dep (rtx, rtx); +extern int arm_no_early_alu_shift_value_dep (rtx, rtx); +extern int arm_no_early_mul_dep (rtx, rtx); +extern int arm_no_early_store_addr_dep (rtx, rtx); +extern bool arm_rtx_shift_left_p (rtx); + +/* RTX cost table definitions. These are used when tuning for speed rather + than for size and should reflect the _additional_ cost over the cost + of the fastest instruction in the machine, which is COSTS_N_INSNS (1). 
+ Therefore it's okay for some costs to be 0. + Costs may not have a negative value. */ +struct alu_cost_table +{ + const int arith; /* ADD/SUB. */ + const int logical; /* AND/ORR/EOR/BIC, etc. */ + const int shift; /* Simple shift. */ + const int shift_reg; /* Simple shift by reg. */ + const int arith_shift; /* Additional when arith also shifts... */ + const int arith_shift_reg; /* ... and when the shift is by a reg. */ + const int log_shift; /* Additional when logic also shifts... */ + const int log_shift_reg; /* ... and when the shift is by a reg. */ + const int extend; /* Zero/sign extension. */ + const int extend_arith; /* Extend and arith. */ + const int bfi; /* Bit-field insert. */ + const int bfx; /* Bit-field extraction. */ + const int clz; /* Count Leading Zeros. */ + const int non_exec; /* Extra cost when not executing insn. */ + const bool non_exec_costs_exec; /* True if non-execution must add the exec + cost. */ +}; + +struct mult_cost_table +{ + const int simple; + const int flag_setting; /* Additional cost if multiply sets flags. */ + const int extend; + const int add; + const int extend_add; + const int idiv; +}; + +/* Calculations of LDM costs are complex. We assume an initial cost + (ldm_1st) which will load the number of registers mentioned in + ldm_regs_per_insn_1st registers; then each additional + ldm_regs_per_insn_subsequent registers cost one more insn. + Similarly for STM operations. + Therefore the ldm_regs_per_insn_1st/stm_regs_per_insn_1st and + ldm_regs_per_insn_subsequent/stm_regs_per_insn_subsequent fields indicate + the number of registers loaded/stored and are expressed by a simple integer + and not by a COSTS_N_INSNS (N) expression. + */ +struct mem_cost_table +{ + const int load; + const int load_sign_extend; /* Additional to load cost. */ + const int ldrd; /* Cost of LDRD. */ + const int ldm_1st; + const int ldm_regs_per_insn_1st; + const int ldm_regs_per_insn_subsequent; + const int loadf; /* SFmode. */ + const int loadd; /* DFmode. */ + const int load_unaligned; /* Extra for unaligned loads. */ + const int store; + const int strd; + const int stm_1st; + const int stm_regs_per_insn_1st; + const int stm_regs_per_insn_subsequent; + const int storef; /* SFmode. */ + const int stored; /* DFmode. */ + const int store_unaligned; /* Extra for unaligned stores. */ +}; + +struct fp_cost_table +{ + const int div; + const int mult; + const int mult_addsub; /* Non-fused. */ + const int fma; /* Fused. */ + const int addsub; + const int fpconst; /* Immediate. */ + const int neg; /* NEG and ABS. */ + const int compare; + const int widen; /* Widen to this size. */ + const int narrow; /* Narrow from this size. */ + const int toint; + const int fromint; + const int roundint; /* V8 round to integral, remains FP format. */ +}; + +struct vector_cost_table +{ + const int alu; +}; + +struct cpu_cost_table +{ + const struct alu_cost_table alu; + const struct mult_cost_table mult[2]; /* SImode and DImode. */ + const struct mem_cost_table ldst; + const struct fp_cost_table fp[2]; /* SFmode and DFmode. */ + const struct vector_cost_table vect; +}; + + +#endif /* GCC_AARCH_COMMON_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/arm/aarch-common.c b/gcc-4.9/gcc/config/arm/aarch-common.c new file mode 100644 index 000000000..c11f7e954 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/aarch-common.c @@ -0,0 +1,353 @@ +/* Dependency checks for instruction scheduling, shared between ARM and + AARCH64. + + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. 
+ + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "rtl.h" +#include "tree.h" +#include "c-family/c-common.h" +#include "rtl.h" + +typedef struct +{ + rtx_code search_code; + rtx search_result; + bool find_any_shift; +} search_term; + +/* Return TRUE if X is either an arithmetic shift left, or + is a multiplication by a power of two. */ +bool +arm_rtx_shift_left_p (rtx x) +{ + enum rtx_code code = GET_CODE (x); + + if (code == MULT && CONST_INT_P (XEXP (x, 1)) + && exact_log2 (INTVAL (XEXP (x, 1))) > 0) + return true; + + if (code == ASHIFT) + return true; + + return false; +} + +static rtx_code shift_rtx_codes[] = + { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT, + ROTATERT, ZERO_EXTEND, SIGN_EXTEND }; + +/* Callback function for arm_find_sub_rtx_with_code. + DATA is safe to treat as a SEARCH_TERM, ST. This will + hold a SEARCH_CODE. PATTERN is checked to see if it is an + RTX with that code. If it is, write SEARCH_RESULT in ST + and return 1. Otherwise, or if we have been passed a NULL_RTX + return 0. If ST.FIND_ANY_SHIFT then we are interested in + anything which can reasonably be described as a SHIFT RTX. */ +static int +arm_find_sub_rtx_with_search_term (rtx *pattern, void *data) +{ + search_term *st = (search_term *) data; + rtx_code pattern_code; + int found = 0; + + gcc_assert (pattern); + gcc_assert (st); + + /* Poorly formed patterns can really ruin our day. */ + if (*pattern == NULL_RTX) + return 0; + + pattern_code = GET_CODE (*pattern); + + if (st->find_any_shift) + { + unsigned i = 0; + + /* Left shifts might have been canonicalized to a MULT of some + power of two. Make sure we catch them. */ + if (arm_rtx_shift_left_p (*pattern)) + found = 1; + else + for (i = 0; i < ARRAY_SIZE (shift_rtx_codes); i++) + if (pattern_code == shift_rtx_codes[i]) + found = 1; + } + + if (pattern_code == st->search_code) + found = 1; + + if (found) + st->search_result = *pattern; + + return found; +} + +/* Traverse PATTERN looking for a sub-rtx with RTX_CODE CODE. */ +static rtx +arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift) +{ + search_term st; + int result = 0; + + gcc_assert (pattern != NULL_RTX); + st.search_code = code; + st.search_result = NULL_RTX; + st.find_any_shift = find_any_shift; + result = for_each_rtx (&pattern, arm_find_sub_rtx_with_search_term, &st); + if (result) + return st.search_result; + else + return NULL_RTX; +} + +/* Traverse PATTERN looking for any sub-rtx which looks like a shift. */ +static rtx +arm_find_shift_sub_rtx (rtx pattern) +{ + return arm_find_sub_rtx_with_code (pattern, ASHIFT, true); +} + +/* PRODUCER and CONSUMER are two potentially dependant RTX. PRODUCER + (possibly) contains a SET which will provide a result we can access + using the SET_DEST macro. We will place the RTX which would be + written by PRODUCER in SET_SOURCE. 
+ Similarly, CONSUMER (possibly) contains a SET which has an operand + we can access using SET_SRC. We place this operand in + SET_DESTINATION. + + Return nonzero if we found the SET RTX we expected. */ +static int +arm_get_set_operands (rtx producer, rtx consumer, + rtx *set_source, rtx *set_destination) +{ + rtx set_producer = arm_find_sub_rtx_with_code (producer, SET, false); + rtx set_consumer = arm_find_sub_rtx_with_code (consumer, SET, false); + + if (set_producer && set_consumer) + { + *set_source = SET_DEST (set_producer); + *set_destination = SET_SRC (set_consumer); + return 1; + } + return 0; +} + +/* Return nonzero if the CONSUMER instruction (a load) does need + PRODUCER's value to calculate the address. */ +int +arm_early_load_addr_dep (rtx producer, rtx consumer) +{ + rtx value, addr; + + if (!arm_get_set_operands (producer, consumer, &value, &addr)) + return 0; + + return reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value or amount dependency on the + result of PRODUCER. */ +int +arm_no_early_alu_shift_dep (rtx producer, rtx consumer) +{ + rtx value, op; + rtx early_op; + + if (!arm_get_set_operands (producer, consumer, &value, &op)) + return 0; + + if ((early_op = arm_find_shift_sub_rtx (op))) + { + if (REG_P (early_op)) + early_op = op; + + return !reg_overlap_mentioned_p (value, early_op); + } + + return 0; +} + +/* Return nonzero if the CONSUMER instruction (an ALU op) does not + have an early register shift value dependency on the result of + PRODUCER. */ +int +arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer) +{ + rtx value, op; + rtx early_op; + + if (!arm_get_set_operands (producer, consumer, &value, &op)) + return 0; + + if ((early_op = arm_find_shift_sub_rtx (op))) + /* We want to check the value being shifted. */ + if (!reg_overlap_mentioned_p (value, XEXP (early_op, 0))) + return 1; + + return 0; +} + +/* Return nonzero if the CONSUMER (a mul or mac op) does not + have an early register mult dependency on the result of + PRODUCER. */ +int +arm_no_early_mul_dep (rtx producer, rtx consumer) +{ + rtx value, op; + + if (!arm_get_set_operands (producer, consumer, &value, &op)) + return 0; + + if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS) + { + if (GET_CODE (XEXP (op, 0)) == MULT) + return !reg_overlap_mentioned_p (value, XEXP (op, 0)); + else + return !reg_overlap_mentioned_p (value, XEXP (op, 1)); + } + + return 0; +} + +/* Return nonzero if the CONSUMER instruction (a store) does not need + PRODUCER's value to calculate the address. */ + +int +arm_no_early_store_addr_dep (rtx producer, rtx consumer) +{ + rtx value = arm_find_sub_rtx_with_code (producer, SET, false); + rtx addr = arm_find_sub_rtx_with_code (consumer, SET, false); + + if (value) + value = SET_DEST (value); + + if (addr) + addr = SET_DEST (addr); + + if (!value || !addr) + return 0; + + return !reg_overlap_mentioned_p (value, addr); +} + +/* Return nonzero if the CONSUMER instruction (a store) does need + PRODUCER's value to calculate the address. */ + +int +arm_early_store_addr_dep (rtx producer, rtx consumer) +{ + return !arm_no_early_store_addr_dep (producer, consumer); +} + +/* Return non-zero iff the consumer (a multiply-accumulate or a + multiple-subtract instruction) has an accumulator dependency on the + result of the producer and no other dependency on that result. It + does not check if the producer is multiply-accumulate instruction. 
*/ +int +arm_mac_accumulator_is_result (rtx producer, rtx consumer) +{ + rtx result; + rtx op0, op1, acc; + + producer = PATTERN (producer); + consumer = PATTERN (consumer); + + if (GET_CODE (producer) == COND_EXEC) + producer = COND_EXEC_CODE (producer); + if (GET_CODE (consumer) == COND_EXEC) + consumer = COND_EXEC_CODE (consumer); + + if (GET_CODE (producer) != SET) + return 0; + + result = XEXP (producer, 0); + + if (GET_CODE (consumer) != SET) + return 0; + + /* Check that the consumer is of the form + (set (...) (plus (mult ...) (...))) + or + (set (...) (minus (...) (mult ...))). */ + if (GET_CODE (XEXP (consumer, 1)) == PLUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1); + acc = XEXP (XEXP (consumer, 1), 1); + } + else if (GET_CODE (XEXP (consumer, 1)) == MINUS) + { + if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT) + return 0; + + op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0); + op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1); + acc = XEXP (XEXP (consumer, 1), 0); + } + else + return 0; + + return (reg_overlap_mentioned_p (result, acc) + && !reg_overlap_mentioned_p (result, op0) + && !reg_overlap_mentioned_p (result, op1)); +} + +/* Return non-zero if the consumer (a multiply-accumulate instruction) + has an accumulator dependency on the result of the producer (a + multiplication instruction) and no other dependency on that result. */ +int +arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) +{ + rtx mul = PATTERN (producer); + rtx mac = PATTERN (consumer); + rtx mul_result; + rtx mac_op0, mac_op1, mac_acc; + + if (GET_CODE (mul) == COND_EXEC) + mul = COND_EXEC_CODE (mul); + if (GET_CODE (mac) == COND_EXEC) + mac = COND_EXEC_CODE (mac); + + /* Check that mul is of the form (set (...) (mult ...)) + and mla is of the form (set (...) (plus (mult ...) (...))). */ + if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT) + || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS + || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT)) + return 0; + + mul_result = XEXP (mul, 0); + mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0); + mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1); + mac_acc = XEXP (XEXP (mac, 1), 1); + + return (reg_overlap_mentioned_p (mul_result, mac_acc) + && !reg_overlap_mentioned_p (mul_result, mac_op0) + && !reg_overlap_mentioned_p (mul_result, mac_op1)); +} diff --git a/gcc-4.9/gcc/config/arm/aarch-cost-tables.h b/gcc-4.9/gcc/config/arm/aarch-cost-tables.h new file mode 100644 index 000000000..c30ea2f92 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/aarch-cost-tables.h @@ -0,0 +1,325 @@ +/* RTX cost tables shared between arm and aarch64. + + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef GCC_AARCH_COST_TABLES_H +#define GCC_AARCH_COST_TABLES_H + +const struct cpu_cost_table generic_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + 0, /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + 0, /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + COSTS_N_INSNS (1), /* non_exec. */ + false /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (1), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (3), /* add. */ + COSTS_N_INSNS (3), /* extend_add. */ + COSTS_N_INSNS (8) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (3), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (2), /* load. */ + COSTS_N_INSNS (2), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 1, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* loadf. */ + COSTS_N_INSNS (3), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (2), /* store. */ + COSTS_N_INSNS (3), /* strd. */ + COSTS_N_INSNS (2), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 1, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (3), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (7), /* div. */ + COSTS_N_INSNS (2), /* mult. */ + COSTS_N_INSNS (3), /* mult_addsub. */ + COSTS_N_INSNS (3), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + 0, /* widen. */ + 0, /* narrow. */ + 0, /* toint. */ + 0, /* fromint. */ + 0 /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + 0, /* widen. */ + 0, /* narrow. */ + 0, /* toint. */ + 0, /* fromint. */ + 0 /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table cortexa53_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + COSTS_N_INSNS (1), /* shift. */ + COSTS_N_INSNS (2), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (2), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (2), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + 0, /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (1), /* simple. */ + COSTS_N_INSNS (2), /* flag_setting. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (7) /* idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (2), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (2), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (15) /* idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (1), /* load. */ + COSTS_N_INSNS (1), /* load_sign_extend. 
*/ + COSTS_N_INSNS (1), /* ldrd. */ + COSTS_N_INSNS (1), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* loadf. */ + COSTS_N_INSNS (1), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (3), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (1), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (30), /* div. */ + COSTS_N_INSNS (3), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (1), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table cortexa57_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (3), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (2), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (18) /* idiv. */ + }, + /* MULT DImode */ + { + COSTS_N_INSNS (4), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (4), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (34) /* idiv. */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (3), /* load. */ + COSTS_N_INSNS (3), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* loadf. */ + COSTS_N_INSNS (4), /* loadd. */ + COSTS_N_INSNS (5), /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (17), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (9), /* mult_addsub. */ + COSTS_N_INSNS (9), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. 
*/ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (31), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (9), /* mult_addsub. */ + COSTS_N_INSNS (9), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +#endif /* GCC_AARCH_COST_TABLES_H */ diff --git a/gcc-4.9/gcc/config/arm/aout.h b/gcc-4.9/gcc/config/arm/aout.h new file mode 100644 index 000000000..51d32a9d4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/aout.h @@ -0,0 +1,303 @@ +/* Definitions of target machine for GNU compiler, for ARM with a.out + Copyright (C) 1995-2014 Free Software Foundation, Inc. + Contributed by Richard Earnshaw (rearnsha@armltd.co.uk). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef ASM_APP_ON +#define ASM_APP_ON "" +#endif +#ifndef ASM_APP_OFF +#define ASM_APP_OFF "" +#endif + +/* Switch to the text or data segment. */ +#define TEXT_SECTION_ASM_OP "\t.text" +#define DATA_SECTION_ASM_OP "\t.data" +#define BSS_SECTION_ASM_OP "\t.bss" + +/* Note: If USER_LABEL_PREFIX or LOCAL_LABEL_PREFIX are changed, + make sure that this change is reflected in the function + coff_arm_is_local_label_name() in bfd/coff-arm.c. */ +#ifndef REGISTER_PREFIX +#define REGISTER_PREFIX "" +#endif + +#ifndef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" +#endif + +#ifndef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "" +#endif + +/* The assembler's names for the registers. Note that the ?xx registers are + there so that VFPv3/NEON registers D16-D31 have the same spacing as D0-D15 + (each of which is overlaid on two S registers), although there are no + actual single-precision registers which correspond to D16-D31. 
*/ +#ifndef REGISTER_NAMES +#define REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc", \ + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", \ + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", \ + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", \ + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", \ + "d16", "?16", "d17", "?17", "d18", "?18", "d19", "?19", \ + "d20", "?20", "d21", "?21", "d22", "?22", "d23", "?23", \ + "d24", "?24", "d25", "?25", "d26", "?26", "d27", "?27", \ + "d28", "?28", "d29", "?29", "d30", "?30", "d31", "?31", \ + "wr0", "wr1", "wr2", "wr3", \ + "wr4", "wr5", "wr6", "wr7", \ + "wr8", "wr9", "wr10", "wr11", \ + "wr12", "wr13", "wr14", "wr15", \ + "wcgr0", "wcgr1", "wcgr2", "wcgr3", \ + "cc", "vfpcc", "sfp", "afp" \ +} +#endif + +#ifndef ADDITIONAL_REGISTER_NAMES +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + {"a1", 0}, \ + {"a2", 1}, \ + {"a3", 2}, \ + {"a4", 3}, \ + {"v1", 4}, \ + {"v2", 5}, \ + {"v3", 6}, \ + {"v4", 7}, \ + {"v5", 8}, \ + {"v6", 9}, \ + {"rfp", 9}, /* Historical. */ \ + {"sb", 9}, /* Historical. */ \ + {"v7", 10}, \ + {"sl", 10}, /* Historical. */ \ + {"r11", 11}, /* fp */ \ + {"r12", 12}, /* ip */ \ + {"r13", 13}, /* sp */ \ + {"r14", 14}, /* lr */ \ + {"r15", 15} /* pc */ \ +} +#endif + +#ifndef OVERLAPPING_REGISTER_NAMES +#define OVERLAPPING_REGISTER_NAMES \ +{ \ + {"d0", FIRST_VFP_REGNUM + 0, 2}, \ + {"d1", FIRST_VFP_REGNUM + 2, 2}, \ + {"d2", FIRST_VFP_REGNUM + 4, 2}, \ + {"d3", FIRST_VFP_REGNUM + 6, 2}, \ + {"d4", FIRST_VFP_REGNUM + 8, 2}, \ + {"d5", FIRST_VFP_REGNUM + 10, 2}, \ + {"d6", FIRST_VFP_REGNUM + 12, 2}, \ + {"d7", FIRST_VFP_REGNUM + 14, 2}, \ + {"d8", FIRST_VFP_REGNUM + 16, 2}, \ + {"d9", FIRST_VFP_REGNUM + 18, 2}, \ + {"d10", FIRST_VFP_REGNUM + 20, 2}, \ + {"d11", FIRST_VFP_REGNUM + 22, 2}, \ + {"d12", FIRST_VFP_REGNUM + 24, 2}, \ + {"d13", FIRST_VFP_REGNUM + 26, 2}, \ + {"d14", FIRST_VFP_REGNUM + 28, 2}, \ + {"d15", FIRST_VFP_REGNUM + 30, 2}, \ + {"q0", FIRST_VFP_REGNUM + 0, 4}, \ + {"q1", FIRST_VFP_REGNUM + 4, 4}, \ + {"q2", FIRST_VFP_REGNUM + 8, 4}, \ + {"q3", FIRST_VFP_REGNUM + 12, 4}, \ + {"q4", FIRST_VFP_REGNUM + 16, 4}, \ + {"q5", FIRST_VFP_REGNUM + 20, 4}, \ + {"q6", FIRST_VFP_REGNUM + 24, 4}, \ + {"q7", FIRST_VFP_REGNUM + 28, 4}, \ + {"q8", FIRST_VFP_REGNUM + 32, 4}, \ + {"q9", FIRST_VFP_REGNUM + 36, 4}, \ + {"q10", FIRST_VFP_REGNUM + 40, 4}, \ + {"q11", FIRST_VFP_REGNUM + 44, 4}, \ + {"q12", FIRST_VFP_REGNUM + 48, 4}, \ + {"q13", FIRST_VFP_REGNUM + 52, 4}, \ + {"q14", FIRST_VFP_REGNUM + 56, 4}, \ + {"q15", FIRST_VFP_REGNUM + 60, 4} \ +} +#endif + +#ifndef NO_DOLLAR_IN_LABEL +#define NO_DOLLAR_IN_LABEL 1 +#endif + +/* Generate DBX debugging information. riscix.h will undefine this because + the native assembler does not support stabs. */ +#define DBX_DEBUGGING_INFO 1 + +/* Acorn dbx moans about continuation chars, so don't use any. */ +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif + +/* Output a function label definition. */ +#ifndef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ + do \ + { \ + ARM_DECLARE_FUNCTION_NAME (STREAM, NAME, DECL); \ + ASM_OUTPUT_LABEL (STREAM, NAME); \ + } \ + while (0) +#endif + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* Make an internal label into a string. 
*/ +#ifndef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \ + sprintf (STRING, "*%s%s%u", LOCAL_LABEL_PREFIX, PREFIX, (unsigned int)(NUM)) +#endif + +/* Output an element of a dispatch table. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + do \ + { \ + gcc_assert (!TARGET_THUMB2); \ + asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE); \ + } \ + while (0) + + +/* Thumb-2 always uses addr_diff_elf so that the Table Branch instructions + can be used. For non-pic code where the offsets do not suitable for + TBB/TBH the elements are output as absolute labels. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM, "\tb\t%LL%d\n", VALUE); \ + else if (TARGET_THUMB1) \ + { \ + if (flag_pic || optimize_size) \ + { \ + switch (GET_MODE(body)) \ + { \ + case QImode: \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case HImode: /* TBH */ \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d\n", \ + VALUE, REL); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ + } \ + else \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \ + } \ + else /* Thumb-2 */ \ + { \ + switch (GET_MODE(body)) \ + { \ + case QImode: /* TBB */ \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case HImode: /* TBH */ \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d)/2\n", \ + VALUE, REL); \ + break; \ + case SImode: \ + if (flag_pic) \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1-%LL%d\n", VALUE, REL); \ + else \ + asm_fprintf (STREAM, "\t.word\t%LL%d+1\n", VALUE); \ + break; \ + default: \ + gcc_unreachable(); \ + } \ + } \ + } \ + while (0) + + +#undef ASM_OUTPUT_ASCII +#define ASM_OUTPUT_ASCII(STREAM, PTR, LEN) \ + output_ascii_pseudo_op (STREAM, (const unsigned char *) (PTR), LEN) + +/* Output a gap. In fact we fill it with nulls. */ +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(STREAM, NBYTES) \ + fprintf (STREAM, "\t.space\t%d\n", (int) (NBYTES)) + +/* Align output to a power of two. Horrible /bin/as. */ +#ifndef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do \ + { \ + register int amount = 1 << (POWER); \ + \ + if (amount == 2) \ + fprintf (STREAM, "\t.even\n"); \ + else if (amount != 1) \ + fprintf (STREAM, "\t.align\t%d\n", amount - 4); \ + } \ + while (0) +#endif + +/* Output a common block. */ +#ifndef ASM_OUTPUT_COMMON +#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED) \ + do \ + { \ + fprintf (STREAM, "\t.comm\t"); \ + assemble_name (STREAM, NAME); \ + asm_fprintf (STREAM, ", %d\t%@ %d\n", \ + (int)(ROUNDED), (int)(SIZE)); \ + } \ + while (0) +#endif + +/* Output a local common block. /bin/as can't do this, so hack a + `.space' into the bss segment. Note that this is *bad* practice, + which is guaranteed NOT to work since it doesn't define STATIC + COMMON space but merely STATIC BSS space. */ +#ifndef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(STREAM, NAME, SIZE, ALIGN) \ + do \ + { \ + switch_to_section (bss_section); \ + ASM_OUTPUT_ALIGN (STREAM, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (STREAM, NAME); \ + fprintf (STREAM, "\t.space\t%d\n", (int)(SIZE)); \ + } \ + while (0) +#endif + +/* Output a zero-initialized block. 
*/ +#ifndef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(STREAM, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (STREAM, DECL, NAME, SIZE, ALIGN) +#endif + +#ifndef ASM_COMMENT_START +#define ASM_COMMENT_START "@" +#endif + +/* This works for GAS and some other assemblers. */ +#define SET_ASM_OP "\t.set\t" diff --git a/gcc-4.9/gcc/config/arm/arm-arches.def b/gcc-4.9/gcc/config/arm/arm-arches.def new file mode 100644 index 000000000..9adb791db --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-arches.def @@ -0,0 +1,60 @@ +/* ARM CPU architectures. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + ARM_ARCH(NAME, CORE, ARCH, FLAGS) + + The NAME is the name of the architecture, represented as a string + constant. The CORE is the identifier for a core representative of + this architecture. ARCH is the architecture revision. FLAGS are + the flags implied by the architecture. + + genopt.sh assumes no whitespace up to the first "," in each entry. */ + +ARM_ARCH("armv2", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2) +ARM_ARCH("armv2a", arm2, 2, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2) +ARM_ARCH("armv3", arm6, 3, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3) +ARM_ARCH("armv3m", arm7m, 3M, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M) +ARM_ARCH("armv4", arm7tdmi, 4, FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4) +/* Strictly, FL_MODE26 is a permitted option for v4t, but there are no + implementations that support it, so we will leave it out for now. 
*/ +ARM_ARCH("armv4t", arm7tdmi, 4T, FL_CO_PROC | FL_FOR_ARCH4T) +ARM_ARCH("armv5", arm10tdmi, 5, FL_CO_PROC | FL_FOR_ARCH5) +ARM_ARCH("armv5t", arm10tdmi, 5T, FL_CO_PROC | FL_FOR_ARCH5T) +ARM_ARCH("armv5e", arm1026ejs, 5E, FL_CO_PROC | FL_FOR_ARCH5E) +ARM_ARCH("armv5te", arm1026ejs, 5TE, FL_CO_PROC | FL_FOR_ARCH5TE) +ARM_ARCH("armv6", arm1136js, 6, FL_CO_PROC | FL_FOR_ARCH6) +ARM_ARCH("armv6j", arm1136js, 6J, FL_CO_PROC | FL_FOR_ARCH6J) +ARM_ARCH("armv6k", mpcore, 6K, FL_CO_PROC | FL_FOR_ARCH6K) +ARM_ARCH("armv6z", arm1176jzs, 6Z, FL_CO_PROC | FL_FOR_ARCH6Z) +ARM_ARCH("armv6zk", arm1176jzs, 6ZK, FL_CO_PROC | FL_FOR_ARCH6ZK) +ARM_ARCH("armv6t2", arm1156t2s, 6T2, FL_CO_PROC | FL_FOR_ARCH6T2) +ARM_ARCH("armv6-m", cortexm1, 6M, FL_FOR_ARCH6M) +ARM_ARCH("armv6s-m", cortexm1, 6M, FL_FOR_ARCH6M) +ARM_ARCH("armv7", cortexa8, 7, FL_CO_PROC | FL_FOR_ARCH7) +ARM_ARCH("armv7-a", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7A) +ARM_ARCH("armv7ve", cortexa8, 7A, FL_CO_PROC | FL_FOR_ARCH7VE) +ARM_ARCH("armv7-r", cortexr4, 7R, FL_CO_PROC | FL_FOR_ARCH7R) +ARM_ARCH("armv7-m", cortexm3, 7M, FL_CO_PROC | FL_FOR_ARCH7M) +ARM_ARCH("armv7e-m", cortexm4, 7EM, FL_CO_PROC | FL_FOR_ARCH7EM) +ARM_ARCH("armv8-a", cortexa53, 8A, FL_CO_PROC | FL_FOR_ARCH8A) +ARM_ARCH("armv8-a+crc",cortexa53, 8A,FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A) +ARM_ARCH("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT) +ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2) diff --git a/gcc-4.9/gcc/config/arm/arm-c.c b/gcc-4.9/gcc/config/arm/arm-c.c new file mode 100644 index 000000000..af64f7a1f --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-c.c @@ -0,0 +1,44 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "tree.h" +#include "c-family/c-common.h" + +/* Output C specific EABI object attributes. These can not be done in + arm.c because they require information from the C frontend. */ + +static void +arm_output_c_attributes (void) +{ + int wchar_size = (int)(TYPE_PRECISION (wchar_type_node) / BITS_PER_UNIT); + arm_emit_eabi_attribute ("Tag_ABI_PCS_wchar_t", 18, wchar_size); +} + + +/* Setup so that common code calls arm_output_c_attributes. */ + +void +arm_lang_object_attributes_init (void) +{ + arm_lang_output_object_attributes_hook = arm_output_c_attributes; +} diff --git a/gcc-4.9/gcc/config/arm/arm-cores.def b/gcc-4.9/gcc/config/arm/arm-cores.def new file mode 100644 index 000000000..42f00b463 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-cores.def @@ -0,0 +1,159 @@ +/* ARM CPU Cores + Copyright (C) 2003-2014 Free Software Foundation, Inc. + Written by CodeSourcery, LLC + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + ARM_CORE(CORE_NAME, INTERNAL_IDENT, TUNE_IDENT, ARCH, FLAGS, COSTS) + + The CORE_NAME is the name of the core, represented as a string constant. + The INTERNAL_IDENT is the name of the core represented as an identifier. + This must be unique for each entry in this table. + The TUNE_IDENT is the name of the core for which scheduling decisions + should be made, represented as an identifier. + ARCH is the architecture revision implemented by the chip. + FLAGS are the bitwise-or of the traits that apply to that core. + This need not include flags implied by the architecture. + COSTS is the name of the rtx_costs routine to use. + + If you update this table, you must update the "tune" attribute in + arm.md. + + Some tools assume no whitespace up to the first "," in each entry. */ + +/* V2/V2A Architecture Processors */ +ARM_CORE("arm2", arm2, arm2, 2, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm250", arm250, arm250, 2, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm3", arm3, arm3, 2, FL_CO_PROC | FL_MODE26, slowmul) + +/* V3 Architecture Processors */ +ARM_CORE("arm6", arm6, arm6, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm60", arm60, arm60, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm600", arm600, arm600, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm610", arm610, arm610, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm620", arm620, arm620, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7", arm7, arm7, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm7d", arm7d, arm7d, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm7di", arm7di, arm7di, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm70", arm70, arm70, 3, FL_CO_PROC | FL_MODE26, slowmul) +ARM_CORE("arm700", arm700, arm700, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm700i", arm700i, arm700i, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm710", arm710, arm710, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm720", arm720, arm720, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm710c", arm710c, arm710c, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7100", arm7100, arm7100, 3, FL_MODE26 | FL_WBUF, slowmul) +ARM_CORE("arm7500", arm7500, arm7500, 3, FL_MODE26 | FL_WBUF, slowmul) +/* Doesn't have an external co-proc, but does have embedded fpa. */ +ARM_CORE("arm7500fe", arm7500fe, arm7500fe, 3, FL_CO_PROC | FL_MODE26 | FL_WBUF, slowmul) + +/* V3M Architecture Processors */ +/* arm7m doesn't exist on its own, but only with D, ("and", and I), but + those don't alter the code, so arm7m is sometimes used. 
*/ +ARM_CORE("arm7m", arm7m, arm7m, 3M, FL_CO_PROC | FL_MODE26, fastmul) +ARM_CORE("arm7dm", arm7dm, arm7dm, 3M, FL_CO_PROC | FL_MODE26, fastmul) +ARM_CORE("arm7dmi", arm7dmi, arm7dmi, 3M, FL_CO_PROC | FL_MODE26, fastmul) + +/* V4 Architecture Processors */ +ARM_CORE("arm8", arm8, arm8, 4, FL_MODE26 | FL_LDSCHED, fastmul) +ARM_CORE("arm810", arm810, arm810, 4, FL_MODE26 | FL_LDSCHED, fastmul) +ARM_CORE("strongarm", strongarm, strongarm, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) +ARM_CORE("strongarm110", strongarm110, strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) +ARM_CORE("strongarm1100", strongarm1100, strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) +ARM_CORE("strongarm1110", strongarm1110, strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm) +ARM_CORE("fa526", fa526, fa526, 4, FL_LDSCHED, fastmul) +ARM_CORE("fa626", fa626, fa626, 4, FL_LDSCHED, fastmul) + +/* V4T Architecture Processors */ +ARM_CORE("arm7tdmi", arm7tdmi, arm7tdmi, 4T, FL_CO_PROC, fastmul) +ARM_CORE("arm7tdmi-s", arm7tdmis, arm7tdmis, 4T, FL_CO_PROC, fastmul) +ARM_CORE("arm710t", arm710t, arm710t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm720t", arm720t, arm720t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm740t", arm740t, arm740t, 4T, FL_WBUF, fastmul) +ARM_CORE("arm9", arm9, arm9, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm9tdmi", arm9tdmi, arm9tdmi, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm920", arm920, arm920, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm920t", arm920t, arm920t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm922t", arm922t, arm922t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("arm940t", arm940t, arm940t, 4T, FL_LDSCHED, fastmul) +ARM_CORE("ep9312", ep9312, ep9312, 4T, FL_LDSCHED, fastmul) + +/* V5T Architecture Processors */ +ARM_CORE("arm10tdmi", arm10tdmi, arm10tdmi, 5T, FL_LDSCHED, fastmul) +ARM_CORE("arm1020t", arm1020t, arm1020t, 5T, FL_LDSCHED, fastmul) + +/* V5TE Architecture Processors */ +ARM_CORE("arm9e", arm9e, arm9e, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm946e-s", arm946es, arm946es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm966e-s", arm966es, arm966es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm968e-s", arm968es, arm968es, 5TE, FL_LDSCHED, 9e) +ARM_CORE("arm10e", arm10e, arm10e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("arm1020e", arm1020e, arm1020e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("arm1022e", arm1022e, arm1022e, 5TE, FL_LDSCHED, fastmul) +ARM_CORE("xscale", xscale, xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale) +ARM_CORE("iwmmxt", iwmmxt, iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) +ARM_CORE("iwmmxt2", iwmmxt2, iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2, xscale) +ARM_CORE("fa606te", fa606te, fa606te, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fa626te", fa626te, fa626te, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fmp626", fmp626, fmp626, 5TE, FL_LDSCHED, 9e) +ARM_CORE("fa726te", fa726te, fa726te, 5TE, FL_LDSCHED, fa726te) + +/* V5TEJ Architecture Processors */ +ARM_CORE("arm926ej-s", arm926ejs, arm926ejs, 5TEJ, FL_LDSCHED, 9e) +ARM_CORE("arm1026ej-s", arm1026ejs, arm1026ejs, 5TEJ, FL_LDSCHED, 9e) + +/* V6 Architecture Processors */ +ARM_CORE("arm1136j-s", arm1136js, arm1136js, 6J, FL_LDSCHED, 9e) +ARM_CORE("arm1136jf-s", arm1136jfs, arm1136jfs, 6J, FL_LDSCHED | FL_VFPV2, 9e) +ARM_CORE("arm1176jz-s", arm1176jzs, arm1176jzs, 6ZK, FL_LDSCHED, 9e) +ARM_CORE("arm1176jzf-s", arm1176jzfs, arm1176jzfs, 6ZK, FL_LDSCHED | FL_VFPV2, 9e) +ARM_CORE("mpcorenovfp", mpcorenovfp, mpcorenovfp, 6K, FL_LDSCHED, 9e) +ARM_CORE("mpcore", mpcore, mpcore, 6K, FL_LDSCHED | 
FL_VFPV2, 9e) +ARM_CORE("arm1156t2-s", arm1156t2s, arm1156t2s, 6T2, FL_LDSCHED, v6t2) +ARM_CORE("arm1156t2f-s", arm1156t2fs, arm1156t2fs, 6T2, FL_LDSCHED | FL_VFPV2, v6t2) + +/* V6M Architecture Processors */ +ARM_CORE("cortex-m1", cortexm1, cortexm1, 6M, FL_LDSCHED, v6m) +ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M, FL_LDSCHED, v6m) +ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M, FL_LDSCHED, v6m) + +/* V7 Architecture Processors */ +ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex) +ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5) +ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7) +ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex) +ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9) +ARM_CORE("cortex-a12", cortexa12, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) +ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) +ARM_CORE("cortex-r4", cortexr4, cortexr4, 7R, FL_LDSCHED, cortex) +ARM_CORE("cortex-r4f", cortexr4f, cortexr4f, 7R, FL_LDSCHED, cortex) +ARM_CORE("cortex-r5", cortexr5, cortexr5, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) +ARM_CORE("cortex-r7", cortexr7, cortexr7, 7R, FL_LDSCHED | FL_ARM_DIV, cortex) +ARM_CORE("cortex-m4", cortexm4, cortexm4, 7EM, FL_LDSCHED, v7m) +ARM_CORE("cortex-m3", cortexm3, cortexm3, 7M, FL_LDSCHED, v7m) +ARM_CORE("marvell-pj4", marvell_pj4, marvell_pj4, 7A, FL_LDSCHED, 9e) + +/* V7 big.LITTLE implementations */ +ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) + +/* V8 Architecture Processors */ +ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a53) +ARM_CORE("cortex-a57", cortexa57, cortexa15, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) + +/* V8 big.LITTLE implementations */ +ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, FL_LDSCHED | FL_CRC32, cortex_a57) diff --git a/gcc-4.9/gcc/config/arm/arm-fixed.md b/gcc-4.9/gcc/config/arm/arm-fixed.md new file mode 100644 index 000000000..4ab9d3597 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-fixed.md @@ -0,0 +1,429 @@ +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . +;; +;; This file contains ARM instructions that support fixed-point operations. 
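The patterns in this file implement GCC's fixed-point arithmetic (the ISO/IEC TR 18037 _Fract/_Accum types and their _Sat variants) for ARM; which pattern a given operation expands through is driven by the FIXED/ADDSUB/QADDSUB mode iterators defined with the other ARM iterators. As a rough illustration only (the function names are made up, and it assumes a toolchain built with fixed-point support), C source like the following is what ends up going through the wrapping and saturating add patterns below:

/* Wrapping fixed-point add: expected to go through the plain add
   pattern below.  */
short _Fract
wrap_add (short _Fract a, short _Fract b)
{
  return a + b;
}

/* Saturating fixed-point add: expected to go through one of the
   ss_plus (qadd-style) patterns below on cores with the saturating
   ALU instructions (TARGET_INT_SIMD).  */
_Sat short _Fract
sat_add (_Sat short _Fract a, _Sat short _Fract b)
{
  return a + b;
}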
+ +(define_insn "add3" + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "add%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alu_reg")]) + +(define_insn "add3" + [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") + (plus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r") + (match_operand:ADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "sadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "usadd3" + [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") + (us_plus:UQADDSUB (match_operand:UQADDSUB 1 "s_register_operand" "r") + (match_operand:UQADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uqadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "ssadd3" + [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") + (ss_plus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand" "r") + (match_operand:QADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "qadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "sub3" + [(set (match_operand:FIXED 0 "s_register_operand" "=l,r") + (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r") + (match_operand:FIXED 2 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "sub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alu_reg")]) + +(define_insn "sub3" + [(set (match_operand:ADDSUB 0 "s_register_operand" "=r") + (minus:ADDSUB (match_operand:ADDSUB 1 "s_register_operand" "r") + (match_operand:ADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "ssub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "ussub3" + [(set (match_operand:UQADDSUB 0 "s_register_operand" "=r") + (us_minus:UQADDSUB + (match_operand:UQADDSUB 1 "s_register_operand" "r") + (match_operand:UQADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uqsub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +(define_insn "sssub3" + [(set (match_operand:QADDSUB 0 "s_register_operand" "=r") + (ss_minus:QADDSUB (match_operand:QADDSUB 1 "s_register_operand" "r") + (match_operand:QADDSUB 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "qsub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_reg")]) + +;; Fractional multiplies. + +; Note: none of these do any rounding. 
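The note above is worth spelling out: here is a plain-C model of the truncating s.15 case handled by the HQmode expander below (a 16x16->32 multiply followed by a 16-bit extract at bit 15), with the types and function name purely illustrative:

#include <stdint.h>

/* Truncating s.15 x s.15 fractional multiply: the s.30 product's low
   15 bits are simply discarded, i.e. no rounding of any kind.  */
static int16_t
hq_mul_model (int16_t a, int16_t b)
{
  int32_t prod = (int32_t) a * b;    /* s.30 product */
  return (int16_t) (prod >> 15);     /* keep bits [15..30]: s.15, truncated */
}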
+ +(define_expand "mulqq3" + [(set (match_operand:QQ 0 "s_register_operand" "") + (mult:QQ (match_operand:QQ 1 "s_register_operand" "") + (match_operand:QQ 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY && arm_arch_thumb2" +{ + rtx tmp1 = gen_reg_rtx (HImode); + rtx tmp2 = gen_reg_rtx (HImode); + rtx tmp3 = gen_reg_rtx (SImode); + + emit_insn (gen_extendqihi2 (tmp1, gen_lowpart (QImode, operands[1]))); + emit_insn (gen_extendqihi2 (tmp2, gen_lowpart (QImode, operands[2]))); + emit_insn (gen_mulhisi3 (tmp3, tmp1, tmp2)); + emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp3, GEN_INT (8), + GEN_INT (7))); + DONE; +}) + +(define_expand "mulhq3" + [(set (match_operand:HQ 0 "s_register_operand" "") + (mult:HQ (match_operand:HQ 1 "s_register_operand" "") + (match_operand:HQ 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY && arm_arch_thumb2" +{ + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]))); + /* We're doing a s.15 * s.15 multiplication, getting an s.30 result. Extract + an s.15 value from that. This won't overflow/saturate for _Fract + values. */ + emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp, + GEN_INT (16), GEN_INT (15))); + DONE; +}) + +(define_expand "mulsq3" + [(set (match_operand:SQ 0 "s_register_operand" "") + (mult:SQ (match_operand:SQ 1 "s_register_operand" "") + (match_operand:SQ 2 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" +{ + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + + /* s.31 * s.31 -> s.62 multiplication. */ + emit_insn (gen_mulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2]))); + emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (31))); + emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (1))); + emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3)); + + DONE; +}) + +;; Accumulator multiplies. 
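The SA ("short accum") patterns below work on an s16.15 layout, so a 32x32->64 signed multiply produces a value with 30 fraction bits that must be rescaled back to 15. A plain-C model of the recombination step performed by the first expander below (low half shifted right by 15, high half shifted left by 17, then ORed together); the function name is illustrative:

#include <stdint.h>

static int32_t
sa_mul_model (int32_t a, int32_t b)              /* a, b in s16.15 */
{
  int64_t prod = (int64_t) a * b;                /* s32.30 product */
  uint32_t lo = (uint32_t) prod;
  uint32_t hi = (uint32_t) ((uint64_t) prod >> 32);
  return (int32_t) ((lo >> 15) | (hi << 17));    /* back to 15 fraction bits */
}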
+ +(define_expand "mulsa3" + [(set (match_operand:SA 0 "s_register_operand" "") + (mult:SA (match_operand:SA 1 "s_register_operand" "") + (match_operand:SA 2 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" +{ + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + + emit_insn (gen_mulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2]))); + emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (15))); + emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (17))); + emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3)); + + DONE; +}) + +(define_expand "mulusa3" + [(set (match_operand:USA 0 "s_register_operand" "") + (mult:USA (match_operand:USA 1 "s_register_operand" "") + (match_operand:USA 2 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" +{ + rtx tmp1 = gen_reg_rtx (DImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + + emit_insn (gen_umulsidi3 (tmp1, gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2]))); + emit_insn (gen_lshrsi3 (tmp2, gen_lowpart (SImode, tmp1), GEN_INT (16))); + emit_insn (gen_ashlsi3 (tmp3, gen_highpart (SImode, tmp1), GEN_INT (16))); + emit_insn (gen_iorsi3 (gen_lowpart (SImode, operands[0]), tmp2, tmp3)); + + DONE; +}) + +;; The code sequence emitted by this insn pattern uses the Q flag, which GCC +;; doesn't generally know about, so we don't bother expanding to individual +;; instructions. It may be better to just use an out-of-line asm libcall for +;; this. + +(define_insn "ssmulsa3" + [(set (match_operand:SA 0 "s_register_operand" "=r") + (ss_mult:SA (match_operand:SA 1 "s_register_operand" "r") + (match_operand:SA 2 "s_register_operand" "r"))) + (clobber (match_scratch:DI 3 "=r")) + (clobber (match_scratch:SI 4 "=r")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && arm_arch6" +{ + /* s16.15 * s16.15 -> s32.30. */ + output_asm_insn ("smull\\t%Q3, %R3, %1, %2", operands); + + if (TARGET_ARM) + output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands); + else + { + output_asm_insn ("mov\\t%4, #0", operands); + output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands); + } + + /* We have: + 31 high word 0 31 low word 0 + + [ S i i .... i i i ] [ i f f f ... f f ] + | + v + [ S i ... i f ... f f ] + + Need 16 integral bits, so saturate at 15th bit of high word. */ + + output_asm_insn ("ssat\\t%R3, #15, %R3", operands); + output_asm_insn ("mrs\\t%4, APSR", operands); + output_asm_insn ("tst\\t%4, #1<<27", operands); + if (arm_restrict_it) + { + output_asm_insn ("mvn\\t%4, %R3, asr #32", operands); + output_asm_insn ("it\\tne", operands); + output_asm_insn ("movne\\t%Q3, %4", operands); + } + else + { + if (TARGET_THUMB2) + output_asm_insn ("it\\tne", operands); + output_asm_insn ("mvnne\\t%Q3, %R3, asr #32", operands); + } + output_asm_insn ("mov\\t%0, %Q3, lsr #15", operands); + output_asm_insn ("orr\\t%0, %0, %R3, asl #17", operands); + return ""; +} + [(set_attr "conds" "clob") + (set_attr "type" "multiple") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (if_then_else (match_test "arm_restrict_it") + (const_int 40) + (const_int 38)) + (const_int 32)))]) + +;; Same goes for this. 
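Both the signed sequence above and the unsigned variant that follows amount to clamping the rescaled product into the destination range. A rough C-level model of the signed case, with the Q-flag and register juggling deliberately omitted and an illustrative function name:

#include <stdint.h>

static int32_t
ssat_sa_mul_model (int32_t a, int32_t b)   /* a, b in s16.15 */
{
  int64_t v = ((int64_t) a * b) >> 15;     /* s32.30 product rescaled to .15 */
  if (v > INT32_MAX)
    v = INT32_MAX;                         /* most positive s16.15 value */
  else if (v < INT32_MIN)
    v = INT32_MIN;                         /* most negative s16.15 value */
  return (int32_t) v;
}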
+ +(define_insn "usmulusa3" + [(set (match_operand:USA 0 "s_register_operand" "=r") + (us_mult:USA (match_operand:USA 1 "s_register_operand" "r") + (match_operand:USA 2 "s_register_operand" "r"))) + (clobber (match_scratch:DI 3 "=r")) + (clobber (match_scratch:SI 4 "=r")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && arm_arch6" +{ + /* 16.16 * 16.16 -> 32.32. */ + output_asm_insn ("umull\\t%Q3, %R3, %1, %2", operands); + + if (TARGET_ARM) + output_asm_insn ("msr\\tAPSR_nzcvq, #0", operands); + else + { + output_asm_insn ("mov\\t%4, #0", operands); + output_asm_insn ("msr\\tAPSR_nzcvq, %4", operands); + } + + /* We have: + 31 high word 0 31 low word 0 + + [ i i i .... i i i ] [ f f f f ... f f ] + | + v + [ i i ... i f ... f f ] + + Need 16 integral bits, so saturate at 16th bit of high word. */ + + output_asm_insn ("usat\\t%R3, #16, %R3", operands); + output_asm_insn ("mrs\\t%4, APSR", operands); + output_asm_insn ("tst\\t%4, #1<<27", operands); + if (arm_restrict_it) + { + output_asm_insn ("sbfx\\t%4, %R3, #15, #1", operands); + output_asm_insn ("it\\tne", operands); + output_asm_insn ("movne\\t%Q3, %4", operands); + } + else + { + if (TARGET_THUMB2) + output_asm_insn ("it\\tne", operands); + output_asm_insn ("sbfxne\\t%Q3, %R3, #15, #1", operands); + } + output_asm_insn ("lsr\\t%0, %Q3, #16", operands); + output_asm_insn ("orr\\t%0, %0, %R3, asl #16", operands); + return ""; +} + [(set_attr "conds" "clob") + (set_attr "type" "multiple") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (if_then_else (match_test "arm_restrict_it") + (const_int 40) + (const_int 38)) + (const_int 32)))]) + +(define_expand "mulha3" + [(set (match_operand:HA 0 "s_register_operand" "") + (mult:HA (match_operand:HA 1 "s_register_operand" "") + (match_operand:HA 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY && arm_arch_thumb2" +{ + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]))); + emit_insn (gen_extv (gen_lowpart (SImode, operands[0]), tmp, GEN_INT (16), + GEN_INT (7))); + + DONE; +}) + +(define_expand "muluha3" + [(set (match_operand:UHA 0 "s_register_operand" "") + (mult:UHA (match_operand:UHA 1 "s_register_operand" "") + (match_operand:UHA 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY" +{ + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + + /* 8.8 * 8.8 -> 16.16 multiply. 
*/ + emit_insn (gen_zero_extendhisi2 (tmp1, gen_lowpart (HImode, operands[1]))); + emit_insn (gen_zero_extendhisi2 (tmp2, gen_lowpart (HImode, operands[2]))); + emit_insn (gen_mulsi3 (tmp3, tmp1, tmp2)); + emit_insn (gen_extzv (gen_lowpart (SImode, operands[0]), tmp3, + GEN_INT (16), GEN_INT (8))); + + DONE; +}) + +(define_expand "ssmulha3" + [(set (match_operand:HA 0 "s_register_operand" "") + (ss_mult:HA (match_operand:HA 1 "s_register_operand" "") + (match_operand:HA 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_DSP_MULTIPLY && arm_arch6" +{ + rtx tmp = gen_reg_rtx (SImode); + rtx rshift; + + emit_insn (gen_mulhisi3 (tmp, gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]))); + + rshift = gen_rtx_ASHIFTRT (SImode, tmp, GEN_INT (7)); + + emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (HImode, operands[0]), + gen_rtx_SS_TRUNCATE (HImode, rshift))); + + DONE; +}) + +(define_expand "usmuluha3" + [(set (match_operand:UHA 0 "s_register_operand" "") + (us_mult:UHA (match_operand:UHA 1 "s_register_operand" "") + (match_operand:UHA 2 "s_register_operand" "")))] + "TARGET_INT_SIMD" +{ + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + rtx rshift_tmp = gen_reg_rtx (SImode); + + /* Note: there's no smul[bt][bt] equivalent for unsigned multiplies. Use a + normal 32x32->32-bit multiply instead. */ + emit_insn (gen_zero_extendhisi2 (tmp1, gen_lowpart (HImode, operands[1]))); + emit_insn (gen_zero_extendhisi2 (tmp2, gen_lowpart (HImode, operands[2]))); + + emit_insn (gen_mulsi3 (tmp3, tmp1, tmp2)); + + /* The operand to "usat" is signed, so we cannot use the "..., asr #8" + form of that instruction since the multiplication result TMP3 may have the + top bit set, thus be negative and saturate to zero. Use a separate + logical right-shift instead. */ + emit_insn (gen_lshrsi3 (rshift_tmp, tmp3, GEN_INT (8))); + emit_insn (gen_arm_usatsihi (gen_lowpart (HImode, operands[0]), rshift_tmp)); + + DONE; +}) + +(define_insn "arm_ssatsihi_shift" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (ss_truncate:HI (match_operator:SI 1 "sat_shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "immediate_operand" "I")])))] + "TARGET_32BIT && arm_arch6" + "ssat%?\\t%0, #16, %2%S1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "1") + (set_attr "type" "alu_shift_imm")]) + +(define_insn "arm_usatsihi" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (us_truncate:HI (match_operand:SI 1 "s_register_operand")))] + "TARGET_INT_SIMD" + "usat%?\\t%0, #16, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_imm")] +) diff --git a/gcc-4.9/gcc/config/arm/arm-fpus.def b/gcc-4.9/gcc/config/arm/arm-fpus.def new file mode 100644 index 000000000..85d9693c1 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-fpus.def @@ -0,0 +1,46 @@ +/* ARM FPU variants. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Before using #include to read this file, define a macro: + + ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) + + The arguments are the fields of struct arm_fpu_desc. + + genopt.sh assumes no whitespace up to the first "," in each entry. */ + +ARM_FPU("vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false, false) +ARM_FPU("vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false, false) +ARM_FPU("vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true, false) +ARM_FPU("vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false, false) +ARM_FPU("vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true, false) +ARM_FPU("vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false, false) +ARM_FPU("vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true, false) +ARM_FPU("neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false, false) +ARM_FPU("neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, true, false) +ARM_FPU("vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true, false) +ARM_FPU("vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true, false) +ARM_FPU("fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true, false) +ARM_FPU("neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true, false) +ARM_FPU("fp-armv8", ARM_FP_MODEL_VFP, 8, VFP_REG_D32, false, true, false) +ARM_FPU("neon-fp-armv8",ARM_FP_MODEL_VFP, 8, VFP_REG_D32, true, true, false) +ARM_FPU("crypto-neon-fp-armv8", + ARM_FP_MODEL_VFP, 8, VFP_REG_D32, true, true, true) +/* Compatibility aliases. */ +ARM_FPU("vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false, false) diff --git a/gcc-4.9/gcc/config/arm/arm-generic.md b/gcc-4.9/gcc/config/arm/arm-generic.md new file mode 100644 index 000000000..b26c72c44 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-generic.md @@ -0,0 +1,152 @@ +;; Generic ARM Pipeline Description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +(define_automaton "arm") + +;; Write buffer +; +; Strictly, we should model a 4-deep write buffer for ARM7xx based chips +; +; The write buffer on some of the arm6 processors is hard to model exactly. +; There is room in the buffer for up to two addresses and up to eight words +; of memory, but the two needn't be split evenly. When writing the two +; addresses are fully pipelined. However, a read from memory that is not +; currently in the cache will block until the writes have completed. +; It is normally the case that FCLK and MCLK will be in the ratio 2:1, so +; writes will take 2 FCLK cycles per word, if FCLK and MCLK are asynchronous +; (they aren't allowed to be at present) then there is a startup cost of 1MCLK +; cycle to add as well. 
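Reading the reservations that follow against the note above: with FCLK and MCLK in the usual 2:1 ratio, each buffered word is modelled as two cycles on top of a small fixed cost, which is where the 5/7/9/11-cycle blockages for 1- to 4-word stores come from. A trivial restatement of that arithmetic, purely illustrative:

static int
wbuf_blockage_cycles (int nwords)
{
  /* Two cycles per buffered word plus a fixed three-cycle overhead gives
     the store_wbuf .. store4_wbuf blockage values of 5, 7, 9 and 11.  */
  return 2 * nwords + 3;
}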
+(define_cpu_unit "write_buf" "arm") + +;; Write blockage unit +; +; The write_blockage unit models (partially), the fact that reads will stall +; until the write buffer empties. +(define_cpu_unit "write_blockage" "arm") + +;; Core +; +(define_cpu_unit "core" "arm") + +(define_insn_reservation "store_wbuf" 5 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store1"))) + "core+write_buf*3+write_blockage*5") + +(define_insn_reservation "store2_wbuf" 7 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store2"))) + "core+write_buf*4+write_blockage*7") + +(define_insn_reservation "store3_wbuf" 9 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store3"))) + "core+write_buf*5+write_blockage*9") + +(define_insn_reservation "store4_wbuf" 11 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "yes") + (eq_attr "type" "store4"))) + "core+write_buf*6+write_blockage*11") + +(define_insn_reservation "store2" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store2"))) + "core*3") + +(define_insn_reservation "store3" 4 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store3"))) + "core*4") + +(define_insn_reservation "store4" 5 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "model_wbuf" "no") + (eq_attr "type" "store4"))) + "core*5") + +(define_insn_reservation "store_ldsched" 1 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (eq_attr "type" "store1"))) + "core") + +(define_insn_reservation "load_ldsched_xscale" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "type" "load_byte,load1") + (eq_attr "tune" "xscale,iwmmxt,iwmmxt2")))) + "core") + +(define_insn_reservation "load_ldsched" 2 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "type" "load_byte,load1") + (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2")))) + "core") + +(define_insn_reservation "load_or_store" 2 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "!yes") + (eq_attr "type" "load_byte,load1,load2,load3,load4,store1"))) + "core*2") + +(define_insn_reservation "mult" 16 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "no") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) + "core*16") + +(define_insn_reservation "mult_ldsched_strongarm" 3 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "tune" + "strongarm,strongarm110,strongarm1100,strongarm1110") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) + "core*2") + +(define_insn_reservation "mult_ldsched" 4 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "ldsched" "yes") + (and (eq_attr "tune" + "!strongarm,strongarm110,strongarm1100,strongarm1110") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))))) + "core*4") + +(define_insn_reservation "multi_cycle" 32 + (and (eq_attr "generic_sched" "yes") + (and (eq_attr "core_cycles" "multi") + (and (eq_attr "type" "!load_byte,load1,load2,load3,load4,\ + store1,store2,store3,store4") + (not (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))))) + "core*32") + +(define_insn_reservation "single_cycle" 1 + (and (eq_attr "generic_sched" "yes") + (eq_attr "core_cycles" "single")) + "core") diff --git a/gcc-4.9/gcc/config/arm/arm-ldmstm.ml b/gcc-4.9/gcc/config/arm/arm-ldmstm.ml new file mode 100644 index 
000000000..2d8f9e267 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-ldmstm.ml @@ -0,0 +1,345 @@ +(* Auto-generate ARM ldm/stm patterns + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Run with: + ocaml arm-ldmstm.ml >/path/to/gcc/config/arm/ldmstm.md +*) + +type amode = IA | IB | DA | DB + +type optype = IN | OUT | INOUT + +let rec string_of_addrmode addrmode = + match addrmode with + IA -> "ia" | IB -> "ib" | DA -> "da" | DB -> "db" + +let rec initial_offset addrmode nregs = + match addrmode with + IA -> 0 + | IB -> 4 + | DA -> -4 * nregs + 4 + | DB -> -4 * nregs + +let rec final_offset addrmode nregs = + match addrmode with + IA -> nregs * 4 + | IB -> nregs * 4 + | DA -> -4 * nregs + | DB -> -4 * nregs + +let constr thumb = + if thumb then "l" else "rk" + +let inout_constr op_type = + match op_type with + OUT -> "=&" + | INOUT -> "+&" + | IN -> "" + +let destreg nregs first op_type thumb = + if not first then + Printf.sprintf "(match_dup %d)" (nregs + 1) + else + Printf.sprintf ("(match_operand:SI %d \"s_register_operand\" \"%s%s\")") + (nregs + 1) (inout_constr op_type) (constr thumb) + +let reg_predicate thumb = + if thumb then "low_register_operand" else "arm_hard_general_register_operand" + +let write_ldm_set thumb nregs offset opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (match_operand:SI %d \"%s\" \"\")\n" opnr (reg_predicate thumb); + Printf.printf "%s (mem:SI " indent; + begin if offset != 0 then Printf.printf "(plus:SI " end; + Printf.printf "%s" (destreg nregs first IN thumb); + begin if offset != 0 then Printf.printf "\n%s (const_int %d))" indent offset end; + Printf.printf "))" + +let write_stm_set thumb nregs offset opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (mem:SI "; + begin if offset != 0 then Printf.printf "(plus:SI " end; + Printf.printf "%s" (destreg nregs first IN thumb); + begin if offset != 0 then Printf.printf " (const_int %d))" offset end; + Printf.printf ")\n%s (match_operand:SI %d \"%s\" \"\"))" indent opnr (reg_predicate thumb) + +let write_ldm_peep_set extra_indent nregs opnr first = + let indent = " " ^ extra_indent in + Printf.printf "%s" (if first then extra_indent ^ " [" else indent); + Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr; + Printf.printf "%s (match_operand:SI %d \"memory_operand\" \"\"))" indent (nregs + opnr) + +let write_stm_peep_set extra_indent nregs opnr first = + let indent = " " ^ extra_indent in + Printf.printf "%s" (if first then extra_indent ^ " [" else indent); + Printf.printf "(set 
(match_operand:SI %d \"memory_operand\" \"\")\n" (nregs + opnr); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\"))" indent opnr + +let write_any_load optype nregs opnr first = + let indent = " " in + Printf.printf "%s" (if first then " [" else indent); + Printf.printf "(set (match_operand:SI %d \"s_register_operand\" \"\")\n" opnr; + Printf.printf "%s (match_operand:SI %d \"%s\" \"\"))" indent (nregs * 2 + opnr) optype + +let write_const_store nregs opnr first = + let indent = " " in + Printf.printf "%s(set (match_operand:SI %d \"memory_operand\" \"\")\n" indent (nregs + opnr); + Printf.printf "%s (match_dup %d))" indent opnr + +let write_const_stm_peep_set nregs opnr first = + write_any_load "const_int_operand" nregs opnr first; + Printf.printf "\n"; + write_const_store nregs opnr false + + +let rec write_pat_sets func opnr offset first n_left = + func offset opnr first; + begin + if n_left > 1 then begin + Printf.printf "\n"; + write_pat_sets func (opnr + 1) (offset + 4) false (n_left - 1); + end else + Printf.printf "]" + end + +let rec write_peep_sets func opnr first n_left = + func opnr first; + begin + if n_left > 1 then begin + Printf.printf "\n"; + write_peep_sets func (opnr + 1) false (n_left - 1); + end + end + +let can_thumb addrmode update is_store = + match addrmode, update, is_store with + (* Thumb1 mode only supports IA with update. However, for LDMIA, + if the address register also appears in the list of loaded + registers, the loaded value is stored, hence the RTL pattern + to describe such an insn does not have an update. We check + in the match_parallel predicate that the condition described + above is met. *) + IA, _, false -> true + | IA, true, true -> true + | _ -> false + +exception InvalidAddrMode of string;; + +let target addrmode thumb = + match addrmode, thumb with + IA, true -> "TARGET_THUMB1" + | IA, false -> "TARGET_32BIT" + | DB, false -> "TARGET_32BIT" + | _, false -> "TARGET_ARM" + | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.") + +let write_pattern_1 name ls addrmode nregs write_set_fn update thumb = + let astr = string_of_addrmode addrmode in + Printf.printf "(define_insn \"*%s%s%d_%s%s\"\n" + (if thumb then "thumb_" else "") name nregs astr + (if update then "_update" else ""); + Printf.printf " [(match_parallel 0 \"%s_multiple_operation\"\n" ls; + begin + if update then begin + Printf.printf " [(set %s\n (plus:SI %s" + (destreg nregs true INOUT thumb) (destreg nregs false IN thumb); + Printf.printf " (const_int %d)))\n" + (final_offset addrmode nregs) + end + end; + write_pat_sets + (write_set_fn thumb nregs) 1 + (initial_offset addrmode nregs) + (not update) nregs; + Printf.printf ")]\n \"%s && XVECLEN (operands[0], 0) == %d\"\n" + (target addrmode thumb) + (if update then nregs + 1 else nregs); + Printf.printf " \"%s%%(%s%%)\\t%%%d%s, {" + name astr (nregs + 1) (if update then "!" 
else ""); + for n = 1 to nregs; do + Printf.printf "%%%d%s" n (if n < nregs then ", " else "") + done; + Printf.printf "}\"\n"; + Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs; + if not thumb then begin + Printf.printf "\n (set_attr \"predicable\" \"yes\")"; + if addrmode == IA || addrmode == DB then + Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")"; + end; + Printf.printf "])\n\n" + +let write_ldm_pattern addrmode nregs update = + write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update false; + begin if can_thumb addrmode update false then + write_pattern_1 "ldm" "load" addrmode nregs write_ldm_set update true; + end + +let write_stm_pattern addrmode nregs update = + write_pattern_1 "stm" "store" addrmode nregs write_stm_set update false; + begin if can_thumb addrmode update true then + write_pattern_1 "stm" "store" addrmode nregs write_stm_set update true; + end + +let write_ldm_commutative_peephole thumb = + let nregs = 2 in + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs; + let indent = " " in + if thumb then begin + Printf.printf "\n%s(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2); + Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1); + Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))]\n" indent (nregs * 2 + 3) + end else begin + Printf.printf "\n%s(parallel\n" indent; + Printf.printf "%s [(set (match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2); + Printf.printf "%s (match_operator:SI %d \"commutative_binary_operator\"\n" indent (nregs * 2 + 1); + Printf.printf "%s [(match_operand:SI %d \"s_register_operand\" \"\")\n" indent (nregs * 2 + 2); + Printf.printf "%s (match_operand:SI %d \"s_register_operand\" \"\")]))\n" indent (nregs * 2 + 3); + Printf.printf "%s (clobber (reg:CC CC_REGNUM))])]\n" indent + end; + Printf.printf " \"((((REGNO (operands[%d]) == REGNO (operands[0]))\n" (nregs * 2 + 2); + Printf.printf " && (REGNO (operands[%d]) == REGNO (operands[1])))\n" (nregs * 2 + 3); + Printf.printf " || ((REGNO (operands[%d]) == REGNO (operands[0]))\n" (nregs * 2 + 3); + Printf.printf " && (REGNO (operands[%d]) == REGNO (operands[1]))))\n" (nregs * 2 + 2); + Printf.printf " && (peep2_regno_dead_p (%d, REGNO (operands[0]))\n" (nregs + 1); + Printf.printf " || (REGNO (operands[0]) == REGNO (operands[%d])))\n" (nregs * 2); + Printf.printf " && (peep2_regno_dead_p (%d, REGNO (operands[1]))\n" (nregs + 1); + Printf.printf " || (REGNO (operands[1]) == REGNO (operands[%d]))))\"\n" (nregs * 2); + begin + if thumb then + Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))]\n" + (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3) + else begin + Printf.printf " [(parallel\n"; + Printf.printf " [(set (match_dup %d) (match_op_dup %d [(match_dup %d) (match_dup %d)]))\n" + (nregs * 2) (nregs * 2 + 1) (nregs * 2 + 2) (nregs * 2 + 3); + Printf.printf " (clobber (reg:CC CC_REGNUM))])]\n" + end + end; + Printf.printf "{\n if (!gen_ldm_seq (operands, %d, true))\n FAIL;\n" nregs; + Printf.printf "})\n\n" + +let write_ldm_peephole nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_ldm_peep_set "" nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n 
FAIL;\n})\n\n" nregs + +let write_ldm_peephole_b nregs = + if nregs > 2 then begin + Printf.printf "(define_peephole2\n"; + write_ldm_peep_set "" nregs 0 true; + Printf.printf "\n (parallel\n"; + write_peep_sets (write_ldm_peep_set " " nregs) 1 true (nregs - 1); + Printf.printf "])]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_ldm_seq (operands, %d, false))\n DONE;\n else\n FAIL;\n})\n\n" nregs + end + +let write_stm_peephole nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_stm_peep_set "" nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let write_stm_peephole_b nregs = + if nregs > 2 then begin + Printf.printf "(define_peephole2\n"; + write_stm_peep_set "" nregs 0 true; + Printf.printf "\n (parallel\n"; + write_peep_sets (write_stm_peep_set "" nregs) 1 true (nregs - 1); + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + end + +let write_const_stm_peephole_a nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_const_stm_peep_set nregs) 0 true nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let write_const_stm_peephole_b nregs = + Printf.printf "(define_peephole2\n"; + write_peep_sets (write_any_load "const_int_operand" nregs) 0 true nregs; + Printf.printf "\n"; + write_peep_sets (write_const_store nregs) 0 false nregs; + Printf.printf "]\n \"\"\n [(const_int 0)]\n{\n"; + Printf.printf " if (gen_const_stm_seq (operands, %d))\n DONE;\n else\n FAIL;\n})\n\n" nregs + +let patterns () = + let addrmodes = [ IA; IB; DA; DB ] in + let sizes = [ 4; 3; 2] in + List.iter + (fun n -> + List.iter + (fun addrmode -> + write_ldm_pattern addrmode n false; + write_ldm_pattern addrmode n true; + write_stm_pattern addrmode n false; + write_stm_pattern addrmode n true) + addrmodes; + write_ldm_peephole n; + write_ldm_peephole_b n; + write_const_stm_peephole_a n; + write_const_stm_peephole_b n; + write_stm_peephole n;) + sizes; + write_ldm_commutative_peephole false; + write_ldm_commutative_peephole true + +let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) + +(* Do it. *) + +let _ = + print_lines [ +"/* ARM ldm/stm instruction patterns. This file was automatically generated"; +" using arm-ldmstm.ml. Please do not edit manually."; +""; +" Copyright (C) 2010-2014 Free Software Foundation, Inc."; +" Contributed by CodeSourcery."; +""; +" This file is part of GCC."; +""; +" GCC is free software; you can redistribute it and/or modify it"; +" under the terms of the GNU General Public License as published"; +" by the Free Software Foundation; either version 3, or (at your"; +" option) any later version."; +""; +" GCC is distributed in the hope that it will be useful, but WITHOUT"; +" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; +" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; +" License for more details."; +""; +" You should have received a copy of the GNU General Public License and"; +" a copy of the GCC Runtime Library Exception along with this program;"; +" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see"; +" . 
*/"; +""]; + patterns (); diff --git a/gcc-4.9/gcc/config/arm/arm-modes.def b/gcc-4.9/gcc/config/arm/arm-modes.def new file mode 100644 index 000000000..882aa55c5 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-modes.def @@ -0,0 +1,84 @@ +/* Definitions of target machine for GNU compiler, for ARM. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Extended precision floating point. + FIXME What format is this? */ +FLOAT_MODE (XF, 12, 0); + +/* Half-precision floating point */ +FLOAT_MODE (HF, 2, 0); +ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + ? &arm_half_format : &ieee_half_format)); + +/* CCFPEmode should be used with floating inequalities, + CCFPmode should be used with floating equalities. + CC_NOOVmode should be used with SImode integer equalities. + CC_Zmode should be used if only the Z flag is set correctly + CC_Cmode should be used if only the C flag is set correctly, after an + addition. + CC_Nmode should be used if only the N (sign) flag is set correctly + CC_CZmode should be used if only the C and Z flags are correct + (used for DImode unsigned comparisons). + CC_NCVmode should be used if only the N, C, and V flags are correct + (used for DImode signed comparisons). + CCmode should be used otherwise. */ + +CC_MODE (CC_NOOV); +CC_MODE (CC_Z); +CC_MODE (CC_CZ); +CC_MODE (CC_NCV); +CC_MODE (CC_SWP); +CC_MODE (CCFP); +CC_MODE (CCFPE); +CC_MODE (CC_DNE); +CC_MODE (CC_DEQ); +CC_MODE (CC_DLE); +CC_MODE (CC_DLT); +CC_MODE (CC_DGE); +CC_MODE (CC_DGT); +CC_MODE (CC_DLEU); +CC_MODE (CC_DLTU); +CC_MODE (CC_DGEU); +CC_MODE (CC_DGTU); +CC_MODE (CC_C); +CC_MODE (CC_N); + +/* Vector modes. */ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ +VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ +VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ + +/* Fraction and accumulator vector modes. */ +VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ +VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */ +VECTOR_MODES (ACCUM, 4); /* V2HA */ +VECTOR_MODES (UACCUM, 4); /* V2UHA */ + +/* Opaque integer modes for 3, 4, 6 or 8 Neon double registers (2 is + TImode). */ +INT_MODE (EI, 24); +INT_MODE (OI, 32); +INT_MODE (CI, 48); +INT_MODE (XI, 64); diff --git a/gcc-4.9/gcc/config/arm/arm-opts.h b/gcc-4.9/gcc/config/arm/arm-opts.h new file mode 100644 index 000000000..a8393975a --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-opts.h @@ -0,0 +1,75 @@ +/* Definitions for option handling for ARM. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef ARM_OPTS_H +#define ARM_OPTS_H + +/* The various ARM cores. */ +enum processor_type +{ +#undef ARM_CORE +#define ARM_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + INTERNAL_IDENT, +#include "arm-cores.def" +#undef ARM_CORE + /* Used to indicate that no processor has been specified. */ + arm_none +}; + +/* Which __fp16 format to use. + The enumeration values correspond to the numbering for the + Tag_ABI_FP_16bit_format attribute. + */ +enum arm_fp16_format_type +{ + ARM_FP16_FORMAT_NONE = 0, + ARM_FP16_FORMAT_IEEE = 1, + ARM_FP16_FORMAT_ALTERNATIVE = 2 +}; + +/* Which ABI to use. */ +enum arm_abi_type +{ + ARM_ABI_APCS, + ARM_ABI_ATPCS, + ARM_ABI_AAPCS, + ARM_ABI_IWMMXT, + ARM_ABI_AAPCS_LINUX +}; + +enum float_abi_type +{ + ARM_FLOAT_ABI_SOFT, + ARM_FLOAT_ABI_SOFTFP, + ARM_FLOAT_ABI_HARD +}; + +/* Which thread pointer access sequence to use. */ +enum arm_tp_type { + TP_AUTO, + TP_SOFT, + TP_CP15 +}; + +/* Which TLS scheme to use. */ +enum arm_tls_type { + TLS_GNU, + TLS_GNU2 +}; +#endif diff --git a/gcc-4.9/gcc/config/arm/arm-protos.h b/gcc-4.9/gcc/config/arm/arm-protos.h new file mode 100644 index 000000000..13874ee6e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-protos.h @@ -0,0 +1,297 @@ +/* Prototypes for exported functions defined in arm.c and pe.c + Copyright (C) 1999-2014 Free Software Foundation, Inc. + Contributed by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef GCC_ARM_PROTOS_H +#define GCC_ARM_PROTOS_H + +extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *); +extern int use_return_insn (int, rtx); +extern bool use_simple_return_p (void); +extern enum reg_class arm_regno_class (int); +extern void arm_load_pic_register (unsigned long); +extern int arm_volatile_func (void); +extern void arm_expand_prologue (void); +extern void arm_expand_epilogue (bool); +extern void thumb2_expand_return (bool); +extern const char *arm_strip_name_encoding (const char *); +extern void arm_asm_output_labelref (FILE *, const char *); +extern void thumb2_asm_output_opcode (FILE *); +extern unsigned long arm_current_func_type (void); +extern HOST_WIDE_INT arm_compute_initial_elimination_offset (unsigned int, + unsigned int); +extern HOST_WIDE_INT thumb_compute_initial_elimination_offset (unsigned int, + unsigned int); +extern unsigned int arm_dbx_register_number (unsigned int); +extern void arm_output_fn_unwind (FILE *, bool); + + +#ifdef RTX_CODE +extern bool arm_vector_mode_supported_p (enum machine_mode); +extern bool arm_small_register_classes_for_mode_p (enum machine_mode); +extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode); +extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode); +extern int const_ok_for_arm (HOST_WIDE_INT); +extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); +extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code); +extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int); +extern int legitimate_pic_operand_p (rtx); +extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); +extern rtx legitimize_tls_address (rtx, rtx); +extern int arm_legitimate_address_outer_p (enum machine_mode, rtx, RTX_CODE, int); +extern int thumb_legitimate_offset_p (enum machine_mode, HOST_WIDE_INT); +extern bool arm_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); +extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int, + int); +extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int); +extern bool ldm_stm_operation_p (rtx, bool, enum machine_mode mode, + bool, bool); +extern int arm_const_double_rtx (rtx); +extern int vfp3_const_double_rtx (rtx); +extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *); +extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, + int *); +extern int neon_immediate_valid_for_shift (rtx, enum machine_mode, rtx *, + int *, bool); +extern char *neon_output_logic_immediate (const char *, rtx *, + enum machine_mode, int, int); +extern char *neon_output_shift_immediate (const char *, char, rtx *, + enum machine_mode, int, bool); +extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode, + rtx (*) (rtx, rtx, rtx)); +extern rtx neon_make_constant (rtx); +extern tree arm_builtin_vectorized_function (tree, tree, tree); +extern void neon_expand_vector_init (rtx, rtx); +extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern void neon_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern HOST_WIDE_INT neon_element_bits (enum machine_mode); +extern void neon_reinterpret (rtx, rtx); +extern void neon_emit_pair_result_insn (enum machine_mode, + rtx (*) (rtx, rtx, rtx, rtx), + rtx, rtx, rtx); +extern void neon_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int); +extern void neon_split_vcombine (rtx op[3]); +extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx, + bool); 
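The gen_ldm_seq, gen_stm_seq and gen_const_stm_seq prototypes immediately below are the helpers that the peepholes generated by arm-ldmstm.ml expand into (each generated define_peephole2 body calls one of them and FAILs otherwise). The address arithmetic those patterns encode is compact enough to show directly. A minimal C sketch, mirroring initial_offset and final_offset from arm-ldmstm.ml under the same 4-bytes-per-register assumption; the type and function names here are illustrative and are not part of GCC:

#include <stdio.h>

/* Mirror of the four ARM multiple-load/store addressing modes used by
   arm-ldmstm.ml: Increment/Decrement After/Before.  */
enum amode { IA, IB, DA, DB };

/* Offset of the first transferred word relative to the base register,
   as in initial_offset in arm-ldmstm.ml.  */
static int initial_offset (enum amode m, int nregs)
{
  switch (m)
    {
    case IA: return 0;
    case IB: return 4;
    case DA: return -4 * nregs + 4;
    case DB: return -4 * nregs;
    }
  return 0;
}

/* Amount written back to the base register when the "_update" form is
   used, as in final_offset in arm-ldmstm.ml.  */
static int final_offset (enum amode m, int nregs)
{
  switch (m)
    {
    case IA:
    case IB: return 4 * nregs;
    case DA:
    case DB: return -4 * nregs;
    }
  return 0;
}

int main (void)
{
  /* Example: a DB (decrement-before) transfer of 3 registers touches
     base-12, base-8 and base-4, and the update form adjusts the base
     register by -12.  Successive sets in the generated pattern step the
     offset by +4, matching write_pat_sets above.  */
  printf ("DB, 3 regs: first word at %d, writeback %d\n",
          initial_offset (DB, 3), final_offset (DB, 3));
  return 0;
}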
+extern bool arm_tls_referenced_p (rtx); + +extern int arm_coproc_mem_operand (rtx, bool); +extern int neon_vector_mem_operand (rtx, int, bool); +extern int neon_struct_mem_operand (rtx); + +extern int tls_mentioned_p (rtx); +extern int symbol_mentioned_p (rtx); +extern int label_mentioned_p (rtx); +extern RTX_CODE minmax_code (rtx); +extern bool arm_sat_operator_match (rtx, rtx, int *, bool *); +extern int adjacent_mem_locations (rtx, rtx); +extern bool gen_ldm_seq (rtx *, int, bool); +extern bool gen_stm_seq (rtx *, int); +extern bool gen_const_stm_seq (rtx *, int); +extern rtx arm_gen_load_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); +extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); +extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); +extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); +extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); +extern int arm_gen_movmemqi (rtx *); +extern bool gen_movmem_ldrd_strd (rtx *); +extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); +extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx, + HOST_WIDE_INT); +extern rtx arm_gen_compare_reg (RTX_CODE, rtx, rtx, rtx); +extern rtx arm_gen_return_addr_mask (void); +extern void arm_reload_in_hi (rtx *); +extern void arm_reload_out_hi (rtx *); +extern int arm_max_const_double_inline_cost (void); +extern int arm_const_double_inline_cost (rtx); +extern bool arm_const_double_by_parts (rtx); +extern bool arm_const_double_by_immediates (rtx); +extern const char *fp_immediate_constant (rtx); +extern void arm_emit_call_insn (rtx, rtx); +extern const char *output_call (rtx *); +extern const char *output_call_mem (rtx *); +void arm_emit_movpair (rtx, rtx); +extern const char *output_mov_long_double_arm_from_arm (rtx *); +extern const char *output_move_double (rtx *, bool, int *count); +extern const char *output_move_quad (rtx *); +extern int arm_count_output_move_double_insns (rtx *); +extern const char *output_move_vfp (rtx *operands); +extern const char *output_move_neon (rtx *operands); +extern int arm_attr_length_move_neon (rtx); +extern int arm_address_offset_is_imm (rtx); +extern const char *output_add_immediate (rtx *); +extern const char *arithmetic_instr (rtx, int); +extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int); +extern const char *output_return_instruction (rtx, bool, bool, bool); +extern void arm_poke_function_name (FILE *, const char *); +extern void arm_final_prescan_insn (rtx); +extern int arm_debugger_arg_offset (int, rtx); +extern bool arm_is_long_call_p (tree); +extern int arm_emit_vector_const (FILE *, rtx); +extern void arm_emit_fp16_const (rtx c); +extern const char * arm_output_load_gr (rtx *); +extern const char *vfp_output_fstmd (rtx *); +extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool); +extern void arm_set_return_address (rtx, rtx); +extern int arm_eliminable_register (rtx); +extern const char *arm_output_shift(rtx *, int); +extern const char *arm_output_iwmmxt_shift_immediate (const char *, rtx *, bool); +extern const char *arm_output_iwmmxt_tinsr (rtx *); +extern unsigned int arm_sync_loop_insns (rtx , rtx *); +extern int arm_attr_length_push_multi(rtx, rtx); +extern void arm_expand_compare_and_swap (rtx op[]); +extern void arm_split_compare_and_swap (rtx op[]); +extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); +extern rtx arm_load_tp (rtx); + +#if defined TREE_CODE +extern void arm_init_cumulative_args 
(CUMULATIVE_ARGS *, tree, rtx, tree); +extern bool arm_pad_arg_upward (enum machine_mode, const_tree); +extern bool arm_pad_reg_upward (enum machine_mode, tree, int); +#endif +extern int arm_apply_result_size (void); + +#endif /* RTX_CODE */ + +/* Thumb functions. */ +extern void arm_init_expanders (void); +extern const char *thumb1_unexpanded_epilogue (void); +extern void thumb1_expand_prologue (void); +extern void thumb1_expand_epilogue (void); +extern const char *thumb1_output_interwork (void); +#ifdef TREE_CODE +extern int is_called_in_ARM_mode (tree); +#endif +extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); +#ifdef RTX_CODE +extern enum arm_cond_code maybe_get_arm_condition_code (rtx); +extern void thumb1_final_prescan_insn (rtx); +extern void thumb2_final_prescan_insn (rtx); +extern const char *thumb_load_double_from_address (rtx *); +extern const char *thumb_output_move_mem_multiple (int, rtx *); +extern const char *thumb_call_via_reg (rtx); +extern void thumb_expand_movmemqi (rtx *); +extern rtx arm_return_addr (int, rtx); +extern void thumb_reload_out_hi (rtx *); +extern void thumb_reload_in_hi (rtx *); +extern void thumb_set_return_address (rtx, rtx); +extern const char *thumb1_output_casesi (rtx *); +extern const char *thumb2_output_casesi (rtx *); +#endif + +/* Defined in pe.c. */ +extern int arm_dllexport_name_p (const char *); +extern int arm_dllimport_name_p (const char *); + +#ifdef TREE_CODE +extern void arm_pe_unique_section (tree, int); +extern void arm_pe_encode_section_info (tree, rtx, int); +extern int arm_dllexport_p (tree); +extern int arm_dllimport_p (tree); +extern void arm_mark_dllexport (tree); +extern void arm_mark_dllimport (tree); +#endif + +extern void arm_pr_long_calls (struct cpp_reader *); +extern void arm_pr_no_long_calls (struct cpp_reader *); +extern void arm_pr_long_calls_off (struct cpp_reader *); + +extern void arm_lang_object_attributes_init(void); + +extern const char *arm_mangle_type (const_tree); + +extern void arm_order_regs_for_local_alloc (void); + +extern int arm_max_conditional_execute (); + +/* Vectorizer cost model implementation. */ +struct cpu_vec_costs { + const int scalar_stmt_cost; /* Cost of any scalar operation, excluding + load and store. */ + const int scalar_load_cost; /* Cost of scalar load. */ + const int scalar_store_cost; /* Cost of scalar store. */ + const int vec_stmt_cost; /* Cost of any vector operation, excluding + load, store, vector-to-scalar and + scalar-to-vector operation. */ + const int vec_to_scalar_cost; /* Cost of vect-to-scalar operation. */ + const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */ + const int vec_align_load_cost; /* Cost of aligned vector load. */ + const int vec_unalign_load_cost; /* Cost of unaligned vector load. */ + const int vec_unalign_store_cost; /* Cost of unaligned vector load. */ + const int vec_store_cost; /* Cost of vector store. */ + const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer + cost model. */ + const int cond_not_taken_branch_cost;/* Cost of not taken branch for + vectorizer cost model. */ +}; + +#ifdef RTX_CODE +/* This needs to be here because we need RTX_CODE and similar. */ + +struct cpu_cost_table; + +struct tune_params +{ + bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); + const struct cpu_cost_table *insn_extra_cost; + bool (*sched_adjust_cost) (rtx, rtx, rtx, int *); + int constant_limit; + /* Maximum number of instructions to conditionalise. 
*/ + int max_insns_skipped; + int num_prefetch_slots; + int l1_cache_size; + int l1_cache_line_size; + bool prefer_constant_pool; + int (*branch_cost) (bool, bool); + /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM. */ + bool prefer_ldrd_strd; + /* The preference for non short cirtcuit operation when optimizing for + performance. The first element covers Thumb state and the second one + is for ARM state. */ + bool logical_op_non_short_circuit[2]; + /* Vectorizer costs. */ + const struct cpu_vec_costs* vec_costs; + /* Prefer Neon for 64-bit bitops. */ + bool prefer_neon_for_64bits; +}; + +extern const struct tune_params *current_tune; +extern int vfp3_const_double_for_fract_bits (rtx); +/* return power of two from operand, otherwise 0. */ +extern int vfp3_const_double_for_bits (rtx); + +extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, + rtx); +extern bool arm_validize_comparison (rtx *, rtx *, rtx *); +#endif /* RTX_CODE */ + +extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); +extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); + +extern bool arm_autoinc_modes_ok_p (enum machine_mode, enum arm_auto_incmodes); + +extern void arm_emit_eabi_attribute (const char *, int, int); + +/* Defined in gcc/common/config/arm-common.c. */ +extern const char *arm_rewrite_selected_cpu (const char *name); + +#endif /* ! GCC_ARM_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/arm/arm-tables.opt b/gcc-4.9/gcc/config/arm/arm-tables.opt new file mode 100644 index 000000000..bc046a0de --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-tables.opt @@ -0,0 +1,439 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from arm-cores.def, arm-arches.def +; and arm-fpus.def. + +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . 
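Before the generated option tables that follow, one note on the cost structures declared above in arm-protos.h: cpu_vec_costs gives per-CPU prices for scalar and vector statements, loads and stores, and tune_params points at one such table per tuning target. A small C sketch of how a table of that shape can feed a scalar-versus-vector decision; the struct mirrors a subset of the fields above, but the numbers and the compare_costs helper are invented for illustration and are not taken from any real ARM tuning table:

#include <stdio.h>

/* Same shape as part of cpu_vec_costs in arm-protos.h (only the fields
   used below).  */
struct cpu_vec_costs
{
  int scalar_stmt_cost;
  int scalar_load_cost;
  int scalar_store_cost;
  int vec_stmt_cost;
  int vec_align_load_cost;
  int vec_store_cost;
};

/* Hypothetical unit costs, for illustration only.  */
static const struct cpu_vec_costs demo_costs = { 1, 1, 1, 1, 1, 1 };

/* Crude comparison: a loop doing one load, one arithmetic statement and
   one store per element, vectorized with vectorization factor VF.  */
static void compare_costs (const struct cpu_vec_costs *c, int n, int vf)
{
  int scalar = n * (c->scalar_load_cost + c->scalar_stmt_cost
                    + c->scalar_store_cost);
  int vector = (n / vf) * (c->vec_align_load_cost + c->vec_stmt_cost
                           + c->vec_store_cost);
  printf ("n=%d vf=%d: scalar=%d vector=%d -> %s\n",
          n, vf, scalar, vector,
          vector < scalar ? "vectorize" : "keep scalar");
}

int main (void)
{
  compare_costs (&demo_costs, 64, 4);  /* e.g. four 32-bit lanes */
  return 0;
}

With equal unit costs the vector form wins whenever the vectorization factor exceeds one, which is why the real structure also carries separate unaligned-access and taken/not-taken branch cost fields to skew the decision.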
+ +Enum +Name(processor_type) Type(enum processor_type) +Known ARM CPUs (for use with the -mcpu= and -mtune= options): + +EnumValue +Enum(processor_type) String(arm2) Value(arm2) + +EnumValue +Enum(processor_type) String(arm250) Value(arm250) + +EnumValue +Enum(processor_type) String(arm3) Value(arm3) + +EnumValue +Enum(processor_type) String(arm6) Value(arm6) + +EnumValue +Enum(processor_type) String(arm60) Value(arm60) + +EnumValue +Enum(processor_type) String(arm600) Value(arm600) + +EnumValue +Enum(processor_type) String(arm610) Value(arm610) + +EnumValue +Enum(processor_type) String(arm620) Value(arm620) + +EnumValue +Enum(processor_type) String(arm7) Value(arm7) + +EnumValue +Enum(processor_type) String(arm7d) Value(arm7d) + +EnumValue +Enum(processor_type) String(arm7di) Value(arm7di) + +EnumValue +Enum(processor_type) String(arm70) Value(arm70) + +EnumValue +Enum(processor_type) String(arm700) Value(arm700) + +EnumValue +Enum(processor_type) String(arm700i) Value(arm700i) + +EnumValue +Enum(processor_type) String(arm710) Value(arm710) + +EnumValue +Enum(processor_type) String(arm720) Value(arm720) + +EnumValue +Enum(processor_type) String(arm710c) Value(arm710c) + +EnumValue +Enum(processor_type) String(arm7100) Value(arm7100) + +EnumValue +Enum(processor_type) String(arm7500) Value(arm7500) + +EnumValue +Enum(processor_type) String(arm7500fe) Value(arm7500fe) + +EnumValue +Enum(processor_type) String(arm7m) Value(arm7m) + +EnumValue +Enum(processor_type) String(arm7dm) Value(arm7dm) + +EnumValue +Enum(processor_type) String(arm7dmi) Value(arm7dmi) + +EnumValue +Enum(processor_type) String(arm8) Value(arm8) + +EnumValue +Enum(processor_type) String(arm810) Value(arm810) + +EnumValue +Enum(processor_type) String(strongarm) Value(strongarm) + +EnumValue +Enum(processor_type) String(strongarm110) Value(strongarm110) + +EnumValue +Enum(processor_type) String(strongarm1100) Value(strongarm1100) + +EnumValue +Enum(processor_type) String(strongarm1110) Value(strongarm1110) + +EnumValue +Enum(processor_type) String(fa526) Value(fa526) + +EnumValue +Enum(processor_type) String(fa626) Value(fa626) + +EnumValue +Enum(processor_type) String(arm7tdmi) Value(arm7tdmi) + +EnumValue +Enum(processor_type) String(arm7tdmi-s) Value(arm7tdmis) + +EnumValue +Enum(processor_type) String(arm710t) Value(arm710t) + +EnumValue +Enum(processor_type) String(arm720t) Value(arm720t) + +EnumValue +Enum(processor_type) String(arm740t) Value(arm740t) + +EnumValue +Enum(processor_type) String(arm9) Value(arm9) + +EnumValue +Enum(processor_type) String(arm9tdmi) Value(arm9tdmi) + +EnumValue +Enum(processor_type) String(arm920) Value(arm920) + +EnumValue +Enum(processor_type) String(arm920t) Value(arm920t) + +EnumValue +Enum(processor_type) String(arm922t) Value(arm922t) + +EnumValue +Enum(processor_type) String(arm940t) Value(arm940t) + +EnumValue +Enum(processor_type) String(ep9312) Value(ep9312) + +EnumValue +Enum(processor_type) String(arm10tdmi) Value(arm10tdmi) + +EnumValue +Enum(processor_type) String(arm1020t) Value(arm1020t) + +EnumValue +Enum(processor_type) String(arm9e) Value(arm9e) + +EnumValue +Enum(processor_type) String(arm946e-s) Value(arm946es) + +EnumValue +Enum(processor_type) String(arm966e-s) Value(arm966es) + +EnumValue +Enum(processor_type) String(arm968e-s) Value(arm968es) + +EnumValue +Enum(processor_type) String(arm10e) Value(arm10e) + +EnumValue +Enum(processor_type) String(arm1020e) Value(arm1020e) + +EnumValue +Enum(processor_type) String(arm1022e) Value(arm1022e) + +EnumValue 
+Enum(processor_type) String(xscale) Value(xscale) + +EnumValue +Enum(processor_type) String(iwmmxt) Value(iwmmxt) + +EnumValue +Enum(processor_type) String(iwmmxt2) Value(iwmmxt2) + +EnumValue +Enum(processor_type) String(fa606te) Value(fa606te) + +EnumValue +Enum(processor_type) String(fa626te) Value(fa626te) + +EnumValue +Enum(processor_type) String(fmp626) Value(fmp626) + +EnumValue +Enum(processor_type) String(fa726te) Value(fa726te) + +EnumValue +Enum(processor_type) String(arm926ej-s) Value(arm926ejs) + +EnumValue +Enum(processor_type) String(arm1026ej-s) Value(arm1026ejs) + +EnumValue +Enum(processor_type) String(arm1136j-s) Value(arm1136js) + +EnumValue +Enum(processor_type) String(arm1136jf-s) Value(arm1136jfs) + +EnumValue +Enum(processor_type) String(arm1176jz-s) Value(arm1176jzs) + +EnumValue +Enum(processor_type) String(arm1176jzf-s) Value(arm1176jzfs) + +EnumValue +Enum(processor_type) String(mpcorenovfp) Value(mpcorenovfp) + +EnumValue +Enum(processor_type) String(mpcore) Value(mpcore) + +EnumValue +Enum(processor_type) String(arm1156t2-s) Value(arm1156t2s) + +EnumValue +Enum(processor_type) String(arm1156t2f-s) Value(arm1156t2fs) + +EnumValue +Enum(processor_type) String(cortex-m1) Value(cortexm1) + +EnumValue +Enum(processor_type) String(cortex-m0) Value(cortexm0) + +EnumValue +Enum(processor_type) String(cortex-m0plus) Value(cortexm0plus) + +EnumValue +Enum(processor_type) String(generic-armv7-a) Value(genericv7a) + +EnumValue +Enum(processor_type) String(cortex-a5) Value(cortexa5) + +EnumValue +Enum(processor_type) String(cortex-a7) Value(cortexa7) + +EnumValue +Enum(processor_type) String(cortex-a8) Value(cortexa8) + +EnumValue +Enum(processor_type) String(cortex-a9) Value(cortexa9) + +EnumValue +Enum(processor_type) String(cortex-a12) Value(cortexa12) + +EnumValue +Enum(processor_type) String(cortex-a15) Value(cortexa15) + +EnumValue +Enum(processor_type) String(cortex-r4) Value(cortexr4) + +EnumValue +Enum(processor_type) String(cortex-r4f) Value(cortexr4f) + +EnumValue +Enum(processor_type) String(cortex-r5) Value(cortexr5) + +EnumValue +Enum(processor_type) String(cortex-r7) Value(cortexr7) + +EnumValue +Enum(processor_type) String(cortex-m4) Value(cortexm4) + +EnumValue +Enum(processor_type) String(cortex-m3) Value(cortexm3) + +EnumValue +Enum(processor_type) String(marvell-pj4) Value(marvell_pj4) + +EnumValue +Enum(processor_type) String(cortex-a15.cortex-a7) Value(cortexa15cortexa7) + +EnumValue +Enum(processor_type) String(cortex-a53) Value(cortexa53) + +EnumValue +Enum(processor_type) String(cortex-a57) Value(cortexa57) + +EnumValue +Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53) + +Enum +Name(arm_arch) Type(int) +Known ARM architectures (for use with the -march= option): + +EnumValue +Enum(arm_arch) String(armv2) Value(0) + +EnumValue +Enum(arm_arch) String(armv2a) Value(1) + +EnumValue +Enum(arm_arch) String(armv3) Value(2) + +EnumValue +Enum(arm_arch) String(armv3m) Value(3) + +EnumValue +Enum(arm_arch) String(armv4) Value(4) + +EnumValue +Enum(arm_arch) String(armv4t) Value(5) + +EnumValue +Enum(arm_arch) String(armv5) Value(6) + +EnumValue +Enum(arm_arch) String(armv5t) Value(7) + +EnumValue +Enum(arm_arch) String(armv5e) Value(8) + +EnumValue +Enum(arm_arch) String(armv5te) Value(9) + +EnumValue +Enum(arm_arch) String(armv6) Value(10) + +EnumValue +Enum(arm_arch) String(armv6j) Value(11) + +EnumValue +Enum(arm_arch) String(armv6k) Value(12) + +EnumValue +Enum(arm_arch) String(armv6z) Value(13) + +EnumValue +Enum(arm_arch) 
String(armv6zk) Value(14) + +EnumValue +Enum(arm_arch) String(armv6t2) Value(15) + +EnumValue +Enum(arm_arch) String(armv6-m) Value(16) + +EnumValue +Enum(arm_arch) String(armv6s-m) Value(17) + +EnumValue +Enum(arm_arch) String(armv7) Value(18) + +EnumValue +Enum(arm_arch) String(armv7-a) Value(19) + +EnumValue +Enum(arm_arch) String(armv7ve) Value(20) + +EnumValue +Enum(arm_arch) String(armv7-r) Value(21) + +EnumValue +Enum(arm_arch) String(armv7-m) Value(22) + +EnumValue +Enum(arm_arch) String(armv7e-m) Value(23) + +EnumValue +Enum(arm_arch) String(armv8-a) Value(24) + +EnumValue +Enum(arm_arch) String(armv8-a+crc) Value(25) + +EnumValue +Enum(arm_arch) String(iwmmxt) Value(26) + +EnumValue +Enum(arm_arch) String(iwmmxt2) Value(27) + +Enum +Name(arm_fpu) Type(int) +Known ARM FPUs (for use with the -mfpu= option): + +EnumValue +Enum(arm_fpu) String(vfp) Value(0) + +EnumValue +Enum(arm_fpu) String(vfpv3) Value(1) + +EnumValue +Enum(arm_fpu) String(vfpv3-fp16) Value(2) + +EnumValue +Enum(arm_fpu) String(vfpv3-d16) Value(3) + +EnumValue +Enum(arm_fpu) String(vfpv3-d16-fp16) Value(4) + +EnumValue +Enum(arm_fpu) String(vfpv3xd) Value(5) + +EnumValue +Enum(arm_fpu) String(vfpv3xd-fp16) Value(6) + +EnumValue +Enum(arm_fpu) String(neon) Value(7) + +EnumValue +Enum(arm_fpu) String(neon-fp16) Value(8) + +EnumValue +Enum(arm_fpu) String(vfpv4) Value(9) + +EnumValue +Enum(arm_fpu) String(vfpv4-d16) Value(10) + +EnumValue +Enum(arm_fpu) String(fpv4-sp-d16) Value(11) + +EnumValue +Enum(arm_fpu) String(neon-vfpv4) Value(12) + +EnumValue +Enum(arm_fpu) String(fp-armv8) Value(13) + +EnumValue +Enum(arm_fpu) String(neon-fp-armv8) Value(14) + +EnumValue +Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(15) + +EnumValue +Enum(arm_fpu) String(vfp3) Value(16) + diff --git a/gcc-4.9/gcc/config/arm/arm-tune.md b/gcc-4.9/gcc/config/arm/arm-tune.md new file mode 100644 index 000000000..954cab8ef --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm-tune.md @@ -0,0 +1,34 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically by gentune.sh from arm-cores.def +(define_attr "tune" + "arm2,arm250,arm3, + arm6,arm60,arm600, + arm610,arm620,arm7, + arm7d,arm7di,arm70, + arm700,arm700i,arm710, + arm720,arm710c,arm7100, + arm7500,arm7500fe,arm7m, + arm7dm,arm7dmi,arm8, + arm810,strongarm,strongarm110, + strongarm1100,strongarm1110,fa526, + fa626,arm7tdmi,arm7tdmis, + arm710t,arm720t,arm740t, + arm9,arm9tdmi,arm920, + arm920t,arm922t,arm940t, + ep9312,arm10tdmi,arm1020t, + arm9e,arm946es,arm966es, + arm968es,arm10e,arm1020e, + arm1022e,xscale,iwmmxt, + iwmmxt2,fa606te,fa626te, + fmp626,fa726te,arm926ejs, + arm1026ejs,arm1136js,arm1136jfs, + arm1176jzs,arm1176jzfs,mpcorenovfp, + mpcore,arm1156t2s,arm1156t2fs, + cortexm1,cortexm0,cortexm0plus, + genericv7a,cortexa5,cortexa7, + cortexa8,cortexa9,cortexa12, + cortexa15,cortexr4,cortexr4f, + cortexr5,cortexr7,cortexm4, + cortexm3,marvell_pj4,cortexa15cortexa7, + cortexa53,cortexa57,cortexa57cortexa53" + (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c new file mode 100644 index 000000000..0240cc70e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -0,0 +1,31119 @@ +/* Output routines for GCC for ARM. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com). + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "hash-table.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stringpool.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "obstack.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "reload.h" +#include "function.h" +#include "expr.h" +#include "optabs.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "cgraph.h" +#include "ggc.h" +#include "except.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "debug.h" +#include "langhooks.h" +#include "df.h" +#include "intl.h" +#include "libfuncs.h" +#include "params.h" +#include "opts.h" +#include "dumpfile.h" + +/* Forward definitions of types. */ +typedef struct minipool_node Mnode; +typedef struct minipool_fixup Mfix; + +void (*arm_lang_output_object_attributes_hook)(void); + +struct four_ints +{ + int i[4]; +}; + +/* Forward function declarations. */ +static bool arm_lra_p (void); +static bool arm_needs_doubleword_align (enum machine_mode, const_tree); +static int arm_compute_static_chain_stack_bytes (void); +static arm_stack_offsets *arm_get_frame_offsets (void); +static void arm_add_gc_roots (void); +static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx, + HOST_WIDE_INT, rtx, rtx, int, int); +static unsigned bit_count (unsigned long); +static int arm_address_register_rtx_p (rtx, int); +static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int); +static int thumb2_legitimate_index_p (enum machine_mode, rtx, int); +static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int); +static rtx arm_legitimize_address (rtx, rtx, enum machine_mode); +static reg_class_t arm_preferred_reload_class (rtx, reg_class_t); +static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode); +inline static int thumb1_index_register_rtx_p (rtx, int); +static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); +static int thumb_far_jump_used_p (void); +static bool thumb_force_lr_save (void); +static unsigned arm_size_return_regs (void); +static bool arm_assemble_integer (rtx, unsigned int, int); +static void arm_print_operand (FILE *, rtx, int); +static void arm_print_operand_address (FILE *, rtx); +static bool arm_print_operand_punct_valid_p (unsigned char code); +static const char *fp_const_from_val (REAL_VALUE_TYPE *); +static arm_cc get_arm_condition_code (rtx); +static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); +static const char *output_multi_immediate (rtx *, const char *, const char *, + int, HOST_WIDE_INT); +static const char *shift_op (rtx, HOST_WIDE_INT *); +static struct machine_function *arm_init_machine_status (void); +static void thumb_exit (FILE *, int); +static HOST_WIDE_INT get_jump_table_size (rtx); +static Mnode *move_minipool_fix_forward_ref 
(Mnode *, Mnode *, HOST_WIDE_INT); +static Mnode *add_minipool_forward_ref (Mfix *); +static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT); +static Mnode *add_minipool_backward_ref (Mfix *); +static void assign_minipool_offsets (Mfix *); +static void arm_print_value (FILE *, rtx); +static void dump_minipool (rtx); +static int arm_barrier_cost (rtx); +static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT); +static void push_minipool_barrier (rtx, HOST_WIDE_INT); +static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode, + rtx); +static void arm_reorg (void); +static void note_invalid_constants (rtx, HOST_WIDE_INT, int); +static unsigned long arm_compute_save_reg0_reg12_mask (void); +static unsigned long arm_compute_save_reg_mask (void); +static unsigned long arm_isr_value (tree); +static unsigned long arm_compute_func_type (void); +static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); +static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); +static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); +#endif +static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT); +static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); +static int arm_comp_type_attributes (const_tree, const_tree); +static void arm_set_default_type_attributes (tree); +static int arm_adjust_cost (rtx, rtx, rtx, int); +static int arm_sched_reorder (FILE *, int, rtx *, int *, int); +static int optimal_immediate_sequence (enum rtx_code code, + unsigned HOST_WIDE_INT val, + struct four_ints *return_sequence); +static int optimal_immediate_sequence_1 (enum rtx_code code, + unsigned HOST_WIDE_INT val, + struct four_ints *return_sequence, + int i); +static int arm_get_strip_length (int); +static bool arm_function_ok_for_sibcall (tree, tree); +static enum machine_mode arm_promote_function_mode (const_tree, + enum machine_mode, int *, + const_tree, int); +static bool arm_return_in_memory (const_tree, const_tree); +static rtx arm_function_value (const_tree, const_tree, bool); +static rtx arm_libcall_value_1 (enum machine_mode); +static rtx arm_libcall_value (enum machine_mode, const_rtx); +static bool arm_function_value_regno_p (const unsigned int); +static void arm_internal_label (FILE *, const char *, unsigned long); +static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, + tree); +static bool arm_have_conditional_execution (void); +static bool arm_cannot_force_const_mem (enum machine_mode, rtx); +static bool arm_legitimate_constant_p (enum machine_mode, rtx); +static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); +static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); +static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); +static bool arm_rtx_costs (rtx, int, int, int, int *, bool); +static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool); +static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t); +static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool); +static void arm_init_builtins (void); +static void 
arm_init_iwmmxt_builtins (void); +static rtx safe_vector_operand (rtx, enum machine_mode); +static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); +static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); +static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); +static tree arm_builtin_decl (unsigned, bool); +static void emit_constant_insn (rtx cond, rtx pattern); +static rtx emit_set_insn (rtx, rtx); +static rtx emit_multi_reg_push (unsigned long, unsigned long); +static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode, + tree, bool); +static rtx arm_function_arg (cumulative_args_t, enum machine_mode, + const_tree, bool); +static void arm_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree); +static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, + const_tree); +static rtx aapcs_libcall_value (enum machine_mode); +static int aapcs_select_return_coproc (const_tree, const_tree); + +#ifdef OBJECT_FORMAT_ELF +static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; +static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; +#endif +#ifndef ARM_PE +static void arm_encode_section_info (tree, rtx, int); +#endif + +static void arm_file_end (void); +static void arm_file_start (void); + +static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode, + tree, int *, int); +static bool arm_pass_by_reference (cumulative_args_t, + enum machine_mode, const_tree, bool); +static bool arm_promote_prototypes (const_tree); +static bool arm_default_short_enums (void); +static bool arm_align_anon_bitfield (void); +static bool arm_return_in_msb (const_tree); +static bool arm_must_pass_in_stack (enum machine_mode, const_tree); +static bool arm_return_in_memory (const_tree, const_tree); +#if ARM_UNWIND_INFO +static void arm_unwind_emit (FILE *, rtx); +static bool arm_output_ttype (rtx); +static void arm_asm_emit_except_personality (rtx); +static void arm_asm_init_sections (void); +#endif +static rtx arm_dwarf_register_span (rtx); + +static tree arm_cxx_guard_type (void); +static bool arm_cxx_guard_mask_bit (void); +static tree arm_get_cookie_size (tree); +static bool arm_cookie_has_size (void); +static bool arm_cxx_cdtor_returns_this (void); +static bool arm_cxx_key_method_may_be_inline (void); +static void arm_cxx_determine_class_data_visibility (tree); +static bool arm_cxx_class_data_always_comdat (void); +static bool arm_cxx_use_aeabi_atexit (void); +static void arm_init_libfuncs (void); +static tree arm_build_builtin_va_list (void); +static void arm_expand_builtin_va_start (tree, rtx); +static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); +static void arm_option_override (void); +static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); +static bool arm_cannot_copy_insn_p (rtx); +static int arm_issue_rate (void); +static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; +static bool arm_output_addr_const_extra (FILE *, rtx); +static bool arm_allocate_stack_slots_for_args (void); +static bool arm_warn_func_return (tree); +static const char *arm_invalid_parameter_type (const_tree t); +static const char *arm_invalid_return_type (const_tree t); +static tree arm_promoted_type (const_tree t); +static tree arm_convert_to_type (tree type, tree expr); +static bool arm_scalar_mode_supported_p (enum machine_mode); +static bool arm_frame_pointer_required (void); 
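Looking back at arm-tables.opt above: each EnumValue record there pairs a user-visible -mcpu/-mtune, -march or -mfpu spelling with an internal identifier, and genopt.sh regenerates the whole file from arm-cores.def, arm-arches.def and arm-fpus.def. A C sketch of the same name-to-identifier lookup over a handful of the listed CPU entries; the trimmed table and the lookup_cpu function are illustrative only, not the generated option-handling code:

#include <stdio.h>
#include <string.h>

/* A few of the processor_type identifiers listed in arm-tables.opt,
   plus arm_none from arm-opts.h ("no processor specified").  */
enum processor_type { arm7tdmi, arm926ejs, cortexa9, cortexa15, cortexa53,
                      arm_none };

struct cpu_entry { const char *name; enum processor_type ident; };

/* Subset of the String()/Value() pairs from arm-tables.opt.  */
static const struct cpu_entry cpu_table[] = {
  { "arm7tdmi",   arm7tdmi  },
  { "arm926ej-s", arm926ejs },
  { "cortex-a9",  cortexa9  },
  { "cortex-a15", cortexa15 },
  { "cortex-a53", cortexa53 },
};

static enum processor_type lookup_cpu (const char *name)
{
  for (size_t i = 0; i < sizeof cpu_table / sizeof cpu_table[0]; i++)
    if (strcmp (cpu_table[i].name, name) == 0)
      return cpu_table[i].ident;
  return arm_none;
}

int main (void)
{
  printf ("-mcpu=cortex-a15 -> %d\n", (int) lookup_cpu ("cortex-a15"));
  printf ("-mcpu=bogus      -> %d (arm_none)\n", (int) lookup_cpu ("bogus"));
  return 0;
}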
+static bool arm_can_eliminate (const int, const int); +static void arm_asm_trampoline_template (FILE *); +static void arm_trampoline_init (rtx, tree, rtx); +static rtx arm_trampoline_adjust_address (rtx); +static rtx arm_pic_static_addr (rtx orig, rtx reg); +static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *); +static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *); +static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *); +static bool arm_array_mode_supported_p (enum machine_mode, + unsigned HOST_WIDE_INT); +static enum machine_mode arm_preferred_simd_mode (enum machine_mode); +static bool arm_class_likely_spilled_p (reg_class_t); +static HOST_WIDE_INT arm_vector_alignment (const_tree type); +static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); +static bool arm_builtin_support_vector_misalignment (enum machine_mode mode, + const_tree type, + int misalignment, + bool is_packed); +static void arm_conditional_register_usage (void); +static reg_class_t arm_preferred_rename_class (reg_class_t rclass); +static unsigned int arm_autovectorize_vector_sizes (void); +static int arm_default_branch_cost (bool, bool); +static int arm_cortex_a5_branch_cost (bool, bool); +static int arm_cortex_m_branch_cost (bool, bool); + +static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel); + +static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED); +static unsigned arm_add_stmt_cost (void *data, int count, + enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, + int misalign, + enum vect_cost_model_location where); + +static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value); +static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); + +/* Table of machine attributes. */ +static const struct attribute_spec arm_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + /* Function calls made to this symbol must be done indirectly, because + it may lie outside of the 26 bit addressing range of a normal function + call. */ + { "long_call", 0, 0, false, true, true, NULL, false }, + /* Whereas these functions are always known to reside within the 26 bit + addressing range. */ + { "short_call", 0, 0, false, true, true, NULL, false }, + /* Specify the procedure call conventions for a function. */ + { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute, + false }, + /* Interrupt Service Routines have special prologue and epilogue requirements. */ + { "isr", 0, 1, false, false, false, arm_handle_isr_attribute, + false }, + { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute, + false }, + { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute, + false }, +#ifdef ARM_PE + /* ARM/PE has three new attributes: + interfacearm - ? + dllexport - for exporting a function/variable that will live in a dll + dllimport - for importing a function/variable from a dll + + Microsoft allows multiple declspecs in one __declspec, separating + them with spaces. We do NOT support this. Instead, use __declspec + multiple times. 
+ */ + { "dllimport", 0, 0, true, false, false, NULL, false }, + { "dllexport", 0, 0, true, false, false, NULL, false }, + { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute, + false }, +#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES + { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, + { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, + { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute, + false }, +#endif + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Initialize the GCC target structure. */ +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +#undef TARGET_MERGE_DECL_ATTRIBUTES +#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes +#endif + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address + +#undef TARGET_LRA_P +#define TARGET_LRA_P arm_lra_p + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE arm_attribute_table + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START arm_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END arm_file_end + +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP NULL +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER arm_assemble_integer + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND arm_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p + +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra + +#undef TARGET_ASM_FUNCTION_PROLOGUE +#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue + +#undef TARGET_ASM_FUNCTION_EPILOGUE +#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE arm_option_override + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes + +#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES +#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST arm_adjust_cost + +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER arm_sched_reorder + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST arm_register_move_cost + +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost + +#undef TARGET_ENCODE_SECTION_INFO +#ifdef ARM_PE +#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info +#else +#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info +#endif + +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding + +#undef TARGET_ASM_INTERNAL_LABEL +#define TARGET_ASM_INTERNAL_LABEL arm_internal_label + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE arm_function_value + +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE arm_libcall_value + +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +#undef TARGET_RTX_COSTS +#define 
TARGET_RTX_COSTS arm_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST arm_address_cost + +#undef TARGET_SHIFT_TRUNCATION_MASK +#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p +#undef TARGET_ARRAY_MODE_SUPPORTED_P +#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + arm_autovectorize_vector_sizes + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS arm_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN arm_expand_builtin +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL arm_builtin_decl + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS arm_init_libfuncs + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG arm_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs + +#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS +#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT arm_trampoline_init +#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address + +#undef TARGET_WARN_FUNC_RETURN +#define TARGET_WARN_FUNC_RETURN arm_warn_func_return + +#undef TARGET_DEFAULT_SHORT_ENUMS +#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums + +#undef TARGET_ALIGN_ANON_BITFIELD +#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield + +#undef TARGET_NARROW_VOLATILE_BITFIELD +#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false + +#undef TARGET_CXX_GUARD_TYPE +#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type + +#undef TARGET_CXX_GUARD_MASK_BIT +#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit + +#undef TARGET_CXX_GET_COOKIE_SIZE +#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size + +#undef TARGET_CXX_COOKIE_HAS_SIZE +#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size + +#undef TARGET_CXX_CDTOR_RETURNS_THIS +#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this + +#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE +#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline + +#undef TARGET_CXX_USE_AEABI_ATEXIT +#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit + +#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY +#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \ + arm_cxx_determine_class_data_visibility + +#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT +#define 
TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat + +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB arm_return_in_msb + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY arm_return_in_memory + +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack + +#if ARM_UNWIND_INFO +#undef TARGET_ASM_UNWIND_EMIT +#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit + +/* EABI unwinding tables use a different format for the typeinfo tables. */ +#undef TARGET_ASM_TTYPE +#define TARGET_ASM_TTYPE arm_output_ttype + +#undef TARGET_ARM_EABI_UNWINDER +#define TARGET_ARM_EABI_UNWINDER true + +#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY +#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality + +#undef TARGET_ASM_INIT_SECTIONS +#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections +#endif /* ARM_UNWIND_INFO */ + +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span + +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif + +#undef TARGET_HAVE_CONDITIONAL_EXECUTION +#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem + +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 4095 + +/* The minimum is set such that the total size of the block + for a particular anchor is -4088 + 1 + 4095 bytes, which is + divisible by eight, ensuring natural spacing of anchors. */ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -4088 + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE arm_issue_rate + +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE arm_mangle_type + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr + +#ifdef HAVE_AS_TLS +#undef TARGET_ASM_OUTPUT_DWARF_DTPREL +#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel +#endif + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class + +#undef TARGET_INVALID_PARAMETER_TYPE +#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type + +#undef TARGET_INVALID_RETURN_TYPE +#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type + +#undef TARGET_PROMOTED_TYPE +#define TARGET_PROMOTED_TYPE arm_promoted_type + +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE arm_convert_to_type + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE arm_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p + +#undef TARGET_VECTORIZE_BUILTINS 
+#define TARGET_VECTORIZE_BUILTINS + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION +#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + arm_builtin_vectorized_function + +#undef TARGET_VECTOR_ALIGNMENT +#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment + +#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + arm_vector_alignment_reachable + +#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT +#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ + arm_builtin_support_vector_misalignment + +#undef TARGET_PREFERRED_RENAME_CLASS +#define TARGET_PREFERRED_RENAME_CLASS \ + arm_preferred_rename_class + +#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK +#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ + arm_vectorize_vec_perm_const_ok + +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + arm_builtin_vectorization_cost +#undef TARGET_VECTORIZE_ADD_STMT_COST +#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost + +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON \ + arm_canonicalize_comparison + +#undef TARGET_ASAN_SHADOW_OFFSET +#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset + +#undef MAX_INSN_PER_IT_BLOCK +#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4) + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Obstack for minipool constant handling. */ +static struct obstack minipool_obstack; +static char * minipool_startobj; + +/* The maximum number of insns skipped which + will be conditionalised if possible. */ +static int max_insns_skipped = 5; + +extern FILE * asm_out_file; + +/* True if we are currently building a constant table. */ +int making_const_table; + +/* The processor for which instructions should be scheduled. */ +enum processor_type arm_tune = arm_none; + +/* The current tuning set. */ +const struct tune_params *current_tune; + +/* Which floating point hardware to schedule for. */ +int arm_fpu_attr; + +/* Which floating popint hardware to use. */ +const struct arm_fpu_desc *arm_fpu_desc; + +/* Used for Thumb call_via trampolines. */ +rtx thumb_call_via_label[14]; +static int thumb_call_reg_needed; + +/* Bit values used to identify processor capabilities. */ +#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */ +#define FL_ARCH3M (1 << 1) /* Extended multiply */ +#define FL_MODE26 (1 << 2) /* 26-bit mode support */ +#define FL_MODE32 (1 << 3) /* 32-bit mode support */ +#define FL_ARCH4 (1 << 4) /* Architecture rel 4 */ +#define FL_ARCH5 (1 << 5) /* Architecture rel 5 */ +#define FL_THUMB (1 << 6) /* Thumb aware */ +#define FL_LDSCHED (1 << 7) /* Load scheduling necessary */ +#define FL_STRONG (1 << 8) /* StrongARM */ +#define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */ +#define FL_XSCALE (1 << 10) /* XScale */ +/* spare (1 << 11) */ +#define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds + media instructions. */ +#define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */ +#define FL_WBUF (1 << 14) /* Schedule for write buffer ops. + Note: ARM6 & 7 derivatives only. */ +#define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */ +#define FL_THUMB2 (1 << 16) /* Thumb-2. */ +#define FL_NOTM (1 << 17) /* Instructions not present in the 'M' + profile. */ +#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */ +#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. 
*/ +#define FL_NEON (1 << 20) /* Neon instructions. */ +#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M + architecture. */ +#define FL_ARCH7 (1 << 22) /* Architecture 7. */ +#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */ +#define FL_ARCH8 (1 << 24) /* Architecture 8. */ +#define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */ + +#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ +#define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */ + +/* Flags that only effect tuning, not available instructions. */ +#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ + | FL_CO_PROC) + +#define FL_FOR_ARCH2 FL_NOTM +#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) +#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) +#define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) +#define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB) +#define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5) +#define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB) +#define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E) +#define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB) +#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE +#define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6) +#define FL_FOR_ARCH6J FL_FOR_ARCH6 +#define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) +#define FL_FOR_ARCH6Z FL_FOR_ARCH6 +#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K +#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) +#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) +#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) +#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) +#define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV) +#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV) +#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) +#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) +#define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8) + +/* The bits in this mask specify which + instructions we are allowed to generate. */ +static unsigned long insn_flags = 0; + +/* The bits in this mask specify which instruction scheduling options should + be used. */ +static unsigned long tune_flags = 0; + +/* The highest ARM architecture version supported by the + target. */ +enum base_architecture arm_base_arch = BASE_ARCH_0; + +/* The following are used in the arm.md file as equivalents to bits + in the above two flag variables. */ + +/* Nonzero if this chip supports the ARM Architecture 3M extensions. */ +int arm_arch3m = 0; + +/* Nonzero if this chip supports the ARM Architecture 4 extensions. */ +int arm_arch4 = 0; + +/* Nonzero if this chip supports the ARM Architecture 4t extensions. */ +int arm_arch4t = 0; + +/* Nonzero if this chip supports the ARM Architecture 5 extensions. */ +int arm_arch5 = 0; + +/* Nonzero if this chip supports the ARM Architecture 5E extensions. */ +int arm_arch5e = 0; + +/* Nonzero if this chip supports the ARM Architecture 6 extensions. */ +int arm_arch6 = 0; + +/* Nonzero if this chip supports the ARM 6K extensions. */ +int arm_arch6k = 0; + +/* Nonzero if instructions present in ARMv6-M can be used. */ +int arm_arch6m = 0; + +/* Nonzero if this chip supports the ARM 7 extensions. */ +int arm_arch7 = 0; + +/* Nonzero if instructions not present in the 'M' profile can be used. */ +int arm_arch_notm = 0; + +/* Nonzero if instructions present in ARMv7E-M can be used. */ +int arm_arch7em = 0; + +/* Nonzero if instructions present in ARMv8 can be used. */ +int arm_arch8 = 0; + +/* Nonzero if this chip can benefit from load scheduling. 
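+ (This is derived from FL_LDSCHED in the tuning flags; see arm_option_override below.)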
*/ +int arm_ld_sched = 0; + +/* Nonzero if this chip is a StrongARM. */ +int arm_tune_strongarm = 0; + +/* Nonzero if this chip supports Intel Wireless MMX technology. */ +int arm_arch_iwmmxt = 0; + +/* Nonzero if this chip supports Intel Wireless MMX2 technology. */ +int arm_arch_iwmmxt2 = 0; + +/* Nonzero if this chip is an XScale. */ +int arm_arch_xscale = 0; + +/* Nonzero if tuning for XScale */ +int arm_tune_xscale = 0; + +/* Nonzero if we want to tune for stores that access the write-buffer. + This typically means an ARM6 or ARM7 with MMU or MPU. */ +int arm_tune_wbuf = 0; + +/* Nonzero if tuning for Cortex-A9. */ +int arm_tune_cortex_a9 = 0; + +/* Nonzero if generating Thumb instructions. */ +int thumb_code = 0; + +/* Nonzero if generating Thumb-1 instructions. */ +int thumb1_code = 0; + +/* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around + problems in GLD which doesn't understand that armv5t code is + interworking clean. */ +int arm_cpp_interwork = 0; + +/* Nonzero if chip supports Thumb 2. */ +int arm_arch_thumb2; + +/* Nonzero if chip supports integer division instruction. */ +int arm_arch_arm_hwdiv; +int arm_arch_thumb_hwdiv; + +/* Nonzero if we should use Neon to handle 64-bits operations rather + than core registers. */ +int prefer_neon_for_64bits = 0; + +/* Nonzero if we shouldn't use literal pools. */ +bool arm_disable_literal_pool = false; + +/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, + we must report the mode of the memory reference from + TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */ +enum machine_mode output_memory_reference_mode; + +/* The register number to be used for the PIC offset register. */ +unsigned arm_pic_register = INVALID_REGNUM; + +/* Set to 1 after arm_reorg has started. Reset to start at the start of + the next function. */ +static int after_arm_reorg = 0; + +enum arm_pcs arm_pcs_default; + +/* For an explanation of these variables, see final_prescan_insn below. */ +int arm_ccfsm_state; +/* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ +enum arm_cond_code arm_current_cc; + +rtx arm_target_insn; +int arm_target_label; +/* The number of conditionally executed insns, including the current insn. */ +int arm_condexec_count = 0; +/* A bitmask specifying the patterns for the IT block. + Zero means do not output an IT block before this insn. */ +int arm_condexec_mask = 0; +/* The number of bits used in arm_condexec_mask. */ +int arm_condexec_masklen = 0; + +/* Nonzero if chip supports the ARMv8 CRC instructions. */ +int arm_arch_crc = 0; + +/* The condition codes of the ARM, and the inverse function. */ +static const char * const arm_condition_codes[] = +{ + "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" +}; + +/* The register numbers in sequence, for passing to arm_gen_load_multiple. */ +int arm_regs_in_sequence[] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +}; + +#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl") +#define streq(string1, string2) (strcmp (string1, string2) == 0) + +#define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \ + | (1 << SP_REGNUM) | (1 << PC_REGNUM) \ + | (1 << PIC_OFFSET_TABLE_REGNUM))) + +/* Initialization code. 
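+ The tables below (all_cores, all_architectures, all_fpus) are built from + arm-cores.def, arm-arches.def and arm-fpus.def; arm_option_override selects + entries from them according to -mcpu, -march, -mtune and -mfpu.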
*/ + +struct processors +{ + const char *const name; + enum processor_type core; + const char *arch; + enum base_architecture base_arch; + const unsigned long flags; + const struct tune_params *const tune; +}; + + +#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 +#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ + prefetch_slots, \ + l1_size, \ + l1_line_size + +/* arm generic vectorizer costs. */ +static const +struct cpu_vec_costs arm_default_vec_cost = { + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 1, /* vec_unalign_load_cost. */ + 1, /* vec_unalign_store_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + +/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */ +#include "aarch-cost-tables.h" + + + +const struct cpu_cost_table cortexa9_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (2), /* arith_shift_reg. */ + 0, /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (2), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + 0, /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (3), /* simple. */ + COSTS_N_INSNS (3), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (3), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (4), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (4), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (2), /* load. */ + COSTS_N_INSNS (2), /* load_sign_extend. */ + COSTS_N_INSNS (2), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (5), /* loadf. */ + COSTS_N_INSNS (5), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (2), /* store. */ + COSTS_N_INSNS (2), /* strd. */ + COSTS_N_INSNS (2), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* storef. */ + COSTS_N_INSNS (1), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (14), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (30), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (1), /* neg. */ + COSTS_N_INSNS (3), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (24), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (30), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (1), /* fpconst. */ + COSTS_N_INSNS (1), /* neg. */ + COSTS_N_INSNS (3), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. 
*/ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + + +const struct cpu_cost_table cortexa7_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + COSTS_N_INSNS (1), /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + COSTS_N_INSNS (1), /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + + { + /* MULT SImode */ + { + 0, /* simple. */ + COSTS_N_INSNS (1), /* flag_setting. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (7) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (1), /* extend. */ + 0, /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (1), /* load. */ + COSTS_N_INSNS (1), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (1), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* loadf. */ + COSTS_N_INSNS (2), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (1), /* store. */ + COSTS_N_INSNS (3), /* strd. */ + COSTS_N_INSNS (1), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (3), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (3), /* fpconst. */ + COSTS_N_INSNS (3), /* neg. */ + COSTS_N_INSNS (3), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (30), /* div. */ + COSTS_N_INSNS (6), /* mult. */ + COSTS_N_INSNS (10), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + COSTS_N_INSNS (3), /* fpconst. */ + COSTS_N_INSNS (3), /* neg. */ + COSTS_N_INSNS (3), /* compare. */ + COSTS_N_INSNS (3), /* widen. */ + COSTS_N_INSNS (3), /* narrow. */ + COSTS_N_INSNS (3), /* toint. */ + COSTS_N_INSNS (3), /* fromint. */ + COSTS_N_INSNS (3) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table cortexa12_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + COSTS_N_INSNS (1), /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + 0, /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + COSTS_N_INSNS (1), /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + /* MULT SImode */ + { + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (3), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (3), /* add. 
*/ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (18) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (3), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (3), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (3), /* load. */ + COSTS_N_INSNS (3), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (3), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (3), /* loadf. */ + COSTS_N_INSNS (3), /* loadd. */ + 0, /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + 0, /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (2), /* stored. */ + 0 /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (17), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (8), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (31), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (8), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table cortexa15_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + 0, /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + COSTS_N_INSNS (1), /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + /* MULT SImode */ + { + { + COSTS_N_INSNS (2), /* simple. */ + COSTS_N_INSNS (3), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (2), /* add. */ + COSTS_N_INSNS (2), /* extend_add. */ + COSTS_N_INSNS (18) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (3), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (3), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (3), /* load. */ + COSTS_N_INSNS (3), /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (4), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (4), /* loadf. */ + COSTS_N_INSNS (4), /* loadd. */ + 0, /* load_unaligned. */ + 0, /* store. */ + 0, /* strd. */ + COSTS_N_INSNS (1), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + 0, /* storef. */ + 0, /* stored. */ + 0 /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (17), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (8), /* fma. 
*/ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (5), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (31), /* div. */ + COSTS_N_INSNS (4), /* mult. */ + COSTS_N_INSNS (8), /* mult_addsub. */ + COSTS_N_INSNS (8), /* fma. */ + COSTS_N_INSNS (4), /* addsub. */ + COSTS_N_INSNS (2), /* fpconst. */ + COSTS_N_INSNS (2), /* neg. */ + COSTS_N_INSNS (2), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (4), /* toint. */ + COSTS_N_INSNS (4), /* fromint. */ + COSTS_N_INSNS (4) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct cpu_cost_table v7m_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + 0, /* shift. */ + 0, /* shift_reg. */ + 0, /* arith_shift. */ + COSTS_N_INSNS (1), /* arith_shift_reg. */ + 0, /* log_shift. */ + COSTS_N_INSNS (1), /* log_shift_reg. */ + 0, /* extend. */ + COSTS_N_INSNS (1), /* extend_arith. */ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + COSTS_N_INSNS (1), /* non_exec. */ + false /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (1), /* simple. */ + COSTS_N_INSNS (1), /* flag_setting. */ + COSTS_N_INSNS (2), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (3), /* extend_add. */ + COSTS_N_INSNS (8) /* idiv. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (3), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (2), /* load. */ + 0, /* load_sign_extend. */ + COSTS_N_INSNS (3), /* ldrd. */ + COSTS_N_INSNS (2), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 1, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* loadf. */ + COSTS_N_INSNS (3), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (2), /* store. */ + COSTS_N_INSNS (3), /* strd. */ + COSTS_N_INSNS (2), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 1, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (2), /* storef. */ + COSTS_N_INSNS (3), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (7), /* div. */ + COSTS_N_INSNS (2), /* mult. */ + COSTS_N_INSNS (5), /* mult_addsub. */ + COSTS_N_INSNS (3), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + 0, /* widen. */ + 0, /* narrow. */ + 0, /* toint. */ + 0, /* fromint. */ + 0 /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (15), /* div. */ + COSTS_N_INSNS (5), /* mult. */ + COSTS_N_INSNS (7), /* mult_addsub. */ + COSTS_N_INSNS (7), /* fma. */ + COSTS_N_INSNS (3), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0, /* compare. */ + 0, /* widen. */ + 0, /* narrow. */ + 0, /* toint. */ + 0, /* fromint. */ + 0 /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + +const struct tune_params arm_slowmul_tune = +{ + arm_slowmul_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 3, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. 
*/ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_fastmul_tune = +{ + arm_fastmul_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +/* StrongARM has early execution of branches, so a sequence that is worth + skipping is shorter. Set max_insns_skipped to a lower value. */ + +const struct tune_params arm_strongarm_tune = +{ + arm_fastmul_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 3, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_xscale_tune = +{ + arm_xscale_rtx_costs, + NULL, + xscale_sched_adjust_cost, + 2, /* Constant limit. */ + 3, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_9e_tune = +{ + arm_9e_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_v6t2_tune = +{ + arm_9e_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +/* Generic Cortex tuning. Use more specific tunings if appropriate. */ +const struct tune_params arm_cortex_tune = +{ + arm_9e_rtx_costs, + &generic_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a7_tune = +{ + arm_9e_rtx_costs, + &cortexa7_extra_costs, + NULL, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a15_tune = +{ + arm_9e_rtx_costs, + &cortexa15_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. 
*/ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a53_tune = +{ + arm_9e_rtx_costs, + &cortexa53_extra_costs, + NULL, /* Scheduler cost adjustment. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a57_tune = +{ + arm_9e_rtx_costs, + &cortexa57_extra_costs, + NULL, /* Scheduler cost adjustment. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +/* Branches can be dual-issued on Cortex-A5, so conditional execution is + less appealing. Set max_insns_skipped to a low value. */ + +const struct tune_params arm_cortex_a5_tune = +{ + arm_9e_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 1, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_cortex_a5_branch_cost, + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a9_tune = +{ + arm_9e_rtx_costs, + &cortexa9_extra_costs, + cortex_a9_sched_adjust_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_BENEFICIAL(4,32,32), + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_cortex_a12_tune = +{ + arm_9e_rtx_costs, + &cortexa12_extra_costs, + NULL, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_BENEFICIAL(4,32,32), + false, /* Prefer constant pool. */ + arm_default_branch_cost, + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single + cycle to execute each. An LDR from the constant pool also takes two cycles + to execute, but mildly increases pipelining opportunity (consecutive + loads/stores can be pipelined together, saving one cycle), and may also + improve icache utilisation. Hence we prefer the constant pool for such + processors. */ + +const struct tune_params arm_v7m_tune = +{ + arm_9e_rtx_costs, + &v7m_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 2, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_cortex_m_branch_cost, + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. 
*/ +}; + +/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than + arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */ +const struct tune_params arm_v6m_tune = +{ + arm_9e_rtx_costs, + NULL, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + +const struct tune_params arm_fa726te_tune = +{ + arm_9e_rtx_costs, + NULL, + fa726te_sched_adjust_cost, + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + true, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false /* Prefer Neon for 64-bits bitops. */ +}; + + +/* Not all of these give usefully different compilation alternatives, + but there is no simple way of generalizing them. */ +static const struct processors all_cores[] = +{ + /* ARM Cores */ +#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ + {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \ + FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, +#include "arm-cores.def" +#undef ARM_CORE + {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL} +}; + +static const struct processors all_architectures[] = +{ + /* ARM Architectures */ + /* We don't specify tuning costs here as it will be figured out + from the core. */ + +#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \ + {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL}, +#include "arm-arches.def" +#undef ARM_ARCH + {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL} +}; + + +/* These are populated as commandline arguments are processed, or NULL + if not specified. */ +static const struct processors *arm_selected_arch; +static const struct processors *arm_selected_cpu; +static const struct processors *arm_selected_tune; + +/* The name of the preprocessor macro to define for this architecture. */ + +char arm_arch_name[] = "__ARM_ARCH_0UNK__"; + +/* Available values for -mfpu=. */ + +static const struct arm_fpu_desc all_fpus[] = +{ +#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \ + { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO }, +#include "arm-fpus.def" +#undef ARM_FPU +}; + + +/* Supported TLS relocations. */ + +enum tls_reloc { + TLS_GD32, + TLS_LDM32, + TLS_LDO32, + TLS_IE32, + TLS_LE32, + TLS_DESCSEQ /* GNU scheme */ +}; + +/* The maximum number of insns to be used when loading a constant. */ +inline static int +arm_constant_limit (bool size_p) +{ + return size_p ? 1 : current_tune->constant_limit; +} + +/* Emit an insn that's a simple single-set. Both the operands must be known + to be valid. */ +inline static rtx +emit_set_insn (rtx x, rtx y) +{ + return emit_insn (gen_rtx_SET (VOIDmode, x, y)); +} + +/* Return the number of bits set in VALUE. */ +static unsigned +bit_count (unsigned long value) +{ + unsigned long count = 0; + + while (value) + { + count++; + value &= value - 1; /* Clear the least-significant set bit. */ + } + + return count; +} + +typedef struct +{ + enum machine_mode mode; + const char *name; +} arm_fixed_mode_set; + +/* A small helper for setting fixed-point library libfuncs. 
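+ For example, FUNCNAME "ssadd", MODENAME "sa" and NUM_SUFFIX 3 yield the + libcall name "__gnu_ssaddsa3".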
*/ + +static void +arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode, + const char *funcname, const char *modename, + int num_suffix) +{ + char buffer[50]; + + if (num_suffix == 0) + sprintf (buffer, "__gnu_%s%s", funcname, modename); + else + sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix); + + set_optab_libfunc (optable, mode, buffer); +} + +static void +arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to, + enum machine_mode from, const char *funcname, + const char *toname, const char *fromname) +{ + char buffer[50]; + const char *maybe_suffix_2 = ""; + + /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */ + if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to) + && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to) + && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to)) + maybe_suffix_2 = "2"; + + sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname, + maybe_suffix_2); + + set_conv_libfunc (optable, to, from, buffer); +} + +/* Set up library functions unique to ARM. */ + +static void +arm_init_libfuncs (void) +{ + /* For Linux, we have access to kernel support for atomic operations. */ + if (arm_abi == ARM_ABI_AAPCS_LINUX) + init_sync_libfuncs (2 * UNITS_PER_WORD); + + /* There are no special library functions unless we are using the + ARM BPABI. */ + if (!TARGET_BPABI) + return; + + /* The functions below are described in Section 4 of the "Run-Time + ABI for the ARM architecture", Version 1.0. */ + + /* Double-precision floating-point arithmetic. Table 2. */ + set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd"); + set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv"); + set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul"); + set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg"); + set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub"); + + /* Double-precision comparisons. Table 3. */ + set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq"); + set_optab_libfunc (ne_optab, DFmode, NULL); + set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt"); + set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple"); + set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge"); + set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt"); + set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun"); + + /* Single-precision floating-point arithmetic. Table 4. */ + set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd"); + set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv"); + set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul"); + set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg"); + set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub"); + + /* Single-precision comparisons. Table 5. */ + set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq"); + set_optab_libfunc (ne_optab, SFmode, NULL); + set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt"); + set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple"); + set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge"); + set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt"); + set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun"); + + /* Floating-point to integer conversions. Table 6. 
*/ + set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz"); + set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz"); + set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz"); + set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz"); + set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz"); + set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz"); + set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz"); + set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz"); + + /* Conversions between floating types. Table 7. */ + set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f"); + set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d"); + + /* Integer to floating-point conversions. Table 8. */ + set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d"); + set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d"); + set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d"); + set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d"); + set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f"); + set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f"); + set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f"); + set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f"); + + /* Long long. Table 9. */ + set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul"); + set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod"); + set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod"); + set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl"); + set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr"); + set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr"); + set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp"); + set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp"); + + /* Integer (32/32->32) division. \S 4.3.1. */ + set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod"); + set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod"); + + /* The divmod functions are designed so that they can be used for + plain division, even though they return both the quotient and the + remainder. The quotient is returned in the usual location (i.e., + r0 for SImode, {r0, r1} for DImode), just as would be expected + for an ordinary division routine. Because the AAPCS calling + conventions specify that all of { r0, r1, r2, r3 } are + callee-saved registers, there is no need to tell the compiler + explicitly that those registers are clobbered by these + routines. */ + set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod"); + set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod"); + + /* For SImode division the ABI provides div-without-mod routines, + which are faster. */ + set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv"); + set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv"); + + /* We don't have mod libcalls. Fortunately gcc knows how to use the + divmod libcalls instead. */ + set_optab_libfunc (smod_optab, DImode, NULL); + set_optab_libfunc (umod_optab, DImode, NULL); + set_optab_libfunc (smod_optab, SImode, NULL); + set_optab_libfunc (umod_optab, SImode, NULL); + + /* Half-precision float operations. The compiler handles all operations + with NULL libfuncs by converting the SFmode. */ + switch (arm_fp16_format) + { + case ARM_FP16_FORMAT_IEEE: + case ARM_FP16_FORMAT_ALTERNATIVE: + + /* Conversions. */ + set_conv_libfunc (trunc_optab, HFmode, SFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? 
"__gnu_f2h_ieee" + : "__gnu_f2h_alternative")); + set_conv_libfunc (sext_optab, SFmode, HFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_h2f_ieee" + : "__gnu_h2f_alternative")); + + /* Arithmetic. */ + set_optab_libfunc (add_optab, HFmode, NULL); + set_optab_libfunc (sdiv_optab, HFmode, NULL); + set_optab_libfunc (smul_optab, HFmode, NULL); + set_optab_libfunc (neg_optab, HFmode, NULL); + set_optab_libfunc (sub_optab, HFmode, NULL); + + /* Comparisons. */ + set_optab_libfunc (eq_optab, HFmode, NULL); + set_optab_libfunc (ne_optab, HFmode, NULL); + set_optab_libfunc (lt_optab, HFmode, NULL); + set_optab_libfunc (le_optab, HFmode, NULL); + set_optab_libfunc (ge_optab, HFmode, NULL); + set_optab_libfunc (gt_optab, HFmode, NULL); + set_optab_libfunc (unord_optab, HFmode, NULL); + break; + + default: + break; + } + + /* Use names prefixed with __gnu_ for fixed-point helper functions. */ + { + const arm_fixed_mode_set fixed_arith_modes[] = + { + { QQmode, "qq" }, + { UQQmode, "uqq" }, + { HQmode, "hq" }, + { UHQmode, "uhq" }, + { SQmode, "sq" }, + { USQmode, "usq" }, + { DQmode, "dq" }, + { UDQmode, "udq" }, + { TQmode, "tq" }, + { UTQmode, "utq" }, + { HAmode, "ha" }, + { UHAmode, "uha" }, + { SAmode, "sa" }, + { USAmode, "usa" }, + { DAmode, "da" }, + { UDAmode, "uda" }, + { TAmode, "ta" }, + { UTAmode, "uta" } + }; + const arm_fixed_mode_set fixed_conv_modes[] = + { + { QQmode, "qq" }, + { UQQmode, "uqq" }, + { HQmode, "hq" }, + { UHQmode, "uhq" }, + { SQmode, "sq" }, + { USQmode, "usq" }, + { DQmode, "dq" }, + { UDQmode, "udq" }, + { TQmode, "tq" }, + { UTQmode, "utq" }, + { HAmode, "ha" }, + { UHAmode, "uha" }, + { SAmode, "sa" }, + { USAmode, "usa" }, + { DAmode, "da" }, + { UDAmode, "uda" }, + { TAmode, "ta" }, + { UTAmode, "uta" }, + { QImode, "qi" }, + { HImode, "hi" }, + { SImode, "si" }, + { DImode, "di" }, + { TImode, "ti" }, + { SFmode, "sf" }, + { DFmode, "df" } + }; + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++) + { + arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode, + "add", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode, + "ssadd", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode, + "usadd", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode, + "sub", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode, + "sssub", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode, + "ussub", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode, + "mul", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode, + "ssmul", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode, + "usmul", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode, + "div", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode, + "udiv", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode, + "ssdiv", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode, + "usdiv", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode, + "neg", 
fixed_arith_modes[i].name, 2); + arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode, + "ssneg", fixed_arith_modes[i].name, 2); + arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode, + "usneg", fixed_arith_modes[i].name, 2); + arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode, + "ashl", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode, + "ashr", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode, + "lshr", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode, + "ssashl", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode, + "usashl", fixed_arith_modes[i].name, 3); + arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode, + "cmp", fixed_arith_modes[i].name, 2); + } + + for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++) + for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++) + { + if (i == j + || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode) + && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode))) + continue; + + arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode, + fixed_conv_modes[j].mode, "fract", + fixed_conv_modes[i].name, + fixed_conv_modes[j].name); + arm_set_fixed_conv_libfunc (satfract_optab, + fixed_conv_modes[i].mode, + fixed_conv_modes[j].mode, "satfract", + fixed_conv_modes[i].name, + fixed_conv_modes[j].name); + arm_set_fixed_conv_libfunc (fractuns_optab, + fixed_conv_modes[i].mode, + fixed_conv_modes[j].mode, "fractuns", + fixed_conv_modes[i].name, + fixed_conv_modes[j].name); + arm_set_fixed_conv_libfunc (satfractuns_optab, + fixed_conv_modes[i].mode, + fixed_conv_modes[j].mode, "satfractuns", + fixed_conv_modes[i].name, + fixed_conv_modes[j].name); + } + } + + if (TARGET_AAPCS_BASED) + synchronize_libfunc = init_one_libfunc ("__sync_synchronize"); +} + +/* On AAPCS systems, this is the "struct __va_list". */ +static GTY(()) tree va_list_type; + +/* Return the type to use as __builtin_va_list. */ +static tree +arm_build_builtin_va_list (void) +{ + tree va_list_name; + tree ap_field; + + if (!TARGET_AAPCS_BASED) + return std_build_builtin_va_list (); + + /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type + defined as: + + struct __va_list + { + void *__ap; + }; + + The C Library ABI further reinforces this definition in \S + 4.1. + + We must follow this definition exactly. The structure tag + name is visible in C++ mangled names, and thus forms a part + of the ABI. The field name may be used by people who + #include . */ + /* Create the type. */ + va_list_type = lang_hooks.types.make_type (RECORD_TYPE); + /* Give it the required name. */ + va_list_name = build_decl (BUILTINS_LOCATION, + TYPE_DECL, + get_identifier ("__va_list"), + va_list_type); + DECL_ARTIFICIAL (va_list_name) = 1; + TYPE_NAME (va_list_type) = va_list_name; + TYPE_STUB_DECL (va_list_type) = va_list_name; + /* Create the __ap field. */ + ap_field = build_decl (BUILTINS_LOCATION, + FIELD_DECL, + get_identifier ("__ap"), + ptr_type_node); + DECL_ARTIFICIAL (ap_field) = 1; + DECL_FIELD_CONTEXT (ap_field) = va_list_type; + TYPE_FIELDS (va_list_type) = ap_field; + /* Compute its layout. */ + layout_type (va_list_type); + + return va_list_type; +} + +/* Return an expression of type "void *" pointing to the next + available argument in a variable-argument list. 
VALIST is the + user-level va_list object, of type __builtin_va_list. */ +static tree +arm_extract_valist_ptr (tree valist) +{ + if (TREE_TYPE (valist) == error_mark_node) + return error_mark_node; + + /* On an AAPCS target, the pointer is stored within "struct + va_list". */ + if (TARGET_AAPCS_BASED) + { + tree ap_field = TYPE_FIELDS (TREE_TYPE (valist)); + valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field), + valist, ap_field, NULL_TREE); + } + + return valist; +} + +/* Implement TARGET_EXPAND_BUILTIN_VA_START. */ +static void +arm_expand_builtin_va_start (tree valist, rtx nextarg) +{ + valist = arm_extract_valist_ptr (valist); + std_expand_builtin_va_start (valist, nextarg); +} + +/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ +static tree +arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + valist = arm_extract_valist_ptr (valist); + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); +} + +/* Fix up any incompatible options that the user has specified. */ +static void +arm_option_override (void) +{ + if (global_options_set.x_arm_arch_option) + arm_selected_arch = &all_architectures[arm_arch_option]; + + if (global_options_set.x_arm_cpu_option) + { + arm_selected_cpu = &all_cores[(int) arm_cpu_option]; + arm_selected_tune = &all_cores[(int) arm_cpu_option]; + } + + if (global_options_set.x_arm_tune_option) + arm_selected_tune = &all_cores[(int) arm_tune_option]; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + if (arm_selected_arch) + { + if (arm_selected_cpu) + { + /* Check for conflict between mcpu and march. */ + if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE) + { + warning (0, "switch -mcpu=%s conflicts with -march=%s switch", + arm_selected_cpu->name, arm_selected_arch->name); + /* -march wins for code generation. + -mcpu wins for default tuning. */ + if (!arm_selected_tune) + arm_selected_tune = arm_selected_cpu; + + arm_selected_cpu = arm_selected_arch; + } + else + /* -mcpu wins. */ + arm_selected_arch = NULL; + } + else + /* Pick a CPU based on the architecture. */ + arm_selected_cpu = arm_selected_arch; + } + + /* If the user did not specify a processor, choose one for them. */ + if (!arm_selected_cpu) + { + const struct processors * sel; + unsigned int sought; + + arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; + if (!arm_selected_cpu->name) + { +#ifdef SUBTARGET_CPU_DEFAULT + /* Use the subtarget default CPU if none was specified by + configure. */ + arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT]; +#endif + /* Default to ARM6. */ + if (!arm_selected_cpu->name) + arm_selected_cpu = &all_cores[arm6]; + } + + sel = arm_selected_cpu; + insn_flags = sel->flags; + + /* Now check to see if the user has specified some command line + switch that require certain abilities from the cpu. */ + sought = 0; + + if (TARGET_INTERWORK || TARGET_THUMB) + { + sought |= (FL_THUMB | FL_MODE32); + + /* There are no ARM processors that support both APCS-26 and + interworking. Therefore we force FL_MODE26 to be removed + from insn_flags here (if it was set), so that the search + below will always be able to find a compatible processor. */ + insn_flags &= ~FL_MODE26; + } + + if (sought != 0 && ((sought & insn_flags) != sought)) + { + /* Try to locate a CPU type that supports all of the abilities + of the default CPU, plus the extra abilities requested by + the user. 
*/ + for (sel = all_cores; sel->name != NULL; sel++) + if ((sel->flags & sought) == (sought | insn_flags)) + break; + + if (sel->name == NULL) + { + unsigned current_bit_count = 0; + const struct processors * best_fit = NULL; + + /* Ideally we would like to issue an error message here + saying that it was not possible to find a CPU compatible + with the default CPU, but which also supports the command + line options specified by the programmer, and so they + ought to use the -mcpu= command line option to + override the default CPU type. + + If we cannot find a cpu that has both the + characteristics of the default cpu and the given + command line options we scan the array again looking + for a best match. */ + for (sel = all_cores; sel->name != NULL; sel++) + if ((sel->flags & sought) == sought) + { + unsigned count; + + count = bit_count (sel->flags & insn_flags); + + if (count >= current_bit_count) + { + best_fit = sel; + current_bit_count = count; + } + } + + gcc_assert (best_fit); + sel = best_fit; + } + + arm_selected_cpu = sel; + } + } + + gcc_assert (arm_selected_cpu); + /* The selected cpu may be an architecture, so lookup tuning by core ID. */ + if (!arm_selected_tune) + arm_selected_tune = &all_cores[arm_selected_cpu->core]; + + sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch); + insn_flags = arm_selected_cpu->flags; + arm_base_arch = arm_selected_cpu->base_arch; + + arm_tune = arm_selected_tune->core; + tune_flags = arm_selected_tune->flags; + current_tune = arm_selected_tune->tune; + + /* Make sure that the processor choice does not conflict with any of the + other command line choices. */ + if (TARGET_ARM && !(insn_flags & FL_NOTM)) + error ("target CPU does not support ARM mode"); + + /* BPABI targets use linker tricks to allow interworking on cores + without thumb support. */ + if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI)) + { + warning (0, "target CPU does not support interworking" ); + target_flags &= ~MASK_INTERWORK; + } + + if (TARGET_THUMB && !(insn_flags & FL_THUMB)) + { + warning (0, "target CPU does not support THUMB instructions"); + target_flags &= ~MASK_THUMB; + } + + if (TARGET_APCS_FRAME && TARGET_THUMB) + { + /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ + target_flags &= ~MASK_APCS_FRAME; + } + + /* Callee super interworking implies thumb interworking. Adding + this to the flags here simplifies the logic elsewhere. */ + if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) + target_flags |= MASK_INTERWORK; + + /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done + from here where no function is being compiled currently. */ + if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) + warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); + + if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) + warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); + + if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) + { + warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); + target_flags |= MASK_APCS_FRAME; + } + + if (TARGET_POKE_FUNCTION_NAME) + target_flags |= MASK_APCS_FRAME; + + if (TARGET_APCS_REENT && flag_pic) + error ("-fpic and -mapcs-reent are incompatible"); + + if (TARGET_APCS_REENT) + warning (0, "APCS reentrant code not supported. Ignored"); + + /* If this target is normally configured to use APCS frames, warn if they + are turned off and debugging is turned on. 
*/ + if (TARGET_ARM + && write_symbols != NO_DEBUG + && !TARGET_APCS_FRAME + && (TARGET_DEFAULT & MASK_APCS_FRAME)) + warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); + + if (TARGET_APCS_FLOAT) + warning (0, "passing floating point arguments in fp regs not yet supported"); + + if (TARGET_LITTLE_WORDS) + warning (OPT_Wdeprecated, "% is deprecated and " + "will be removed in a future release"); + + /* Initialize boolean versions of the flags, for use in the arm.md file. */ + arm_arch3m = (insn_flags & FL_ARCH3M) != 0; + arm_arch4 = (insn_flags & FL_ARCH4) != 0; + arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0); + arm_arch5 = (insn_flags & FL_ARCH5) != 0; + arm_arch5e = (insn_flags & FL_ARCH5E) != 0; + arm_arch6 = (insn_flags & FL_ARCH6) != 0; + arm_arch6k = (insn_flags & FL_ARCH6K) != 0; + arm_arch_notm = (insn_flags & FL_NOTM) != 0; + arm_arch6m = arm_arch6 && !arm_arch_notm; + arm_arch7 = (insn_flags & FL_ARCH7) != 0; + arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; + arm_arch8 = (insn_flags & FL_ARCH8) != 0; + arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; + arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; + + arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; + arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; + thumb_code = TARGET_ARM == 0; + thumb1_code = TARGET_THUMB1 != 0; + arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; + arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; + arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; + arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0; + arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; + arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; + arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; + arm_arch_crc = (insn_flags & FL_CRC32) != 0; + if (arm_restrict_it == 2) + arm_restrict_it = arm_arch8 && TARGET_THUMB2; + + if (!TARGET_THUMB2) + arm_restrict_it = 0; + + /* If we are not using the default (ARM mode) section anchor offset + ranges, then set the correct ranges now. */ + if (TARGET_THUMB1) + { + /* Thumb-1 LDR instructions cannot have negative offsets. + Permissible positive offset ranges are 5-bit (for byte loads), + 6-bit (for halfword loads), or 7-bit (for word loads). + Empirical results suggest a 7-bit anchor range gives the best + overall code size. */ + targetm.min_anchor_offset = 0; + targetm.max_anchor_offset = 127; + } + else if (TARGET_THUMB2) + { + /* The minimum is set such that the total size of the block + for a particular anchor is 248 + 1 + 4095 bytes, which is + divisible by eight, ensuring natural spacing of anchors. */ + targetm.min_anchor_offset = -248; + targetm.max_anchor_offset = 4095; + } + + /* V5 code we generate is completely interworking capable, so we turn off + TARGET_INTERWORK here to avoid many tests later on. */ + + /* XXX However, we must pass the right pre-processor defines to CPP + or GLD can get confused. This is a hack. 
*/ + if (TARGET_INTERWORK) + arm_cpp_interwork = 1; + + if (arm_arch5) + target_flags &= ~MASK_INTERWORK; + + if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN) + error ("iwmmxt requires an AAPCS compatible ABI for proper operation"); + + if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) + error ("iwmmxt abi requires an iwmmxt capable cpu"); + + if (!global_options_set.x_arm_fpu_index) + { + const char *target_fpu_name; + bool ok; + +#ifdef FPUTYPE_DEFAULT + target_fpu_name = FPUTYPE_DEFAULT; +#else + target_fpu_name = "vfp"; +#endif + + ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index, + CL_TARGET); + gcc_assert (ok); + } + + arm_fpu_desc = &all_fpus[arm_fpu_index]; + + switch (arm_fpu_desc->model) + { + case ARM_FP_MODEL_VFP: + arm_fpu_attr = FPU_VFP; + break; + + default: + gcc_unreachable(); + } + + if (TARGET_AAPCS_BASED) + { + if (TARGET_CALLER_INTERWORKING) + error ("AAPCS does not support -mcaller-super-interworking"); + else + if (TARGET_CALLEE_INTERWORKING) + error ("AAPCS does not support -mcallee-super-interworking"); + } + + /* iWMMXt and NEON are incompatible. */ + if (TARGET_IWMMXT && TARGET_NEON) + error ("iWMMXt and NEON are incompatible"); + + /* iWMMXt unsupported under Thumb mode. */ + if (TARGET_THUMB && TARGET_IWMMXT) + error ("iWMMXt unsupported under Thumb mode"); + + /* __fp16 support currently assumes the core has ldrh. */ + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) + sorry ("__fp16 and no ldrh"); + + /* If soft-float is specified then don't use FPU. */ + if (TARGET_SOFT_FLOAT) + arm_fpu_attr = FPU_NONE; + + if (TARGET_AAPCS_BASED) + { + if (arm_abi == ARM_ABI_IWMMXT) + arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; + else if (arm_float_abi == ARM_FLOAT_ABI_HARD + && TARGET_HARD_FLOAT + && TARGET_VFP) + arm_pcs_default = ARM_PCS_AAPCS_VFP; + else + arm_pcs_default = ARM_PCS_AAPCS; + } + else + { + if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) + sorry ("-mfloat-abi=hard and VFP"); + + if (arm_abi == ARM_ABI_APCS) + arm_pcs_default = ARM_PCS_APCS; + else + arm_pcs_default = ARM_PCS_ATPCS; + } + + /* For arm2/3 there is no need to do any scheduling if we are doing + software floating-point. */ + if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0) + flag_schedule_insns = flag_schedule_insns_after_reload = 0; + + /* Use the cp15 method if it is available. */ + if (target_thread_pointer == TP_AUTO) + { + if (arm_arch6k && !TARGET_THUMB1) + target_thread_pointer = TP_CP15; + else + target_thread_pointer = TP_SOFT; + } + + if (TARGET_HARD_TP && TARGET_THUMB1) + error ("can not use -mtp=cp15 with 16-bit Thumb"); + + /* Override the default structure alignment for AAPCS ABI. */ + if (!global_options_set.x_arm_structure_size_boundary) + { + if (TARGET_AAPCS_BASED) + arm_structure_size_boundary = 8; + } + else + { + if (arm_structure_size_boundary != 8 + && arm_structure_size_boundary != 32 + && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64)) + { + if (ARM_DOUBLEWORD_ALIGN) + warning (0, + "structure size boundary can only be set to 8, 32 or 64"); + else + warning (0, "structure size boundary can only be set to 8 or 32"); + arm_structure_size_boundary + = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY); + } + } + + if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic) + { + error ("RTP PIC is incompatible with Thumb"); + flag_pic = 0; + } + + /* If stack checking is disabled, we can use r10 as the PIC register, + which keeps r9 available. The EABI specifies r9 as the PIC register. 
*/ + if (flag_pic && TARGET_SINGLE_PIC_BASE) + { + if (TARGET_VXWORKS_RTP) + warning (0, "RTP PIC is incompatible with -msingle-pic-base"); + arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10; + } + + if (flag_pic && TARGET_VXWORKS_RTP) + arm_pic_register = 9; + + if (arm_pic_register_string != NULL) + { + int pic_register = decode_reg_name (arm_pic_register_string); + + if (!flag_pic) + warning (0, "-mpic-register= is useless without -fpic"); + + /* Prevent the user from choosing an obviously stupid PIC register. */ + else if (pic_register < 0 || call_used_regs[pic_register] + || pic_register == HARD_FRAME_POINTER_REGNUM + || pic_register == STACK_POINTER_REGNUM + || pic_register >= PC_REGNUM + || (TARGET_VXWORKS_RTP + && (unsigned int) pic_register != arm_pic_register)) + error ("unable to use '%s' for PIC register", arm_pic_register_string); + else + arm_pic_register = pic_register; + } + + if (TARGET_VXWORKS_RTP + && !global_options_set.x_arm_pic_data_is_text_relative) + arm_pic_data_is_text_relative = 0; + + /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ + if (fix_cm3_ldrd == 2) + { + if (arm_selected_cpu->core == cortexm3) + fix_cm3_ldrd = 1; + else + fix_cm3_ldrd = 0; + } + + /* Enable -munaligned-access by default for + - all ARMv6 architecture-based processors + - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors. + - ARMv8 architecture-base processors. + + Disable -munaligned-access by default for + - all pre-ARMv6 architecture-based processors + - ARMv6-M architecture-based processors. */ + + if (unaligned_access == 2) + { + if (arm_arch6 && (arm_arch_notm || arm_arch7)) + unaligned_access = 1; + else + unaligned_access = 0; + } + else if (unaligned_access == 1 + && !(arm_arch6 && (arm_arch_notm || arm_arch7))) + { + warning (0, "target CPU does not support unaligned accesses"); + unaligned_access = 0; + } + + if (TARGET_THUMB1 && flag_schedule_insns) + { + /* Don't warn since it's on by default in -O2. */ + flag_schedule_insns = 0; + } + + if (optimize_size) + { + /* If optimizing for size, bump the number of instructions that we + are prepared to conditionally execute (even on a StrongARM). */ + max_insns_skipped = 6; + } + else + max_insns_skipped = current_tune->max_insns_skipped; + + /* Hot/Cold partitioning is not currently supported, since we can't + handle literal pool placement in that case. */ + if (flag_reorder_blocks_and_partition) + { + inform (input_location, + "-freorder-blocks-and-partition not supported on this architecture"); + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } + + if (flag_pic) + /* Hoisting PIC address calculations more aggressively provides a small, + but measurable, size reduction for PIC code. Therefore, we decrease + the bar for unrestricted expression hoisting to the cost of PIC address + calculation, which is 2 instructions. */ + maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2, + global_options.x_param_values, + global_options_set.x_param_values); + + /* ARM EABI defaults to strict volatile bitfields. */ + if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0 + && abi_version_at_least(2)) + flag_strict_volatile_bitfields = 1; + + /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed + it beneficial (signified by setting num_prefetch_slots to 1 or more.) 
*/ + if (flag_prefetch_loop_arrays < 0 + && HAVE_prefetch + && optimize >= 3 + && current_tune->num_prefetch_slots > 0) + flag_prefetch_loop_arrays = 1; + + /* Set up parameters to be used in prefetching algorithm. Do not override the + defaults unless we are tuning for a core we have researched values for. */ + if (current_tune->num_prefetch_slots > 0) + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, + current_tune->num_prefetch_slots, + global_options.x_param_values, + global_options_set.x_param_values); + if (current_tune->l1_cache_line_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, + current_tune->l1_cache_line_size, + global_options.x_param_values, + global_options_set.x_param_values); + if (current_tune->l1_cache_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_SIZE, + current_tune->l1_cache_size, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Use Neon to perform 64-bits operations rather than core + registers. */ + prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits; + if (use_neon_for_64bits == 1) + prefer_neon_for_64bits = true; + + /* Use the alternative scheduling-pressure algorithm by default. */ + maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, + global_options.x_param_values, + global_options_set.x_param_values); + + /* Disable shrink-wrap when optimizing function for size, since it tends to + generate additional returns. */ + if (optimize_function_for_size_p (cfun) && TARGET_THUMB2) + flag_shrink_wrap = false; + /* TBD: Dwarf info for apcs frame is not handled yet. */ + if (TARGET_APCS_FRAME) + flag_shrink_wrap = false; + + /* We only support -mslow-flash-data on armv7-m targets. */ + if (target_slow_flash_data + && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em) + || (TARGET_THUMB1 || flag_pic || TARGET_NEON))) + error ("-mslow-flash-data only supports non-pic code on armv7-m targets"); + + /* Currently, for slow flash data, we just disable literal pools. */ + if (target_slow_flash_data) + arm_disable_literal_pool = true; + + /* Register global variables with the garbage collector. */ + arm_add_gc_roots (); +} + +static void +arm_add_gc_roots (void) +{ + gcc_obstack_init(&minipool_obstack); + minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0); +} + +/* A table of known ARM exception types. + For use with the interrupt function attribute. */ + +typedef struct +{ + const char *const arg; + const unsigned long return_value; +} +isr_attribute_arg; + +static const isr_attribute_arg isr_attribute_args [] = +{ + { "IRQ", ARM_FT_ISR }, + { "irq", ARM_FT_ISR }, + { "FIQ", ARM_FT_FIQ }, + { "fiq", ARM_FT_FIQ }, + { "ABORT", ARM_FT_ISR }, + { "abort", ARM_FT_ISR }, + { "ABORT", ARM_FT_ISR }, + { "abort", ARM_FT_ISR }, + { "UNDEF", ARM_FT_EXCEPTION }, + { "undef", ARM_FT_EXCEPTION }, + { "SWI", ARM_FT_EXCEPTION }, + { "swi", ARM_FT_EXCEPTION }, + { NULL, ARM_FT_NORMAL } +}; + +/* Returns the (interrupt) function type of the current + function, or ARM_FT_UNKNOWN if the type cannot be determined. */ + +static unsigned long +arm_isr_value (tree argument) +{ + const isr_attribute_arg * ptr; + const char * arg; + + if (!arm_arch_notm) + return ARM_FT_NORMAL | ARM_FT_STACKALIGN; + + /* No argument - default to IRQ. */ + if (argument == NULL_TREE) + return ARM_FT_ISR; + + /* Get the value of the argument. 
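+     For example, a handler declared (purely for illustration) as
+
+       void handler (void) __attribute__ ((interrupt ("IRQ")));
+
+     arrives here with TREE_VALUE (argument) being the STRING_CST
+     "IRQ", which the table above maps to ARM_FT_ISR.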
*/ + if (TREE_VALUE (argument) == NULL_TREE + || TREE_CODE (TREE_VALUE (argument)) != STRING_CST) + return ARM_FT_UNKNOWN; + + arg = TREE_STRING_POINTER (TREE_VALUE (argument)); + + /* Check it against the list of known arguments. */ + for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++) + if (streq (arg, ptr->arg)) + return ptr->return_value; + + /* An unrecognized interrupt type. */ + return ARM_FT_UNKNOWN; +} + +/* Computes the type of the current function. */ + +static unsigned long +arm_compute_func_type (void) +{ + unsigned long type = ARM_FT_UNKNOWN; + tree a; + tree attr; + + gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL); + + /* Decide if the current function is volatile. Such functions + never return, and many memory cycles can be saved by not storing + register values that will never be needed again. This optimization + was added to speed up context switching in a kernel application. */ + if (optimize > 0 + && (TREE_NOTHROW (current_function_decl) + || !(flag_unwind_tables + || (flag_exceptions + && arm_except_unwind_info (&global_options) != UI_SJLJ))) + && TREE_THIS_VOLATILE (current_function_decl)) + type |= ARM_FT_VOLATILE; + + if (cfun->static_chain_decl != NULL) + type |= ARM_FT_NESTED; + + attr = DECL_ATTRIBUTES (current_function_decl); + + a = lookup_attribute ("naked", attr); + if (a != NULL_TREE) + type |= ARM_FT_NAKED; + + a = lookup_attribute ("isr", attr); + if (a == NULL_TREE) + a = lookup_attribute ("interrupt", attr); + + if (a == NULL_TREE) + type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL; + else + type |= arm_isr_value (TREE_VALUE (a)); + + return type; +} + +/* Returns the type of the current function. */ + +unsigned long +arm_current_func_type (void) +{ + if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN) + cfun->machine->func_type = arm_compute_func_type (); + + return cfun->machine->func_type; +} + +bool +arm_allocate_stack_slots_for_args (void) +{ + /* Naked functions should not allocate stack slots for arguments. */ + return !IS_NAKED (arm_current_func_type ()); +} + +static bool +arm_warn_func_return (tree decl) +{ + /* Naked functions are implemented entirely in assembly, including the + return sequence, so suppress warnings about this. */ + return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE; +} + + +/* Output assembler code for a block containing the constant parts + of a trampoline, leaving space for the variable parts. + + On the ARM, (if r8 is the static chain regnum, and remembering that + referencing pc adds an offset of 8) the trampoline looks like: + ldr r8, [pc, #0] + ldr pc, [pc] + .word static chain value + .word function's address + XXX FIXME: When the trampoline returns, r8 will be clobbered. */ + +static void +arm_asm_trampoline_template (FILE *f) +{ + if (TARGET_ARM) + { + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); + } + else if (TARGET_THUMB2) + { + /* The Thumb-2 trampoline is similar to the arm implementation. + Unlike 16-bit Thumb, we enter the stub in thumb mode. 
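+     A sketch of the resulting stub (offsets match the stores done by
+     arm_trampoline_init below; recall that reading PC in Thumb state
+     yields the address of the current instruction plus 4):
+
+       0:   ldr.w  <static chain reg>, [pc, #4]   @ loads the word at 8
+       4:   ldr.w  pc, [pc, #4]                   @ loads the word at 12
+       8:   .word  <static chain value>
+       12:  .word  <address of the nested function>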
*/ + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", + STATIC_CHAIN_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM); + } + else + { + ASM_OUTPUT_ALIGN (f, 2); + fprintf (f, "\t.code\t16\n"); + fprintf (f, ".Ltrampoline_start:\n"); + asm_fprintf (f, "\tpush\t{r0, r1}\n"); + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); + asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM); + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); + asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM); + asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM); + } + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline. */ + +static void +arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, a_tramp; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12); + emit_move_insn (mem, chain_value); + + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16); + fnaddr = XEXP (DECL_RTL (fndecl), 0); + emit_move_insn (mem, fnaddr); + + a_tramp = XEXP (m_tramp, 0); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), + LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode, + plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode); +} + +/* Thumb trampolines should be entered in thumb mode, so set + the bottom bit of the address. */ + +static rtx +arm_trampoline_adjust_address (rtx addr) +{ + if (TARGET_THUMB) + addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx, + NULL, 0, OPTAB_LIB_WIDEN); + return addr; +} + +/* Return 1 if it is possible to return using a single instruction. + If SIBLING is non-null, this is a test for a return before a sibling + call. SIBLING is the call insn, so we can examine its register usage. */ + +int +use_return_insn (int iscond, rtx sibling) +{ + int regno; + unsigned int func_type; + unsigned long saved_int_regs; + unsigned HOST_WIDE_INT stack_adjust; + arm_stack_offsets *offsets; + + /* Never use a return instruction before reload has run. */ + if (!reload_completed) + return 0; + + func_type = arm_current_func_type (); + + /* Naked, volatile and stack alignment functions need special + consideration. */ + if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN)) + return 0; + + /* So do interrupt functions that use the frame pointer and Thumb + interrupt functions. */ + if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB)) + return 0; + + if (TARGET_LDRD && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + return 0; + + offsets = arm_get_frame_offsets (); + stack_adjust = offsets->outgoing_args - offsets->saved_regs; + + /* As do variadic functions. */ + if (crtl->args.pretend_args_size + || cfun->machine->uses_anonymous_args + /* Or if the function calls __builtin_eh_return () */ + || crtl->calls_eh_return + /* Or if the function calls alloca */ + || cfun->calls_alloca + /* Or if there is a stack adjustment. However, if the stack pointer + is saved on the stack, we can use a pre-incrementing stack load. 
*/ + || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed + && stack_adjust == 4))) + return 0; + + saved_int_regs = offsets->saved_regs_mask; + + /* Unfortunately, the insn + + ldmib sp, {..., sp, ...} + + triggers a bug on most SA-110 based devices, such that the stack + pointer won't be correctly restored if the instruction takes a + page fault. We work around this problem by popping r3 along with + the other registers, since that is never slower than executing + another instruction. + + We test for !arm_arch5 here, because code for any architecture + less than this could potentially be run on one of the buggy + chips. */ + if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM) + { + /* Validate that r3 is a call-clobbered register (always true in + the default abi) ... */ + if (!call_used_regs[3]) + return 0; + + /* ... that it isn't being used for a return value ... */ + if (arm_size_return_regs () >= (4 * UNITS_PER_WORD)) + return 0; + + /* ... or for a tail-call argument ... */ + if (sibling) + { + gcc_assert (CALL_P (sibling)); + + if (find_regno_fusage (sibling, USE, 3)) + return 0; + } + + /* ... and that there are no call-saved registers in r0-r2 + (always true in the default ABI). */ + if (saved_int_regs & 0x7) + return 0; + } + + /* Can't be done if interworking with Thumb, and any registers have been + stacked. */ + if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type)) + return 0; + + /* On StrongARM, conditional returns are expensive if they aren't + taken and multiple registers have been stacked. */ + if (iscond && arm_tune_strongarm) + { + /* Conditional return when just the LR is stored is a simple + conditional-load instruction, that's not expensive. */ + if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM)) + return 0; + + if (flag_pic + && arm_pic_register != INVALID_REGNUM + && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) + return 0; + } + + /* If there are saved registers but the LR isn't saved, then we need + two instructions for the return. */ + if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM))) + return 0; + + /* Can't be done if any of the VFP regs are pushed, + since this also requires an insn. */ + if (TARGET_HARD_FLOAT && TARGET_VFP) + for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++) + if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) + return 0; + + if (TARGET_REALLY_IWMMXT) + for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++) + if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) + return 0; + + return 1; +} + +/* Return TRUE if we should try to use a simple_return insn, i.e. perform + shrink-wrapping if possible. This is the case if we need to emit a + prologue, which we can test by looking at the offsets. */ +bool +use_simple_return_p (void) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + return offsets->outgoing_args != 0; +} + +/* Return TRUE if int I is a valid immediate ARM constant. */ + +int +const_ok_for_arm (HOST_WIDE_INT i) +{ + int lowbit; + + /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must + be all zero, or all one. */ + if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0 + && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) + != ((~(unsigned HOST_WIDE_INT) 0) + & ~(unsigned HOST_WIDE_INT) 0xffffffff))) + return FALSE; + + i &= (unsigned HOST_WIDE_INT) 0xffffffff; + + /* Fast return for 0 and small values. We must do this for zero, since + the code below can't handle that one case. 
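+     Beyond that fast path, a few worked examples of what the checks
+     below decide (constants chosen only for illustration):
+       0x000003fc  valid in ARM mode   (0xff shifted left by 2)
+       0xf000000f  valid in ARM mode   (0xff rotated right by 4)
+       0x00000101  not valid           (the set bits span nine bits)
+     and for the Thumb-2 replicated forms in the else arm, 0x00ff00ff
+     and 0xab00ab00 are accepted while 0x00ff0102 is not.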
*/ + if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0) + return TRUE; + + /* Get the number of trailing zeros. */ + lowbit = ffs((int) i) - 1; + + /* Only even shifts are allowed in ARM mode so round down to the + nearest even number. */ + if (TARGET_ARM) + lowbit &= ~1; + + if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0) + return TRUE; + + if (TARGET_ARM) + { + /* Allow rotated constants in ARM mode. */ + if (lowbit <= 4 + && ((i & ~0xc000003f) == 0 + || (i & ~0xf000000f) == 0 + || (i & ~0xfc000003) == 0)) + return TRUE; + } + else + { + HOST_WIDE_INT v; + + /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */ + v = i & 0xff; + v |= v << 16; + if (i == v || i == (v | (v << 8))) + return TRUE; + + /* Allow repeated pattern 0xXY00XY00. */ + v = i & 0xff00; + v |= v << 16; + if (i == v) + return TRUE; + } + + return FALSE; +} + +/* Return true if I is a valid constant for the operation CODE. */ +int +const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) +{ + if (const_ok_for_arm (i)) + return 1; + + switch (code) + { + case SET: + /* See if we can use movw. */ + if (arm_arch_thumb2 && (i & 0xffff0000) == 0) + return 1; + else + /* Otherwise, try mvn. */ + return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); + + case PLUS: + /* See if we can use addw or subw. */ + if (TARGET_THUMB2 + && ((i & 0xfffff000) == 0 + || ((-i) & 0xfffff000) == 0)) + return 1; + /* else fall through. */ + + case COMPARE: + case EQ: + case NE: + case GT: + case LE: + case LT: + case GE: + case GEU: + case LTU: + case GTU: + case LEU: + case UNORDERED: + case ORDERED: + case UNEQ: + case UNGE: + case UNLT: + case UNGT: + case UNLE: + return const_ok_for_arm (ARM_SIGN_EXTEND (-i)); + + case MINUS: /* Should only occur with (MINUS I reg) => rsb */ + case XOR: + return 0; + + case IOR: + if (TARGET_THUMB2) + return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); + return 0; + + case AND: + return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); + + default: + gcc_unreachable (); + } +} + +/* Return true if I is a valid di mode constant for the operation CODE. */ +int +const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code) +{ + HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF; + HOST_WIDE_INT lo_val = i & 0xFFFFFFFF; + rtx hi = GEN_INT (hi_val); + rtx lo = GEN_INT (lo_val); + + if (TARGET_THUMB1) + return 0; + + switch (code) + { + case AND: + case IOR: + case XOR: + return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF) + && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF); + case PLUS: + return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode); + + default: + return 0; + } +} + +/* Emit a sequence of insns to handle a large constant. + CODE is the code of the operation required, it can be any of SET, PLUS, + IOR, AND, XOR, MINUS; + MODE is the mode in which the operation is being performed; + VAL is the integer to operate on; + SOURCE is the other operand (a register, or a null-pointer for SET); + SUBTARGETS means it is safe to create scratch registers if that will + either produce a simpler sequence, or we will want to cse the values. + Return value is the number of insns emitted. */ + +/* ??? Tweak this for thumb2. 
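+     As a small illustration of the SET path (the register name is a
+     placeholder): when the target has MOVW/MOVT (TARGET_USE_MOVT), an
+     expensive constant such as 0x12345678 is emitted by
+     arm_emit_movpair as
+
+       movw  rd, #0x5678
+       movt  rd, #0x1234
+
+     otherwise arm_gen_constant below is asked to synthesize it from
+     the ordinary immediate forms.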
*/ +int +arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn, + HOST_WIDE_INT val, rtx target, rtx source, int subtargets) +{ + rtx cond; + + if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC) + cond = COND_EXEC_TEST (PATTERN (insn)); + else + cond = NULL_RTX; + + if (subtargets || code == SET + || (REG_P (target) && REG_P (source) + && REGNO (target) != REGNO (source))) + { + /* After arm_reorg has been called, we can't fix up expensive + constants by pushing them into memory so we must synthesize + them in-line, regardless of the cost. This is only likely to + be more costly on chips that have load delay slots and we are + compiling without running the scheduler (so no splitting + occurred before the final instruction emission). + + Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c + */ + if (!after_arm_reorg + && !cond + && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, + 1, 0) + > (arm_constant_limit (optimize_function_for_size_p (cfun)) + + (code != SET)))) + { + if (code == SET) + { + /* Currently SET is the only monadic value for CODE, all + the rest are diadic. */ + if (TARGET_USE_MOVT) + arm_emit_movpair (target, GEN_INT (val)); + else + emit_set_insn (target, GEN_INT (val)); + + return 1; + } + else + { + rtx temp = subtargets ? gen_reg_rtx (mode) : target; + + if (TARGET_USE_MOVT) + arm_emit_movpair (temp, GEN_INT (val)); + else + emit_set_insn (temp, GEN_INT (val)); + + /* For MINUS, the value is subtracted from, since we never + have subtraction of a constant. */ + if (code == MINUS) + emit_set_insn (target, gen_rtx_MINUS (mode, temp, source)); + else + emit_set_insn (target, + gen_rtx_fmt_ee (code, mode, source, temp)); + return 2; + } + } + } + + return arm_gen_constant (code, mode, cond, val, target, source, subtargets, + 1); +} + +/* Return a sequence of integers, in RETURN_SEQUENCE that fit into + ARM/THUMB2 immediates, and add up to VAL. + Thr function return value gives the number of insns required. */ +static int +optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, + struct four_ints *return_sequence) +{ + int best_consecutive_zeros = 0; + int i; + int best_start = 0; + int insns1, insns2; + struct four_ints tmp_sequence; + + /* If we aren't targeting ARM, the best place to start is always at + the bottom, otherwise look more closely. */ + if (TARGET_ARM) + { + for (i = 0; i < 32; i += 2) + { + int consecutive_zeros = 0; + + if (!(val & (3 << i))) + { + while ((i < 32) && !(val & (3 << i))) + { + consecutive_zeros += 2; + i += 2; + } + if (consecutive_zeros > best_consecutive_zeros) + { + best_consecutive_zeros = consecutive_zeros; + best_start = i - consecutive_zeros; + } + i -= 2; + } + } + } + + /* So long as it won't require any more insns to do so, it's + desirable to emit a small constant (in bits 0...9) in the last + insn. This way there is more chance that it can be combined with + a later addressing insn to form a pre-indexed load or store + operation. Consider: + + *((volatile int *)0xe0000100) = 1; + *((volatile int *)0xe0000110) = 2; + + We want this to wind up as: + + mov rA, #0xe0000000 + mov rB, #1 + str rB, [rA, #0x100] + mov rB, #2 + str rB, [rA, #0x110] + + rather than having to synthesize both large constants from scratch. + + Therefore, we calculate how many insns would be required to emit + the constant starting from `best_start', and also starting from + zero (i.e. with bit 31 first to be output). If `best_start' doesn't + yield a shorter sequence, we may as well use zero. 
*/ + insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start); + if (best_start != 0 + && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val)) + { + insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0); + if (insns2 <= insns1) + { + *return_sequence = tmp_sequence; + insns1 = insns2; + } + } + + return insns1; +} + +/* As for optimal_immediate_sequence, but starting at bit-position I. */ +static int +optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val, + struct four_ints *return_sequence, int i) +{ + int remainder = val & 0xffffffff; + int insns = 0; + + /* Try and find a way of doing the job in either two or three + instructions. + + In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned + location. We start at position I. This may be the MSB, or + optimial_immediate_sequence may have positioned it at the largest block + of zeros that are aligned on a 2-bit boundary. We then fill up the temps, + wrapping around to the top of the word when we drop off the bottom. + In the worst case this code should produce no more than four insns. + + In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit + constants, shifted to any arbitrary location. We should always start + at the MSB. */ + do + { + int end; + unsigned int b1, b2, b3, b4; + unsigned HOST_WIDE_INT result; + int loc; + + gcc_assert (insns < 4); + + if (i <= 0) + i += 32; + + /* First, find the next normal 12/8-bit shifted/rotated immediate. */ + if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1))))) + { + loc = i; + if (i <= 12 && TARGET_THUMB2 && code == PLUS) + /* We can use addw/subw for the last 12 bits. */ + result = remainder; + else + { + /* Use an 8-bit shifted/rotated immediate. */ + end = i - 8; + if (end < 0) + end += 32; + result = remainder & ((0x0ff << end) + | ((i < end) ? (0xff >> (32 - end)) + : 0)); + i -= 8; + } + } + else + { + /* Arm allows rotates by a multiple of two. Thumb-2 allows + arbitrary shifts. */ + i -= TARGET_ARM ? 2 : 1; + continue; + } + + /* Next, see if we can do a better job with a thumb2 replicated + constant. + + We do it this way around to catch the cases like 0x01F001E0 where + two 8-bit immediates would work, but a replicated constant would + make it worse. + + TODO: 16-bit constants that don't clear all the bits, but still win. + TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */ + if (TARGET_THUMB2) + { + b1 = (remainder & 0xff000000) >> 24; + b2 = (remainder & 0x00ff0000) >> 16; + b3 = (remainder & 0x0000ff00) >> 8; + b4 = remainder & 0xff; + + if (loc > 24) + { + /* The 8-bit immediate already found clears b1 (and maybe b2), + but must leave b3 and b4 alone. */ + + /* First try to find a 32-bit replicated constant that clears + almost everything. We can assume that we can't do it in one, + or else we wouldn't be here. */ + unsigned int tmp = b1 & b2 & b3 & b4; + unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16) + + (tmp << 24); + unsigned int matching_bytes = (tmp == b1) + (tmp == b2) + + (tmp == b3) + (tmp == b4); + if (tmp + && (matching_bytes >= 3 + || (matching_bytes == 2 + && const_ok_for_op (remainder & ~tmp2, code)))) + { + /* At least 3 of the bytes match, and the fourth has at + least as many bits set, or two of the bytes match + and it will only require one more insn to finish. */ + result = tmp2; + i = tmp != b1 ? 32 + : tmp != b2 ? 24 + : tmp != b3 ? 16 + : 8; + } + + /* Second, try to find a 16-bit replicated constant that can + leave three of the bytes clear. 
If b2 or b4 is already + zero, then we can. If the 8-bit from above would not + clear b2 anyway, then we still win. */ + else if (b1 == b3 && (!b2 || !b4 + || (remainder & 0x00ff0000 & ~result))) + { + result = remainder & 0xff00ff00; + i = 24; + } + } + else if (loc > 16) + { + /* The 8-bit immediate already found clears b2 (and maybe b3) + and we don't get here unless b1 is alredy clear, but it will + leave b4 unchanged. */ + + /* If we can clear b2 and b4 at once, then we win, since the + 8-bits couldn't possibly reach that far. */ + if (b2 == b4) + { + result = remainder & 0x00ff00ff; + i = 16; + } + } + } + + return_sequence->i[insns++] = result; + remainder &= ~result; + + if (code == SET || code == MINUS) + code = PLUS; + } + while (remainder); + + return insns; +} + +/* Emit an instruction with the indicated PATTERN. If COND is + non-NULL, conditionalize the execution of the instruction on COND + being true. */ + +static void +emit_constant_insn (rtx cond, rtx pattern) +{ + if (cond) + pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern); + emit_insn (pattern); +} + +/* As above, but extra parameter GENERATE which, if clear, suppresses + RTL generation. */ + +static int +arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, + HOST_WIDE_INT val, rtx target, rtx source, int subtargets, + int generate) +{ + int can_invert = 0; + int can_negate = 0; + int final_invert = 0; + int i; + int set_sign_bit_copies = 0; + int clear_sign_bit_copies = 0; + int clear_zero_bit_copies = 0; + int set_zero_bit_copies = 0; + int insns = 0, neg_insns, inv_insns; + unsigned HOST_WIDE_INT temp1, temp2; + unsigned HOST_WIDE_INT remainder = val & 0xffffffff; + struct four_ints *immediates; + struct four_ints pos_immediates, neg_immediates, inv_immediates; + + /* Find out which operations are safe for a given CODE. Also do a quick + check for degenerate cases; these can occur when DImode operations + are split. */ + switch (code) + { + case SET: + can_invert = 1; + break; + + case PLUS: + can_negate = 1; + break; + + case IOR: + if (remainder == 0xffffffff) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + GEN_INT (ARM_SIGN_EXTEND (val)))); + return 1; + } + + if (remainder == 0) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + break; + + case AND: + if (remainder == 0) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, const0_rtx)); + return 1; + } + if (remainder == 0xffffffff) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + can_invert = 1; + break; + + case XOR: + if (remainder == 0) + { + if (reload_completed && rtx_equal_p (target, source)) + return 0; + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, source)); + return 1; + } + + if (remainder == 0xffffffff) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, source))); + return 1; + } + final_invert = 1; + break; + + case MINUS: + /* We treat MINUS as (val - source), since (source - val) is always + passed as (source + (-val)). 
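+     So for C source such as  x = 42 - y;  the operation reaches this
+     function as (MINUS 42 y), and when the constant is a valid
+     immediate the single-insn case below emits it as a reverse
+     subtract (rsb) of the register from #42.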
*/ + if (remainder == 0) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NEG (mode, source))); + return 1; + } + if (const_ok_for_arm (val)) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_MINUS (mode, GEN_INT (val), + source))); + return 1; + } + + break; + + default: + gcc_unreachable (); + } + + /* If we can do it in one insn get out quickly. */ + if (const_ok_for_op (val, code)) + { + if (generate) + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + (source + ? gen_rtx_fmt_ee (code, mode, source, + GEN_INT (val)) + : GEN_INT (val)))); + return 1; + } + + /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single + insn. */ + if (code == AND && (i = exact_log2 (remainder + 1)) > 0 + && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode))) + { + if (generate) + { + if (mode == SImode && i == 16) + /* Use UXTH in preference to UBFX, since on Thumb2 it's a + smaller insn. */ + emit_constant_insn (cond, + gen_zero_extendhisi2 + (target, gen_lowpart (HImode, source))); + else + /* Extz only supports SImode, but we can coerce the operands + into that mode. */ + emit_constant_insn (cond, + gen_extzv_t2 (gen_lowpart (SImode, target), + gen_lowpart (SImode, source), + GEN_INT (i), const0_rtx)); + } + + return 1; + } + + /* Calculate a few attributes that may be useful for specific + optimizations. */ + /* Count number of leading zeros. */ + for (i = 31; i >= 0; i--) + { + if ((remainder & (1 << i)) == 0) + clear_sign_bit_copies++; + else + break; + } + + /* Count number of leading 1's. */ + for (i = 31; i >= 0; i--) + { + if ((remainder & (1 << i)) != 0) + set_sign_bit_copies++; + else + break; + } + + /* Count number of trailing zero's. */ + for (i = 0; i <= 31; i++) + { + if ((remainder & (1 << i)) == 0) + clear_zero_bit_copies++; + else + break; + } + + /* Count number of trailing 1's. */ + for (i = 0; i <= 31; i++) + { + if ((remainder & (1 << i)) != 0) + set_zero_bit_copies++; + else + break; + } + + switch (code) + { + case SET: + /* See if we can do this by sign_extending a constant that is known + to be negative. This is a good, way of doing it, since the shift + may well merge into a subsequent insn. */ + if (set_sign_bit_copies > 1) + { + if (const_ok_for_arm + (temp1 = ARM_SIGN_EXTEND (remainder + << (set_sign_bit_copies - 1)))) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_ashrsi3 (target, new_src, + GEN_INT (set_sign_bit_copies - 1))); + } + return 2; + } + /* For an inverted constant, we will need to set the low bits, + these will be shifted out of harm's way. */ + temp1 |= (1 << (set_sign_bit_copies - 1)) - 1; + if (const_ok_for_arm (~temp1)) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_ashrsi3 (target, new_src, + GEN_INT (set_sign_bit_copies - 1))); + } + return 2; + } + } + + /* See if we can calculate the value as the difference between two + valid immediates. 
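+     A worked example, with the constant chosen purely for
+     illustration: for remainder == 0x00ffff00 we get topshift == 8,
+     temp1 == 0x01000000 (via the overflow fixup below) and
+     temp2 == 0x100, both valid immediates, so the value is built in
+     two insns: a move of 0x01000000 followed by an add of -0x100
+     (which assembles as a subtract of 0x100).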
*/ + if (clear_sign_bit_copies + clear_zero_bit_copies <= 16) + { + int topshift = clear_sign_bit_copies & ~1; + + temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift)) + & (0xff000000 >> topshift)); + + /* If temp1 is zero, then that means the 9 most significant + bits of remainder were 1 and we've caused it to overflow. + When topshift is 0 we don't need to do anything since we + can borrow from 'bit 32'. */ + if (temp1 == 0 && topshift != 0) + temp1 = 0x80000000 >> (topshift - 1); + + temp2 = ARM_SIGN_EXTEND (temp1 - remainder); + + if (const_ok_for_arm (temp2)) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + GEN_INT (temp1))); + emit_constant_insn (cond, + gen_addsi3 (target, new_src, + GEN_INT (-temp2))); + } + + return 2; + } + } + + /* See if we can generate this by setting the bottom (or the top) + 16 bits, and then shifting these into the other half of the + word. We only look for the simplest cases, to do more would cost + too much. Be careful, however, not to generate this when the + alternative would take fewer insns. */ + if (val & 0xffff0000) + { + temp1 = remainder & 0xffff0000; + temp2 = remainder & 0x0000ffff; + + /* Overlaps outside this range are best done using other methods. */ + for (i = 9; i < 24; i++) + { + if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder) + && !const_ok_for_arm (temp2)) + { + rtx new_src = (subtargets + ? (generate ? gen_reg_rtx (mode) : NULL_RTX) + : target); + insns = arm_gen_constant (code, mode, cond, temp2, new_src, + source, subtargets, generate); + source = new_src; + if (generate) + emit_constant_insn + (cond, + gen_rtx_SET + (VOIDmode, target, + gen_rtx_IOR (mode, + gen_rtx_ASHIFT (mode, source, + GEN_INT (i)), + source))); + return insns + 1; + } + } + + /* Don't duplicate cases already considered. */ + for (i = 17; i < 24; i++) + { + if (((temp1 | (temp1 >> i)) == remainder) + && !const_ok_for_arm (temp1)) + { + rtx new_src = (subtargets + ? (generate ? gen_reg_rtx (mode) : NULL_RTX) + : target); + insns = arm_gen_constant (code, mode, cond, temp1, new_src, + source, subtargets, generate); + source = new_src; + if (generate) + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_IOR + (mode, + gen_rtx_LSHIFTRT (mode, source, + GEN_INT (i)), + source))); + return insns + 1; + } + } + } + break; + + case IOR: + case XOR: + /* If we have IOR or XOR, and the constant can be loaded in a + single instruction, and we can find a temporary to put it in, + then this can be done in two instructions instead of 3-4. */ + if (subtargets + /* TARGET can't be NULL if SUBTARGETS is 0 */ + || (reload_completed && !reg_mentioned_p (target, source))) + { + if (const_ok_for_arm (ARM_SIGN_EXTEND (~val))) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + GEN_INT (val))); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_fmt_ee (code, mode, + source, sub))); + } + return 2; + } + } + + if (code == XOR) + break; + + /* Convert. + x = y | constant ( which is composed of set_sign_bit_copies of leading 1s + and the remainder 0s for e.g. 0xfff00000) + x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies) + + This can be done in 2 instructions by using shifts with mov or mvn. + e.g. for + x = x | 0xfff00000; + we generate. 
+ mvn r0, r0, asl #12 + mvn r0, r0, lsr #12 */ + if (set_sign_bit_copies > 8 + && (val & (-1 << (32 - set_sign_bit_copies))) == val) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (set_sign_bit_copies); + + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, + gen_rtx_ASHIFT (mode, + source, + shift)))); + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, + gen_rtx_LSHIFTRT (mode, sub, + shift)))); + } + return 2; + } + + /* Convert + x = y | constant (which has set_zero_bit_copies number of trailing ones). + to + x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies). + + For eg. r0 = r0 | 0xfff + mvn r0, r0, lsr #12 + mvn r0, r0, asl #12 + + */ + if (set_zero_bit_copies > 8 + && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (set_zero_bit_copies); + + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, + gen_rtx_LSHIFTRT (mode, + source, + shift)))); + emit_constant_insn + (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, + gen_rtx_ASHIFT (mode, sub, + shift)))); + } + return 2; + } + + /* This will never be reached for Thumb2 because orn is a valid + instruction. This is for Thumb1 and the ARM 32 bit cases. + + x = y | constant (such that ~constant is a valid constant) + Transform this to + x = ~(~y & ~constant). + */ + if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val))) + { + if (generate) + { + rtx sub = subtargets ? gen_reg_rtx (mode) : target; + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_NOT (mode, source))); + source = sub; + if (subtargets) + sub = gen_reg_rtx (mode); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, sub, + gen_rtx_AND (mode, source, + GEN_INT (temp1)))); + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, sub))); + } + return 3; + } + break; + + case AND: + /* See if two shifts will do 2 or more insn's worth of work. */ + if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24) + { + HOST_WIDE_INT shift_mask = ((0xffffffff + << (32 - clear_sign_bit_copies)) + & 0xffffffff); + + if ((remainder | shift_mask) != 0xffffffff) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + new_src, source, subtargets, 1); + source = new_src; + } + else + { + rtx targ = subtargets ? NULL_RTX : target; + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + targ, source, subtargets, 0); + } + } + + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (clear_sign_bit_copies); + + emit_insn (gen_ashlsi3 (new_src, source, shift)); + emit_insn (gen_lshrsi3 (target, new_src, shift)); + } + + return insns + 2; + } + + if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24) + { + HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1; + + if ((remainder | shift_mask) != 0xffffffff) + { + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + new_src, source, subtargets, 1); + source = new_src; + } + else + { + rtx targ = subtargets ? 
NULL_RTX : target; + + insns = arm_gen_constant (AND, mode, cond, + remainder | shift_mask, + targ, source, subtargets, 0); + } + } + + if (generate) + { + rtx new_src = subtargets ? gen_reg_rtx (mode) : target; + rtx shift = GEN_INT (clear_zero_bit_copies); + + emit_insn (gen_lshrsi3 (new_src, source, shift)); + emit_insn (gen_ashlsi3 (target, new_src, shift)); + } + + return insns + 2; + } + + break; + + default: + break; + } + + /* Calculate what the instruction sequences would be if we generated it + normally, negated, or inverted. */ + if (code == AND) + /* AND cannot be split into multiple insns, so invert and use BIC. */ + insns = 99; + else + insns = optimal_immediate_sequence (code, remainder, &pos_immediates); + + if (can_negate) + neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff, + &neg_immediates); + else + neg_insns = 99; + + if (can_invert || final_invert) + inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff, + &inv_immediates); + else + inv_insns = 99; + + immediates = &pos_immediates; + + /* Is the negated immediate sequence more efficient? */ + if (neg_insns < insns && neg_insns <= inv_insns) + { + insns = neg_insns; + immediates = &neg_immediates; + } + else + can_negate = 0; + + /* Is the inverted immediate sequence more efficient? + We must allow for an extra NOT instruction for XOR operations, although + there is some chance that the final 'mvn' will get optimized later. */ + if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns)) + { + insns = inv_insns; + immediates = &inv_immediates; + } + else + { + can_invert = 0; + final_invert = 0; + } + + /* Now output the chosen sequence as instructions. */ + if (generate) + { + for (i = 0; i < insns; i++) + { + rtx new_src, temp1_rtx; + + temp1 = immediates->i[i]; + + if (code == SET || code == MINUS) + new_src = (subtargets ? gen_reg_rtx (mode) : target); + else if ((final_invert || i < (insns - 1)) && subtargets) + new_src = gen_reg_rtx (mode); + else + new_src = target; + + if (can_invert) + temp1 = ~temp1; + else if (can_negate) + temp1 = -temp1; + + temp1 = trunc_int_for_mode (temp1, mode); + temp1_rtx = GEN_INT (temp1); + + if (code == SET) + ; + else if (code == MINUS) + temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); + else + temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); + + emit_constant_insn (cond, + gen_rtx_SET (VOIDmode, new_src, + temp1_rtx)); + source = new_src; + + if (code == SET) + { + can_negate = can_invert; + can_invert = 0; + code = PLUS; + } + else if (code == MINUS) + code = PLUS; + } + } + + if (final_invert) + { + if (generate) + emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, + gen_rtx_NOT (mode, source))); + insns++; + } + + return insns; +} + +/* Canonicalize a comparison so that we are more likely to recognize it. + This can be done for a few constant compares, where we can make the + immediate value easier to load. */ + +static void +arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + enum machine_mode mode; + unsigned HOST_WIDE_INT i, maxval; + + mode = GET_MODE (*op0); + if (mode == VOIDmode) + mode = GET_MODE (*op1); + + maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; + + /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode + we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either + reversed or (for constant OP1) adjusted to GE/LT. Similarly + for GTU/LEU in Thumb mode. 
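+     For example, a DImode (GT x 9) is rewritten below as (GE x 10)
+     when the constant 10 is usable by arm_const_double_by_immediates,
+     and (LE x 9) likewise becomes (LT x 10); if the constant cannot
+     be adjusted, the operands are swapped and the comparison code
+     swapped to match (unless OP0 must keep its value).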
*/ + if (mode == DImode) + { + rtx tem; + + if (*code == GT || *code == LE + || (!TARGET_ARM && (*code == GTU || *code == LEU))) + { + /* Missing comparison. First try to use an available + comparison. */ + if (CONST_INT_P (*op1)) + { + i = INTVAL (*op1); + switch (*code) + { + case GT: + case LE: + if (i != maxval + && arm_const_double_by_immediates (GEN_INT (i + 1))) + { + *op1 = GEN_INT (i + 1); + *code = *code == GT ? GE : LT; + return; + } + break; + case GTU: + case LEU: + if (i != ~((unsigned HOST_WIDE_INT) 0) + && arm_const_double_by_immediates (GEN_INT (i + 1))) + { + *op1 = GEN_INT (i + 1); + *code = *code == GTU ? GEU : LTU; + return; + } + break; + default: + gcc_unreachable (); + } + } + + /* If that did not work, reverse the condition. */ + if (!op0_preserve_value) + { + tem = *op0; + *op0 = *op1; + *op1 = tem; + *code = (int)swap_condition ((enum rtx_code)*code); + } + } + return; + } + + /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing + with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)), + to facilitate possible combining with a cmp into 'ands'. */ + if (mode == SImode + && GET_CODE (*op0) == ZERO_EXTEND + && GET_CODE (XEXP (*op0, 0)) == SUBREG + && GET_MODE (XEXP (*op0, 0)) == QImode + && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode + && subreg_lowpart_p (XEXP (*op0, 0)) + && *op1 == const0_rtx) + *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)), + GEN_INT (255)); + + /* Comparisons smaller than DImode. Only adjust comparisons against + an out-of-range constant. */ + if (!CONST_INT_P (*op1) + || const_ok_for_arm (INTVAL (*op1)) + || const_ok_for_arm (- INTVAL (*op1))) + return; + + i = INTVAL (*op1); + + switch (*code) + { + case EQ: + case NE: + return; + + case GT: + case LE: + if (i != maxval + && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) + { + *op1 = GEN_INT (i + 1); + *code = *code == GT ? GE : LT; + return; + } + break; + + case GE: + case LT: + if (i != ~maxval + && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) + { + *op1 = GEN_INT (i - 1); + *code = *code == GE ? GT : LE; + return; + } + break; + + case GTU: + case LEU: + if (i != ~((unsigned HOST_WIDE_INT) 0) + && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) + { + *op1 = GEN_INT (i + 1); + *code = *code == GTU ? GEU : LTU; + return; + } + break; + + case GEU: + case LTU: + if (i != 0 + && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) + { + *op1 = GEN_INT (i - 1); + *code = *code == GEU ? GTU : LEU; + return; + } + break; + + default: + gcc_unreachable (); + } +} + + +/* Define how to find the value returned by a function. */ + +static rtx +arm_function_value(const_tree type, const_tree func, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + int unsignedp ATTRIBUTE_UNUSED; + rtx r ATTRIBUTE_UNUSED; + + mode = TYPE_MODE (type); + + if (TARGET_AAPCS_BASED) + return aapcs_allocate_return_reg (mode, type, func); + + /* Promote integer types. */ + if (INTEGRAL_TYPE_P (type)) + mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1); + + /* Promotes small structs returned in a register to full-word size + for big-endian AAPCS. */ + if (arm_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + return arm_libcall_value_1 (mode); +} + +/* libcall hashtable helpers. 
*/ + +struct libcall_hasher : typed_noop_remove +{ + typedef rtx_def value_type; + typedef rtx_def compare_type; + static inline hashval_t hash (const value_type *); + static inline bool equal (const value_type *, const compare_type *); + static inline void remove (value_type *); +}; + +inline bool +libcall_hasher::equal (const value_type *p1, const compare_type *p2) +{ + return rtx_equal_p (p1, p2); +} + +inline hashval_t +libcall_hasher::hash (const value_type *p1) +{ + return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE); +} + +typedef hash_table libcall_table_type; + +static void +add_libcall (libcall_table_type htab, rtx libcall) +{ + *htab.find_slot (libcall, INSERT) = libcall; +} + +static bool +arm_libcall_uses_aapcs_base (const_rtx libcall) +{ + static bool init_done = false; + static libcall_table_type libcall_htab; + + if (!init_done) + { + init_done = true; + + libcall_htab.create (31); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, SFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, DFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, SFmode, DImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfloat_optab, DFmode, DImode)); + + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, SFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, DFmode, SImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, SFmode, DImode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufloat_optab, DFmode, DImode)); + + add_libcall (libcall_htab, + convert_optab_libfunc (sext_optab, SFmode, HFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (trunc_optab, HFmode, SFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfix_optab, SImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, SImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfix_optab, DImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, DImode, DFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (sfix_optab, DImode, SFmode)); + add_libcall (libcall_htab, + convert_optab_libfunc (ufix_optab, DImode, SFmode)); + + /* Values from double-precision helper functions are returned in core + registers if the selected core only supports single-precision + arithmetic, even if we are using the hard-float ABI. The same is + true for single-precision helpers, but we will never be using the + hard-float ABI on a CPU which doesn't support single-precision + operations in hardware. 
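+     For example (the FPU name is given only as an illustration), on a
+     core with a single-precision-only FPU such as -mfpu=fpv4-sp-d16, a
+     DFmode addition compiled with -mfloat-abi=hard still goes through
+     a library helper, and that helper hands its result back in the
+     core registers r0/r1; arm_libcall_value below therefore has to
+     treat these entries as following the base PCS.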
*/ + add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode)); + add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode)); + add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode, + SFmode)); + add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode, + DFmode)); + } + + return libcall && libcall_htab.find (libcall) != NULL; +} + +static rtx +arm_libcall_value_1 (enum machine_mode mode) +{ + if (TARGET_AAPCS_BASED) + return aapcs_libcall_value (mode); + else if (TARGET_IWMMXT_ABI + && arm_vector_mode_supported_p (mode)) + return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM); + else + return gen_rtx_REG (mode, ARG_REGISTER (1)); +} + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +static rtx +arm_libcall_value (enum machine_mode mode, const_rtx libcall) +{ + if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS + && GET_MODE_CLASS (mode) == MODE_FLOAT) + { + /* The following libcalls return their result in integer registers, + even though they return a floating point value. */ + if (arm_libcall_uses_aapcs_base (libcall)) + return gen_rtx_REG (mode, ARG_REGISTER(1)); + + } + + return arm_libcall_value_1 (mode); +} + +/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +arm_function_value_regno_p (const unsigned int regno) +{ + if (regno == ARG_REGISTER (1) + || (TARGET_32BIT + && TARGET_AAPCS_BASED + && TARGET_VFP + && TARGET_HARD_FLOAT + && regno == FIRST_VFP_REGNUM) + || (TARGET_IWMMXT_ABI + && regno == FIRST_IWMMXT_REGNUM)) + return true; + + return false; +} + +/* Determine the amount of memory needed to store the possible return + registers of an untyped call. */ +int +arm_apply_result_size (void) +{ + int size = 16; + + if (TARGET_32BIT) + { + if (TARGET_HARD_FLOAT_ABI && TARGET_VFP) + size += 32; + if (TARGET_IWMMXT_ABI) + size += 8; + } + + return size; +} + +/* Decide whether TYPE should be returned in memory (true) + or in a register (false). FNTYPE is the type of the function making + the call. */ +static bool +arm_return_in_memory (const_tree type, const_tree fntype) +{ + HOST_WIDE_INT size; + + size = int_size_in_bytes (type); /* Negative if not fixed size. */ + + if (TARGET_AAPCS_BASED) + { + /* Simple, non-aggregate types (ie not including vectors and + complex) are always returned in a register (or registers). + We don't care about which register here, so we can short-cut + some of the detail. */ + if (!AGGREGATE_TYPE_P (type) + && TREE_CODE (type) != VECTOR_TYPE + && TREE_CODE (type) != COMPLEX_TYPE) + return false; + + /* Any return value that is no larger than one word can be + returned in r0. */ + if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD) + return false; + + /* Check any available co-processors to see if they accept the + type as a register candidate (VFP, for example, can return + some aggregates in consecutive registers). These aren't + available if the call is variadic. 
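+     For example (the type is shown only as an illustration), under the
+     VFP variant of the AAPCS a homogeneous aggregate such as
+
+       struct vec4 { float x, y, z, w; };
+
+     is accepted by aapcs_select_return_coproc and returned in s0-s3,
+     so we return false here even though it is larger than a word.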
*/ + if (aapcs_select_return_coproc (type, fntype) >= 0) + return false; + + /* Vector values should be returned using ARM registers, not + memory (unless they're over 16 bytes, which will break since + we only have four call-clobbered registers to play with). */ + if (TREE_CODE (type) == VECTOR_TYPE) + return (size < 0 || size > (4 * UNITS_PER_WORD)); + + /* The rest go in memory. */ + return true; + } + + if (TREE_CODE (type) == VECTOR_TYPE) + return (size < 0 || size > (4 * UNITS_PER_WORD)); + + if (!AGGREGATE_TYPE_P (type) && + (TREE_CODE (type) != VECTOR_TYPE)) + /* All simple types are returned in registers. */ + return false; + + if (arm_abi != ARM_ABI_APCS) + { + /* ATPCS and later return aggregate types in memory only if they are + larger than a word (or are variable size). */ + return (size < 0 || size > UNITS_PER_WORD); + } + + /* For the arm-wince targets we choose to be compatible with Microsoft's + ARM and Thumb compilers, which always return aggregates in memory. */ +#ifndef ARM_WINCE + /* All structures/unions bigger than one word are returned in memory. + Also catch the case where int_size_in_bytes returns -1. In this case + the aggregate is either huge or of variable size, and in either case + we will want to return it via memory and not in a register. */ + if (size < 0 || size > UNITS_PER_WORD) + return true; + + if (TREE_CODE (type) == RECORD_TYPE) + { + tree field; + + /* For a struct the APCS says that we only return in a register + if the type is 'integer like' and every addressable element + has an offset of zero. For practical purposes this means + that the structure can have at most one non bit-field element + and that this element must be the first one in the structure. */ + + /* Find the first field, ignoring non FIELD_DECL things which will + have been created by C++. */ + for (field = TYPE_FIELDS (type); + field && TREE_CODE (field) != FIELD_DECL; + field = DECL_CHAIN (field)) + continue; + + if (field == NULL) + return false; /* An empty structure. Allowed by an extension to ANSI C. */ + + /* Check that the first field is valid for returning in a register. */ + + /* ... Floats are not allowed */ + if (FLOAT_TYPE_P (TREE_TYPE (field))) + return true; + + /* ... Aggregates that are not themselves valid for returning in + a register are not allowed. */ + if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) + return true; + + /* Now check the remaining fields, if any. Only bitfields are allowed, + since they are not addressable. */ + for (field = DECL_CHAIN (field); + field; + field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + if (!DECL_BIT_FIELD_TYPE (field)) + return true; + } + + return false; + } + + if (TREE_CODE (type) == UNION_TYPE) + { + tree field; + + /* Unions can be returned in registers if every element is + integral, or can be returned in an integer register. */ + for (field = TYPE_FIELDS (type); + field; + field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + if (FLOAT_TYPE_P (TREE_TYPE (field))) + return true; + + if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) + return true; + } + + return false; + } +#endif /* not ARM_WINCE */ + + /* Return all other types in memory. */ + return true; +} + +const struct pcs_attribute_arg +{ + const char *arg; + enum arm_pcs value; +} pcs_attribute_args[] = + { + {"aapcs", ARM_PCS_AAPCS}, + {"aapcs-vfp", ARM_PCS_AAPCS_VFP}, +#if 0 + /* We could recognize these, but changes would be needed elsewhere + * to implement them. 
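The APCS record rule spelled out above reduces to: once a struct already fits in a word, it comes back in a register only if its first real field is neither a float nor an aggregate that itself needs memory, and every later field is a bit-field. A simplified host-side model over an invented field descriptor (assuming the descriptor array already contains only genuine fields):

#include <stdbool.h>
#include <stddef.h>

/* Stand-in for one struct member, for illustration only.  */
struct field_desc
{
  bool is_float;
  bool is_bit_field;
  bool subfield_needs_memory;   /* would itself be returned in memory */
};

static bool
apcs_struct_returned_in_memory (const struct field_desc *fields, size_t n)
{
  if (n == 0)
    return false;               /* empty struct: register return */

  if (fields[0].is_float || fields[0].subfield_needs_memory)
    return true;

  for (size_t i = 1; i < n; i++)
    if (!fields[i].is_bit_field)
      return true;              /* only bit-fields may follow */

  return false;
}

Unions follow the same loop, except that every member must pass the register test.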
*/ + {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT}, + {"atpcs", ARM_PCS_ATPCS}, + {"apcs", ARM_PCS_APCS}, +#endif + {NULL, ARM_PCS_UNKNOWN} + }; + +static enum arm_pcs +arm_pcs_from_attribute (tree attr) +{ + const struct pcs_attribute_arg *ptr; + const char *arg; + + /* Get the value of the argument. */ + if (TREE_VALUE (attr) == NULL_TREE + || TREE_CODE (TREE_VALUE (attr)) != STRING_CST) + return ARM_PCS_UNKNOWN; + + arg = TREE_STRING_POINTER (TREE_VALUE (attr)); + + /* Check it against the list of known arguments. */ + for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++) + if (streq (arg, ptr->arg)) + return ptr->value; + + /* An unrecognized interrupt type. */ + return ARM_PCS_UNKNOWN; +} + +/* Get the PCS variant to use for this call. TYPE is the function's type + specification, DECL is the specific declartion. DECL may be null if + the call could be indirect or if this is a library call. */ +static enum arm_pcs +arm_get_pcs_model (const_tree type, const_tree decl) +{ + bool user_convention = false; + enum arm_pcs user_pcs = arm_pcs_default; + tree attr; + + gcc_assert (type); + + attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type)); + if (attr) + { + user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr)); + user_convention = true; + } + + if (TARGET_AAPCS_BASED) + { + /* Detect varargs functions. These always use the base rules + (no argument is ever a candidate for a co-processor + register). */ + bool base_rules = stdarg_p (type); + + if (user_convention) + { + if (user_pcs > ARM_PCS_AAPCS_LOCAL) + sorry ("non-AAPCS derived PCS variant"); + else if (base_rules && user_pcs != ARM_PCS_AAPCS) + error ("variadic functions must use the base AAPCS variant"); + } + + if (base_rules) + return ARM_PCS_AAPCS; + else if (user_convention) + return user_pcs; + else if (decl && flag_unit_at_a_time) + { + /* Local functions never leak outside this compilation unit, + so we are free to use whatever conventions are + appropriate. */ + /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */ + struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); + if (i && i->local) + return ARM_PCS_AAPCS_LOCAL; + } + } + else if (user_convention && user_pcs != arm_pcs_default) + sorry ("PCS variant"); + + /* For everything else we use the target's default. */ + return arm_pcs_default; +} + + +static void +aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + rtx libcall ATTRIBUTE_UNUSED, + const_tree fndecl ATTRIBUTE_UNUSED) +{ + /* Record the unallocated VFP registers. */ + pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; + pcum->aapcs_vfp_reg_alloc = 0; +} + +/* Walk down the type tree of TYPE counting consecutive base elements. + If *MODEP is VOIDmode, then set it to the first valid floating point + type. If a non-floating point type is found, or if a floating point + type that doesn't match a non-VOIDmode *MODEP is found, then return -1, + otherwise return the count in the sub-tree. 
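The "pcs" attribute parsing above is a straight scan of a NULL-terminated name/value table. The same shape as a stand-alone sketch, trimmed to the two spellings the patch actually accepts:

#include <string.h>

enum pcs_kind { PCS_AAPCS, PCS_AAPCS_VFP, PCS_UNKNOWN };

static const struct { const char *arg; enum pcs_kind value; } pcs_args[] = {
  { "aapcs",     PCS_AAPCS },
  { "aapcs-vfp", PCS_AAPCS_VFP },
  { NULL,        PCS_UNKNOWN }
};

/* Walk the table and fall back to "unknown" for anything unrecognised.  */
static enum pcs_kind
pcs_from_string (const char *arg)
{
  for (size_t i = 0; pcs_args[i].arg != NULL; i++)
    if (strcmp (arg, pcs_args[i].arg) == 0)
      return pcs_args[i].value;
  return PCS_UNKNOWN;
}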
*/ +static int +aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) +{ + enum machine_mode mode; + HOST_WIDE_INT size; + + switch (TREE_CODE (type)) + { + case REAL_TYPE: + mode = TYPE_MODE (type); + if (mode != DFmode && mode != SFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 1; + + break; + + case COMPLEX_TYPE: + mode = TYPE_MODE (TREE_TYPE (type)); + if (mode != DFmode && mode != SFmode) + return -1; + + if (*modep == VOIDmode) + *modep = mode; + + if (*modep == mode) + return 2; + + break; + + case VECTOR_TYPE: + /* Use V2SImode and V4SImode as representatives of all 64-bit + and 128-bit vector types, whether or not those modes are + supported with the present options. */ + size = int_size_in_bytes (type); + switch (size) + { + case 8: + mode = V2SImode; + break; + case 16: + mode = V4SImode; + break; + default: + return -1; + } + + if (*modep == VOIDmode) + *modep = mode; + + /* Vector modes are considered to be opaque: two vectors are + equivalent for the purposes of being homogeneous aggregates + if they are the same size. */ + if (*modep == mode) + return 1; + + break; + + case ARRAY_TYPE: + { + int count; + tree index = TYPE_DOMAIN (type); + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); + if (count == -1 + || !index + || !TYPE_MAX_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) + || !TYPE_MIN_VALUE (index) + || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) + || count < 0) + return -1; + + count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) + - tree_to_uhwi (TYPE_MIN_VALUE (index))); + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case RECORD_TYPE: + { + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count += sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + case UNION_TYPE: + case QUAL_UNION_TYPE: + { + /* These aren't very interesting except in a degenerate case. */ + int count = 0; + int sub_count; + tree field; + + /* Can't handle incomplete types. */ + if (!COMPLETE_TYPE_P (type)) + return -1; + + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + if (TREE_CODE (field) != FIELD_DECL) + continue; + + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); + if (sub_count < 0) + return -1; + count = count > sub_count ? count : sub_count; + } + + /* There must be no padding. */ + if (!tree_fits_uhwi_p (TYPE_SIZE (type)) + || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type)) + != count * GET_MODE_BITSIZE (*modep))) + return -1; + + return count; + } + + default: + break; + } + + return -1; +} + +/* Return true if PCS_VARIANT should use VFP registers. 
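aapcs_vfp_sub_candidate above is the homogeneous-aggregate walk: every leaf must be a floating-point element of one common mode, and the caller later accepts a total of 1 to 4 elements. A much-simplified sketch over an invented type descriptor, covering only scalars, arrays and records (no complex, vector, union or padding handling):

#include <stddef.h>

enum elem_kind { ELEM_NONE, ELEM_FLOAT, ELEM_DOUBLE };

struct hfa_type
{
  enum elem_kind scalar;        /* ELEM_NONE for aggregates */
  size_t nelts;                 /* number of elements / fields */
  const struct hfa_type *elts;  /* element / field types */
};

/* Return the element count, or -1 if the type is not a candidate.
   *KIND is set from the first scalar seen and must match everywhere.  */
static int
count_fp_elements (const struct hfa_type *t, enum elem_kind *kind)
{
  if (t->scalar != ELEM_NONE)
    {
      if (*kind == ELEM_NONE)
        *kind = t->scalar;
      return *kind == t->scalar ? 1 : -1;
    }

  int total = 0;
  for (size_t i = 0; i < t->nelts; i++)
    {
      int sub = count_fp_elements (&t->elts[i], kind);
      if (sub < 0)
        return -1;
      total += sub;
    }
  return total;
}

For example, a descriptor for struct { float x, y, z; } yields 3 with kind ELEM_FLOAT and is a candidate, while mixing float and double fields returns -1.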
*/ +static bool +use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) +{ + if (pcs_variant == ARM_PCS_AAPCS_VFP) + { + static bool seen_thumb1_vfp = false; + + if (TARGET_THUMB1 && !seen_thumb1_vfp) + { + sorry ("Thumb-1 hard-float VFP ABI"); + /* sorry() is not immediately fatal, so only display this once. */ + seen_thumb1_vfp = true; + } + + return true; + } + + if (pcs_variant != ARM_PCS_AAPCS_LOCAL) + return false; + + return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && + (TARGET_VFP_DOUBLE || !is_double)); +} + +/* Return true if an argument whose type is TYPE, or mode is MODE, is + suitable for passing or returning in VFP registers for the PCS + variant selected. If it is, then *BASE_MODE is updated to contain + a machine mode describing each element of the argument's type and + *COUNT to hold the number of such elements. */ +static bool +aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type, + enum machine_mode *base_mode, int *count) +{ + enum machine_mode new_mode = VOIDmode; + + /* If we have the type information, prefer that to working things + out from the mode. */ + if (type) + { + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); + + if (ag_count > 0 && ag_count <= 4) + *count = ag_count; + else + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT + || GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + *count = 1; + new_mode = mode; + } + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + { + *count = 2; + new_mode = (mode == DCmode ? DFmode : SFmode); + } + else + return false; + + + if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) + return false; + + *base_mode = new_mode; + return true; +} + +static bool +aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, + enum machine_mode mode, const_tree type) +{ + int count ATTRIBUTE_UNUSED; + enum machine_mode ag_mode ATTRIBUTE_UNUSED; + + if (!use_vfp_abi (pcs_variant, false)) + return false; + return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, + &ag_mode, &count); +} + +static bool +aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type) +{ + if (!use_vfp_abi (pcum->pcs_variant, false)) + return false; + + return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, + &pcum->aapcs_vfp_rmode, + &pcum->aapcs_vfp_rcount); +} + +static bool +aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED) +{ + int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); + unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; + int regno; + + for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift) + if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) + { + pcum->aapcs_vfp_reg_alloc = mask << regno; + if (mode == BLKmode + || (mode == TImode && ! TARGET_NEON) + || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode)) + { + int i; + int rcount = pcum->aapcs_vfp_rcount; + int rshift = shift; + enum machine_mode rmode = pcum->aapcs_vfp_rmode; + rtx par; + if (!TARGET_NEON) + { + /* Avoid using unsupported vector modes. 
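The allocation loop this comment sits in scans the free-register bitmask for a naturally aligned, contiguous block of S registers. Pulled out as a stand-alone helper (assuming count <= 4, shift of 1 for SFmode or 2 for DFmode, and a free mask initialised to the low 16 bits; the freeing step that aapcs_vfp_advance performs later is folded in here for illustration):

static int
allocate_vfp_block (unsigned *free_mask, int shift, int count, int num_arg_regs)
{
  unsigned mask = (1u << (shift * count)) - 1;

  for (int regno = 0; regno < num_arg_regs; regno += shift)
    if (((*free_mask >> regno) & mask) == mask)
      {
        *free_mask &= ~(mask << regno);   /* mark the block as used */
        return regno;                     /* first S register of the block */
      }
  return -1;                              /* no contiguous block: use the stack */
}

With free_mask = 0xffff, a homogeneous aggregate of three floats (shift 1, count 3) takes s0-s2 and leaves free_mask = 0xfff8.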
*/ + if (rmode == V2SImode) + rmode = DImode; + else if (rmode == V4SImode) + { + rmode = DImode; + rcount *= 2; + rshift /= 2; + } + } + par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount)); + for (i = 0; i < rcount; i++) + { + rtx tmp = gen_rtx_REG (rmode, + FIRST_VFP_REGNUM + regno + i * rshift); + tmp = gen_rtx_EXPR_LIST + (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (rmode))); + XVECEXP (par, 0, i) = tmp; + } + + pcum->aapcs_reg = par; + } + else + pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno); + return true; + } + return false; +} + +static rtx +aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED, + enum machine_mode mode, + const_tree type ATTRIBUTE_UNUSED) +{ + if (!use_vfp_abi (pcs_variant, false)) + return NULL; + + if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) + { + int count; + enum machine_mode ag_mode; + int i; + rtx par; + int shift; + + aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, + &ag_mode, &count); + + if (!TARGET_NEON) + { + if (ag_mode == V2SImode) + ag_mode = DImode; + else if (ag_mode == V4SImode) + { + ag_mode = DImode; + count *= 2; + } + } + shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode); + par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); + for (i = 0; i < count; i++) + { + rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * GET_MODE_SIZE (ag_mode))); + XVECEXP (par, 0, i) = tmp; + } + + return par; + } + + return gen_rtx_REG (mode, FIRST_VFP_REGNUM); +} + +static void +aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED) +{ + pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc; + pcum->aapcs_vfp_reg_alloc = 0; + return; +} + +#define AAPCS_CP(X) \ + { \ + aapcs_ ## X ## _cum_init, \ + aapcs_ ## X ## _is_call_candidate, \ + aapcs_ ## X ## _allocate, \ + aapcs_ ## X ## _is_return_candidate, \ + aapcs_ ## X ## _allocate_return_reg, \ + aapcs_ ## X ## _advance \ + } + +/* Table of co-processors that can be used to pass arguments in + registers. Idealy no arugment should be a candidate for more than + one co-processor table entry, but the table is processed in order + and stops after the first match. If that entry then fails to put + the argument into a co-processor register, the argument will go on + the stack. */ +static struct +{ + /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */ + void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree); + + /* Return true if an argument of mode MODE (or type TYPE if MODE is + BLKmode) is a candidate for this co-processor's registers; this + function should ignore any position-dependent state in + CUMULATIVE_ARGS and only use call-type dependent information. */ + bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); + + /* Return true if the argument does get a co-processor register; it + should set aapcs_reg to an RTX of the register allocated as is + required for a return from FUNCTION_ARG. */ + bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); + + /* Return true if a result of mode MODE (or type TYPE if MODE is + BLKmode) is can be returned in this co-processor's registers. 
*/ + bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree); + + /* Allocate and return an RTX element to hold the return type of a + call, this routine must not fail and will only be called if + is_return_candidate returned true with the same parameters. */ + rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree); + + /* Finish processing this argument and prepare to start processing + the next one. */ + void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); +} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = + { + AAPCS_CP(vfp) + }; + +#undef AAPCS_CP + +static int +aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type) +{ + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) + return i; + + return -1; +} + +static int +aapcs_select_return_coproc (const_tree type, const_tree fntype) +{ + /* We aren't passed a decl, so we can't check that a call is local. + However, it isn't clear that that would be a win anyway, since it + might limit some tail-calling opportunities. */ + enum arm_pcs pcs_variant; + + if (fntype) + { + const_tree fndecl = NULL_TREE; + + if (TREE_CODE (fntype) == FUNCTION_DECL) + { + fndecl = fntype; + fntype = TREE_TYPE (fntype); + } + + pcs_variant = arm_get_pcs_model (fntype, fndecl); + } + else + pcs_variant = arm_pcs_default; + + if (pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, + TYPE_MODE (type), + type)) + return i; + } + return -1; +} + +static rtx +aapcs_allocate_return_reg (enum machine_mode mode, const_tree type, + const_tree fntype) +{ + /* We aren't passed a decl, so we can't check that a call is local. + However, it isn't clear that that would be a win anyway, since it + might limit some tail-calling opportunities. */ + enum arm_pcs pcs_variant; + int unsignedp ATTRIBUTE_UNUSED; + + if (fntype) + { + const_tree fndecl = NULL_TREE; + + if (TREE_CODE (fntype) == FUNCTION_DECL) + { + fndecl = fntype; + fntype = TREE_TYPE (fntype); + } + + pcs_variant = arm_get_pcs_model (fntype, fndecl); + } + else + pcs_variant = arm_pcs_default; + + /* Promote integer types. */ + if (type && INTEGRAL_TYPE_P (type)) + mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1); + + if (pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, + type)) + return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, + mode, type); + } + + /* Promotes small structs returned in a register to full-word size + for big-endian AAPCS. */ + if (type && arm_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size % UNITS_PER_WORD != 0) + { + size += UNITS_PER_WORD - size % UNITS_PER_WORD; + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); + } + } + + return gen_rtx_REG (mode, R0_REGNUM); +} + +static rtx +aapcs_libcall_value (enum machine_mode mode) +{ + if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode) + && GET_MODE_SIZE (mode) <= 4) + mode = SImode; + + return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); +} + +/* Lay out a function argument using the AAPCS rules. The rule + numbers referred to here are those in the AAPCS. 
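One small detail in aapcs_allocate_return_reg above: for big-endian AAPCS, a small struct returned in a core register is widened to a whole number of words so the value lands in the most significant bits. The rounding, written out on its own as an illustrative helper:

static long
round_return_size_to_words (long size, int units_per_word)
{
  if (size % units_per_word != 0)
    size += units_per_word - size % units_per_word;
  return size;
}

/* round_return_size_to_words (3, 4) == 4, (5, 4) == 8, (8, 4) == 8.  */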
*/ +static void +aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, + const_tree type, bool named) +{ + int nregs, nregs2; + int ncrn; + + /* We only need to do this once per argument. */ + if (pcum->aapcs_arg_processed) + return; + + pcum->aapcs_arg_processed = true; + + /* Special case: if named is false then we are handling an incoming + anonymous argument which is on the stack. */ + if (!named) + return; + + /* Is this a potential co-processor register candidate? */ + if (pcum->pcs_variant != ARM_PCS_AAPCS) + { + int slot = aapcs_select_call_coproc (pcum, mode, type); + pcum->aapcs_cprc_slot = slot; + + /* We don't have to apply any of the rules from part B of the + preparation phase, these are handled elsewhere in the + compiler. */ + + if (slot >= 0) + { + /* A Co-processor register candidate goes either in its own + class of registers or on the stack. */ + if (!pcum->aapcs_cprc_failed[slot]) + { + /* C1.cp - Try to allocate the argument to co-processor + registers. */ + if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) + return; + + /* C2.cp - Put the argument on the stack and note that we + can't assign any more candidates in this slot. We also + need to note that we have allocated stack space, so that + we won't later try to split a non-cprc candidate between + core registers and the stack. */ + pcum->aapcs_cprc_failed[slot] = true; + pcum->can_split = false; + } + + /* We didn't get a register, so this argument goes on the + stack. */ + gcc_assert (pcum->can_split == false); + return; + } + } + + /* C3 - For double-word aligned arguments, round the NCRN up to the + next even number. */ + ncrn = pcum->aapcs_ncrn; + if ((ncrn & 1) && arm_needs_doubleword_align (mode, type)) + ncrn++; + + nregs = ARM_NUM_REGS2(mode, type); + + /* Sigh, this test should really assert that nregs > 0, but a GCC + extension allows empty structs and then gives them empty size; it + then allows such a structure to be passed by value. For some of + the code below we have to pretend that such an argument has + non-zero size so that we 'locate' it correctly either in + registers or on the stack. */ + gcc_assert (nregs >= 0); + + nregs2 = nregs ? nregs : 1; + + /* C4 - Argument fits entirely in core registers. */ + if (ncrn + nregs2 <= NUM_ARG_REGS) + { + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); + pcum->aapcs_next_ncrn = ncrn + nregs; + return; + } + + /* C5 - Some core registers left and there are no arguments already + on the stack: split this argument between the remaining core + registers and the stack. */ + if (ncrn < NUM_ARG_REGS && pcum->can_split) + { + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); + pcum->aapcs_next_ncrn = NUM_ARG_REGS; + pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD; + return; + } + + /* C6 - NCRN is set to 4. */ + pcum->aapcs_next_ncrn = NUM_ARG_REGS; + + /* C7,C8 - arugment goes on the stack. We have nothing to do here. */ + return; +} + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is NULL. */ +void +arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, + rtx libname, + tree fndecl ATTRIBUTE_UNUSED) +{ + /* Long call handling. 
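Rules C3-C5 implemented by aapcs_layout_arg above can be restated as a small host-side model: round the next core register number up for double-word aligned arguments, use registers if the whole argument fits, otherwise split it with the remainder going to the stack. The macros below are local stand-ins pinned to the AAPCS values, and the empty-struct special case is omitted:

#include <stdbool.h>

#define NUM_ARG_REGS   4
#define UNITS_PER_WORD 4

/* Returns the first register allocated (-1 for "entirely on the stack")
   and reports any bytes that spilled onto the stack via *PARTIAL_BYTES.  */
static int
layout_core_arg (int *ncrn, int nregs, bool needs_dword_align,
                 bool can_split, int *partial_bytes)
{
  *partial_bytes = 0;

  /* C3: double-word aligned arguments start at an even register.  */
  if ((*ncrn & 1) && needs_dword_align)
    (*ncrn)++;

  /* C4: the argument fits entirely in core registers.  */
  if (*ncrn + nregs <= NUM_ARG_REGS)
    {
      int reg = *ncrn;
      *ncrn += nregs;
      return reg;
    }

  /* C5: split between the remaining registers and the stack.  */
  if (*ncrn < NUM_ARG_REGS && can_split)
    {
      int reg = *ncrn;
      *partial_bytes = (NUM_ARG_REGS - *ncrn) * UNITS_PER_WORD;
      *ncrn = NUM_ARG_REGS;
      return reg;
    }

  /* C6-C8: no registers left; the argument goes on the stack.  */
  *ncrn = NUM_ARG_REGS;
  return -1;
}

For example, a double passed after one int (ncrn = 1, nregs = 2, double-word aligned) is bumped to r2 and occupies r2-r3; the next argument then starts on the stack.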
*/ + if (fntype) + pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl); + else + pcum->pcs_variant = arm_pcs_default; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + if (arm_libcall_uses_aapcs_base (libname)) + pcum->pcs_variant = ARM_PCS_AAPCS; + + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_partial = 0; + pcum->aapcs_arg_processed = false; + pcum->aapcs_cprc_slot = -1; + pcum->can_split = true; + + if (pcum->pcs_variant != ARM_PCS_AAPCS) + { + int i; + + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) + { + pcum->aapcs_cprc_failed[i] = false; + aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl); + } + } + return; + } + + /* Legacy ABIs */ + + /* On the ARM, the offset starts at 0. */ + pcum->nregs = 0; + pcum->iwmmxt_nregs = 0; + pcum->can_split = true; + + /* Varargs vectors are treated the same as long long. + named_count avoids having to change the way arm handles 'named' */ + pcum->named_count = 0; + pcum->nargs = 0; + + if (TARGET_REALLY_IWMMXT && fntype) + { + tree fn_arg; + + for (fn_arg = TYPE_ARG_TYPES (fntype); + fn_arg; + fn_arg = TREE_CHAIN (fn_arg)) + pcum->named_count += 1; + + if (! pcum->named_count) + pcum->named_count = INT_MAX; + } +} + +/* Return true if we use LRA instead of reload pass. */ +static bool +arm_lra_p (void) +{ + return arm_lra_flag; +} + +/* Return true if mode/type need doubleword alignment. */ +static bool +arm_needs_doubleword_align (enum machine_mode mode, const_tree type) +{ + return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY + || (type && TYPE_ALIGN (type) > PARM_BOUNDARY)); +} + + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). + + On the ARM, normally the first 16 bytes are passed in registers r0-r3; all + other arguments are passed on the stack. If (NAMED == 0) (which happens + only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is + defined), say it is passed in the stack (function_prologue will + indeed make it pass in the stack if necessary). */ + +static rtx +arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + int nregs; + + /* Handle the special case quickly. Pick an arbitrary value for op2 of + a call insn (op3 of a call_value insn). */ + if (mode == VOIDmode) + return const0_rtx; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + return pcum->aapcs_reg; + } + + /* Varargs vectors are treated the same as long long. + named_count avoids having to change the way arm handles 'named' */ + if (TARGET_IWMMXT_ABI + && arm_vector_mode_supported_p (mode) + && pcum->named_count > pcum->nargs + 1) + { + if (pcum->iwmmxt_nregs <= 9) + return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM); + else + { + pcum->can_split = false; + return NULL_RTX; + } + } + + /* Put doubleword aligned quantities in even register pairs. 
*/ + if (pcum->nregs & 1 + && ARM_DOUBLEWORD_ALIGN + && arm_needs_doubleword_align (mode, type)) + pcum->nregs++; + + /* Only allow splitting an arg between regs and memory if all preceding + args were allocated to regs. For args passed by reference we only count + the reference pointer. */ + if (pcum->can_split) + nregs = 1; + else + nregs = ARM_NUM_REGS2 (mode, type); + + if (!named || pcum->nregs + nregs > NUM_ARG_REGS) + return NULL_RTX; + + return gen_rtx_REG (mode, pcum->nregs); +} + +static unsigned int +arm_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type) + ? DOUBLEWORD_ALIGNMENT + : PARM_BOUNDARY); +} + +static int +arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode, + tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + int nregs = pcum->nregs; + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + return pcum->aapcs_partial; + } + + if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) + return 0; + + if (NUM_ARG_REGS > nregs + && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type)) + && pcum->can_split) + return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; + + return 0; +} + +/* Update the data in PCUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ + +static void +arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + aapcs_layout_arg (pcum, mode, type, named); + + if (pcum->aapcs_cprc_slot >= 0) + { + aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode, + type); + pcum->aapcs_cprc_slot = -1; + } + + /* Generic stuff. */ + pcum->aapcs_arg_processed = false; + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_partial = 0; + } + else + { + pcum->nargs += 1; + if (arm_vector_mode_supported_p (mode) + && pcum->named_count > pcum->nargs + && TARGET_IWMMXT_ABI) + pcum->iwmmxt_nregs += 1; + else + pcum->nregs += ARM_NUM_REGS2 (mode, type); + } +} + +/* Variable sized types are passed by reference. This is a GCC + extension to the ARM ABI. */ + +static bool +arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; +} + +/* Encode the current state of the #pragma [no_]long_calls. */ +typedef enum +{ + OFF, /* No #pragma [no_]long_calls is in effect. */ + LONG, /* #pragma long_calls is in effect. */ + SHORT /* #pragma no_long_calls is in effect. */ +} arm_pragma_enum; + +static arm_pragma_enum arm_pragma_long_calls = OFF; + +void +arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = LONG; +} + +void +arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = SHORT; +} + +void +arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + arm_pragma_long_calls = OFF; +} + +/* Handle an attribute requiring a FUNCTION_DECL; + arguments as in struct attribute_spec.handler. 
*/ +static tree +arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle an "interrupt" or "isr" attribute; + arguments as in struct attribute_spec.handler. */ +static tree +arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, + bool *no_add_attrs) +{ + if (DECL_P (*node)) + { + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + /* FIXME: the argument if any is checked for type attributes; + should it be checked for decl ones? */ + } + else + { + if (TREE_CODE (*node) == FUNCTION_TYPE + || TREE_CODE (*node) == METHOD_TYPE) + { + if (arm_isr_value (args) == ARM_FT_UNKNOWN) + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + } + else if (TREE_CODE (*node) == POINTER_TYPE + && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE + || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE) + && arm_isr_value (args) != ARM_FT_UNKNOWN) + { + *node = build_variant_type_copy (*node); + TREE_TYPE (*node) = build_type_attribute_variant + (TREE_TYPE (*node), + tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node)))); + *no_add_attrs = true; + } + else + { + /* Possibly pass this attribute on from the type to a decl. */ + if (flags & ((int) ATTR_FLAG_DECL_NEXT + | (int) ATTR_FLAG_FUNCTION_NEXT + | (int) ATTR_FLAG_ARRAY_NEXT)) + { + *no_add_attrs = true; + return tree_cons (name, args, NULL_TREE); + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + } + } + } + + return NULL_TREE; +} + +/* Handle a "pcs" attribute; arguments as in struct + attribute_spec.handler. */ +static tree +arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +{ + if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + return NULL_TREE; +} + +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +/* Handle the "notshared" attribute. This attribute is another way of + requesting hidden visibility. ARM's compiler supports + "__declspec(notshared)"; we support the same thing via an + attribute. */ + +static tree +arm_handle_notshared_attribute (tree *node, + tree name ATTRIBUTE_UNUSED, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree decl = TYPE_NAME (*node); + + if (decl) + { + DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (decl) = 1; + *no_add_attrs = false; + } + return NULL_TREE; +} +#endif + +/* Return 0 if the attributes for two types are incompatible, 1 if they + are compatible, and 2 if they are nearly compatible (which causes a + warning to be generated). */ +static int +arm_comp_type_attributes (const_tree type1, const_tree type2) +{ + int l1, l2, s1, s2; + + /* Check for mismatch of non-default calling convention. */ + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + /* Check for mismatched call attributes. 
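The mismatched-call-attribute checks that follow reduce to four booleans: whether each type carries long_call and/or short_call. A stand-alone restatement of that logic, with attribute lookups reduced to flags:

#include <stdbool.h>

/* 1 = compatible, 0 = incompatible.  */
static int
call_attrs_compatible (bool l1, bool s1, bool l2, bool s2)
{
  if (l1 || l2 || s1 || s2)
    {
      /* Both types must carry exactly the same attributes...  */
      if (l1 != l2 || s1 != s2)
        return 0;
      /* ...and long_call must never meet short_call.  */
      if ((l1 && s2) || (l2 && s1))
        return 0;
    }
  return 1;
}

Separately, in the ISR mismatch check further down, the fallback lookup of type2's "interrupt" attribute appears to store its result into l1 rather than l2, which looks unintended; the sketch above models only the long_call/short_call part.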
*/ + l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; + s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; + s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; + + /* Only bother to check if an attribute is defined. */ + if (l1 | l2 | s1 | s2) + { + /* If one type has an attribute, the other must have the same attribute. */ + if ((l1 != l2) || (s1 != s2)) + return 0; + + /* Disallow mixed attributes. */ + if ((l1 & s2) || (l2 & s1)) + return 0; + } + + /* Check for mismatched ISR attribute. */ + l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL; + if (! l1) + l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL; + if (! l2) + l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL; + if (l1 != l2) + return 0; + + return 1; +} + +/* Assigns default attributes to newly defined type. This is used to + set short_call/long_call attributes for function types of + functions defined inside corresponding #pragma scopes. */ +static void +arm_set_default_type_attributes (tree type) +{ + /* Add __attribute__ ((long_call)) to all functions, when + inside #pragma long_calls or __attribute__ ((short_call)), + when inside #pragma no_long_calls. */ + if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) + { + tree type_attr_list, attr_name; + type_attr_list = TYPE_ATTRIBUTES (type); + + if (arm_pragma_long_calls == LONG) + attr_name = get_identifier ("long_call"); + else if (arm_pragma_long_calls == SHORT) + attr_name = get_identifier ("short_call"); + else + return; + + type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list); + TYPE_ATTRIBUTES (type) = type_attr_list; + } +} + +/* Return true if DECL is known to be linked into section SECTION. */ + +static bool +arm_function_in_section_p (tree decl, section *section) +{ + /* We can only be certain about functions defined in the same + compilation unit. */ + if (!TREE_STATIC (decl)) + return false; + + /* Make sure that SYMBOL always binds to the definition in this + compilation unit. */ + if (!targetm.binds_local_p (decl)) + return false; + + /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ + if (!DECL_SECTION_NAME (decl)) + { + /* Make sure that we will not create a unique section for DECL. */ + if (flag_function_sections || DECL_ONE_ONLY (decl)) + return false; + } + + return function_section (decl) == section; +} + +/* Return nonzero if a 32-bit "long_call" should be generated for + a call from the current function to DECL. We generate a long_call + if the function: + + a. has an __attribute__((long call)) + or b. is within the scope of a #pragma long_calls + or c. the -mlong-calls command line switch has been specified + + However we do not generate a long call if the function: + + d. has an __attribute__ ((short_call)) + or e. is inside the scope of a #pragma no_long_calls + or f. is defined in the same section as the current function. */ + +bool +arm_is_long_call_p (tree decl) +{ + tree attrs; + + if (!decl) + return TARGET_LONG_CALLS; + + attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + if (lookup_attribute ("short_call", attrs)) + return false; + + /* For "f", be conservative, and only cater for cases in which the + whole of the current function is placed in the same section. 
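The long-call decision documented here (rules a-f) has a fixed precedence: an explicit short_call wins, then same-section locality, then an explicit long_call, then the -mlong-calls default. As a boolean sketch, with the NULL-decl and block-partitioning details folded into the flags:

#include <stdbool.h>

static bool
is_long_call (bool has_short_call_attr, bool same_section,
              bool has_long_call_attr, bool target_long_calls)
{
  if (has_short_call_attr)
    return false;
  if (same_section)
    return false;
  if (has_long_call_attr)
    return true;
  return target_long_calls;
}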
*/ + if (!flag_reorder_blocks_and_partition + && TREE_CODE (decl) == FUNCTION_DECL + && arm_function_in_section_p (decl, current_function_section ())) + return false; + + if (lookup_attribute ("long_call", attrs)) + return true; + + return TARGET_LONG_CALLS; +} + +/* Return nonzero if it is ok to make a tail-call to DECL. */ +static bool +arm_function_ok_for_sibcall (tree decl, tree exp) +{ + unsigned long func_type; + + if (cfun->machine->sibcall_blocked) + return false; + + /* Never tailcall something if we are generating code for Thumb-1. */ + if (TARGET_THUMB1) + return false; + + /* The PIC register is live on entry to VxWorks PLT entries, so we + must make the call before restoring the PIC register. */ + if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) + return false; + + /* Cannot tail-call to long calls, since these are out of range of + a branch instruction. */ + if (decl && arm_is_long_call_p (decl)) + return false; + + /* If we are interworking and the function is not declared static + then we can't tail-call it unless we know that it exists in this + compilation unit (since it might be a Thumb routine). */ + if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl) + && !TREE_ASM_WRITTEN (decl)) + return false; + + func_type = arm_current_func_type (); + /* Never tailcall from an ISR routine - it needs a special exit sequence. */ + if (IS_INTERRUPT (func_type)) + return false; + + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) + { + /* Check that the return value locations are the same. For + example that we aren't returning a value from the sibling in + a VFP register but then need to transfer it to a core + register. */ + rtx a, b; + + a = arm_function_value (TREE_TYPE (exp), decl, false); + b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), + cfun->decl, false); + if (!rtx_equal_p (a, b)) + return false; + } + + /* Never tailcall if function may be called with a misaligned SP. */ + if (IS_STACKALIGN (func_type)) + return false; + + /* The AAPCS says that, on bare-metal, calls to unresolved weak + references should become a NOP. Don't convert such calls into + sibling calls. */ + if (TARGET_AAPCS_BASED + && arm_abi == ARM_ABI_AAPCS + && decl + && DECL_WEAK (decl)) + return false; + + /* Everything else is ok. */ + return true; +} + + +/* Addressing mode support functions. */ + +/* Return nonzero if X is a legitimate immediate operand when compiling + for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */ +int +legitimate_pic_operand_p (rtx x) +{ + if (GET_CODE (x) == SYMBOL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) + return 0; + + return 1; +} + +/* Record that the current function needs a PIC register. Initialize + cfun->machine->pic_reg if we have not already done so. */ + +static void +require_pic_register (void) +{ + /* A lot of the logic here is made obscure by the fact that this + routine gets called as part of the rtx cost estimation process. + We don't want those calls to affect any assumptions about the real + function; and further, we can't call entry_of_function() until we + start the real expansion process. 
*/ + if (!crtl->uses_pic_offset_table) + { + gcc_assert (can_create_pseudo_p ()); + if (arm_pic_register != INVALID_REGNUM + && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM)) + { + if (!cfun->machine->pic_reg) + cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register); + + /* Play games to avoid marking the function as needing pic + if we are being called as part of the cost-estimation + process. */ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + crtl->uses_pic_offset_table = 1; + } + else + { + rtx seq, insn; + + if (!cfun->machine->pic_reg) + cfun->machine->pic_reg = gen_reg_rtx (Pmode); + + /* Play games to avoid marking the function as needing pic + if we are being called as part of the cost-estimation + process. */ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + { + crtl->uses_pic_offset_table = 1; + start_sequence (); + + if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM + && arm_pic_register > LAST_LO_REGNUM) + emit_move_insn (cfun->machine->pic_reg, + gen_rtx_REG (Pmode, arm_pic_register)); + else + arm_load_pic_register (0UL); + + seq = get_insns (); + end_sequence (); + + for (insn = seq; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + INSN_LOCATION (insn) = prologue_location; + + /* We can be called during expansion of PHI nodes, where + we can't yet emit instructions directly in the final + insn stream. Queue the insns on the entry edge, they will + be committed after everything else is expanded. */ + insert_insn_on_edge (seq, + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + } + } + } +} + +rtx +legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) +{ + if (GET_CODE (orig) == SYMBOL_REF + || GET_CODE (orig) == LABEL_REF) + { + rtx insn; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + /* VxWorks does not impose a fixed gap between segments; the run-time + gap can be different from the object-file gap. We therefore can't + use GOTOFF unless we are absolutely sure that the symbol is in the + same segment as the GOT. Unfortunately, the flexibility of linker + scripts means that we can't be sure of that in general, so assume + that GOTOFF is never valid on VxWorks. */ + if ((GET_CODE (orig) == LABEL_REF + || (GET_CODE (orig) == SYMBOL_REF && + SYMBOL_REF_LOCAL_P (orig))) + && NEED_GOT_RELOC + && arm_pic_data_is_text_relative) + insn = arm_pic_static_addr (orig, reg); + else + { + rtx pat; + rtx mem; + + /* If this function doesn't have a pic register, create one now. */ + require_pic_register (); + + pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); + + /* Make the MEM as close to a constant as possible. */ + mem = SET_SRC (pat); + gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); + MEM_READONLY_P (mem) = 1; + MEM_NOTRAP_P (mem) = 1; + + insn = emit_insn (pat); + } + + /* Put a REG_EQUAL note on this insn, so that it can be optimized + by loop. */ + set_unique_reg_note (insn, REG_EQUAL, orig); + + return reg; + } + else if (GET_CODE (orig) == CONST) + { + rtx base, offset; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg) + return orig; + + /* Handle the case where we have: const (UNSPEC_TLS). */ + if (GET_CODE (XEXP (orig, 0)) == UNSPEC + && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS) + return orig; + + /* Handle the case where we have: + const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a + CONST_INT. 
*/ + if (GET_CODE (XEXP (orig, 0)) == PLUS + && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC + && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS) + { + gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1))); + return orig; + } + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + + base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); + offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, + base == reg ? 0 : reg); + + if (CONST_INT_P (offset)) + { + /* The base register doesn't really matter, we only want to + test the index for the appropriate mode. */ + if (!arm_legitimate_index_p (mode, offset, SET, 0)) + { + gcc_assert (can_create_pseudo_p ()); + offset = force_reg (Pmode, offset); + } + + if (CONST_INT_P (offset)) + return plus_constant (Pmode, base, INTVAL (offset)); + } + + if (GET_MODE_SIZE (mode) > 4 + && (GET_MODE_CLASS (mode) == MODE_INT + || TARGET_SOFT_FLOAT)) + { + emit_insn (gen_addsi3 (reg, base, offset)); + return reg; + } + + return gen_rtx_PLUS (Pmode, base, offset); + } + + return orig; +} + + +/* Find a spare register to use during the prolog of a function. */ + +static int +thumb_find_work_register (unsigned long pushed_regs_mask) +{ + int reg; + + /* Check the argument registers first as these are call-used. The + register allocation order means that sometimes r3 might be used + but earlier argument registers might not, so check them all. */ + for (reg = LAST_ARG_REGNUM; reg >= 0; reg --) + if (!df_regs_ever_live_p (reg)) + return reg; + + /* Before going on to check the call-saved registers we can try a couple + more ways of deducing that r3 is available. The first is when we are + pushing anonymous arguments onto the stack and we have less than 4 + registers worth of fixed arguments(*). In this case r3 will be part of + the variable argument list and so we can be sure that it will be + pushed right at the start of the function. Hence it will be available + for the rest of the prologue. + (*): ie crtl->args.pretend_args_size is greater than 0. */ + if (cfun->machine->uses_anonymous_args + && crtl->args.pretend_args_size > 0) + return LAST_ARG_REGNUM; + + /* The other case is when we have fixed arguments but less than 4 registers + worth. In this case r3 might be used in the body of the function, but + it is not being used to convey an argument into the function. In theory + we could just check crtl->args.size to see how many bytes are + being passed in argument registers, but it seems that it is unreliable. + Sometimes it will have the value 0 when in fact arguments are being + passed. (See testcase execute/20021111-1.c for an example). So we also + check the args_info.nregs field as well. The problem with this field is + that it makes no allowances for arguments that are passed to the + function but which are not used. Hence we could miss an opportunity + when a function has an unused argument in r3. But it is better to be + safe than to be sorry. */ + if (! cfun->machine->uses_anonymous_args + && crtl->args.size >= 0 + && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD) + && (TARGET_AAPCS_BASED + ? crtl->args.info.aapcs_ncrn < 4 + : crtl->args.info.nregs < 4)) + return LAST_ARG_REGNUM; + + /* Otherwise look for a call-saved register that is going to be pushed. */ + for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --) + if (pushed_regs_mask & (1 << reg)) + return reg; + + if (TARGET_THUMB2) + { + /* Thumb-2 can use high regs. 
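thumb_find_work_register above is essentially two bitmask scans: first the call-clobbered argument registers that are not live, then any low call-saved register the prologue is about to push anyway. A stripped-down sketch with the r3 heuristics and the Thumb-2 high-register fallback left out (LIVE and PUSHED are register bitmasks):

static int
find_work_register (unsigned live, unsigned pushed)
{
  for (int reg = 3; reg >= 0; reg--)     /* argument registers r3..r0 */
    if (!(live & (1u << reg)))
      return reg;

  for (int reg = 7; reg > 3; reg--)      /* low call-saved registers r7..r4 */
    if (pushed & (1u << reg))
      return reg;

  return -1;                             /* the real code asserts here */
}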
*/ + for (reg = FIRST_HI_REGNUM; reg < 15; reg ++) + if (pushed_regs_mask & (1 << reg)) + return reg; + } + /* Something went wrong - thumb_compute_save_reg_mask() + should have arranged for a suitable register to be pushed. */ + gcc_unreachable (); +} + +static GTY(()) int pic_labelno; + +/* Generate code to load the PIC register. In thumb mode SCRATCH is a + low register. */ + +void +arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED) +{ + rtx l1, labelno, pic_tmp, pic_rtx, pic_reg; + + if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE) + return; + + gcc_assert (flag_pic); + + pic_reg = cfun->machine->pic_reg; + if (TARGET_VXWORKS_RTP) + { + pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); + pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); + emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); + + emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); + + pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); + emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp)); + } + else + { + /* We use an UNSPEC rather than a LABEL_REF because this label + never appears in the code stream. */ + + labelno = GEN_INT (pic_labelno++); + l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + l1 = gen_rtx_CONST (VOIDmode, l1); + + /* On the ARM the PC register contains 'dot + 8' at the time of the + addition, on the Thumb it is 'dot + 4'. */ + pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4); + pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx), + UNSPEC_GOTSYM_OFF); + pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); + + if (TARGET_32BIT) + { + emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); + } + else /* TARGET_THUMB1 */ + { + if (arm_pic_register != INVALID_REGNUM + && REGNO (pic_reg) > LAST_LO_REGNUM) + { + /* We will have pushed the pic register, so we should always be + able to find a work register. */ + pic_tmp = gen_rtx_REG (SImode, + thumb_find_work_register (saved_regs)); + emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx)); + emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp)); + emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); + } + else if (arm_pic_register != INVALID_REGNUM + && arm_pic_register > LAST_LO_REGNUM + && REGNO (pic_reg) <= LAST_LO_REGNUM) + { + emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); + emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg); + emit_use (gen_rtx_REG (Pmode, arm_pic_register)); + } + else + emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); + } + } + + /* Need to emit this whether or not we obey regdecls, + since setjmp/longjmp can cause life info to screw up. */ + emit_use (pic_reg); +} + +/* Generate code to load the address of a static var when flag_pic is set. */ +static rtx +arm_pic_static_addr (rtx orig, rtx reg) +{ + rtx l1, labelno, offset_rtx, insn; + + gcc_assert (flag_pic); + + /* We use an UNSPEC rather than a LABEL_REF because this label + never appears in the code stream. */ + labelno = GEN_INT (pic_labelno++); + l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + l1 = gen_rtx_CONST (VOIDmode, l1); + + /* On the ARM the PC register contains 'dot + 8' at the time of the + addition, on the Thumb it is 'dot + 4'. */ + offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 
8 : 4); + offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx), + UNSPEC_SYMBOL_OFFSET); + offset_rtx = gen_rtx_CONST (Pmode, offset_rtx); + + insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno)); + return insn; +} + +/* Return nonzero if X is valid as an ARM state addressing register. */ +static int +arm_address_register_rtx_p (rtx x, int strict_p) +{ + int regno; + + if (!REG_P (x)) + return 0; + + regno = REGNO (x); + + if (strict_p) + return ARM_REGNO_OK_FOR_BASE_P (regno); + + return (regno <= LAST_ARM_REGNUM + || regno >= FIRST_PSEUDO_REGISTER + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM); +} + +/* Return TRUE if this rtx is the difference of a symbol and a label, + and will reduce to a PC-relative relocation in the object file. + Expressions like this can be left alone when generating PIC, rather + than forced through the GOT. */ +static int +pcrel_constant_p (rtx x) +{ + if (GET_CODE (x) == MINUS) + return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1)); + + return FALSE; +} + +/* Return true if X will surely end up in an index register after next + splitting pass. */ +static bool +will_be_in_index_register (const_rtx x) +{ + /* arm.md: calculate_pic_address will split this into a register. */ + return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM); +} + +/* Return nonzero if X is a valid ARM state address operand. */ +int +arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, + int strict_p) +{ + bool use_ldrd; + enum rtx_code code = GET_CODE (x); + + if (arm_address_register_rtx_p (x, strict_p)) + return 1; + + use_ldrd = (TARGET_LDRD + && (mode == DImode + || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); + + if (code == POST_INC || code == PRE_DEC + || ((code == PRE_INC || code == POST_DEC) + && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) + return arm_address_register_rtx_p (XEXP (x, 0), strict_p); + + else if ((code == POST_MODIFY || code == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (x, 0), strict_p) + && GET_CODE (XEXP (x, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) + { + rtx addend = XEXP (XEXP (x, 1), 1); + + /* Don't allow ldrd post increment by register because it's hard + to fixup invalid register choices. */ + if (use_ldrd + && GET_CODE (x) == POST_MODIFY + && REG_P (addend)) + return 0; + + return ((use_ldrd || GET_MODE_SIZE (mode) <= 4) + && arm_legitimate_index_p (mode, addend, outer, strict_p)); + } + + /* After reload constants split into minipools will have addresses + from a LABEL_REF. 
*/ + else if (reload_completed + && (code == LABEL_REF + || (code == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) + return 1; + + else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) + return 0; + + else if (code == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return ((arm_address_register_rtx_p (xop0, strict_p) + && ((CONST_INT_P (xop1) + && arm_legitimate_index_p (mode, xop1, outer, strict_p)) + || (!strict_p && will_be_in_index_register (xop1)))) + || (arm_address_register_rtx_p (xop1, strict_p) + && arm_legitimate_index_p (mode, xop0, outer, strict_p))); + } + +#if 0 + /* Reload currently can't handle MINUS, so disable this for now */ + else if (GET_CODE (x) == MINUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return (arm_address_register_rtx_p (xop0, strict_p) + && arm_legitimate_index_p (mode, xop1, outer, strict_p)); + } +#endif + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if X is a valid Thumb-2 address operand. */ +static int +thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) +{ + bool use_ldrd; + enum rtx_code code = GET_CODE (x); + + if (arm_address_register_rtx_p (x, strict_p)) + return 1; + + use_ldrd = (TARGET_LDRD + && (mode == DImode + || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); + + if (code == POST_INC || code == PRE_DEC + || ((code == PRE_INC || code == POST_DEC) + && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) + return arm_address_register_rtx_p (XEXP (x, 0), strict_p); + + else if ((code == POST_MODIFY || code == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (x, 0), strict_p) + && GET_CODE (XEXP (x, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) + { + /* Thumb-2 only has autoincrement by constant. */ + rtx addend = XEXP (XEXP (x, 1), 1); + HOST_WIDE_INT offset; + + if (!CONST_INT_P (addend)) + return 0; + + offset = INTVAL(addend); + if (GET_MODE_SIZE (mode) <= 4) + return (offset > -256 && offset < 256); + + return (use_ldrd && offset > -1024 && offset < 1024 + && (offset & 3) == 0); + } + + /* After reload constants split into minipools will have addresses + from a LABEL_REF. */ + else if (reload_completed + && (code == LABEL_REF + || (code == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) + return 1; + + else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) + return 0; + + else if (code == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + return ((arm_address_register_rtx_p (xop0, strict_p) + && (thumb2_legitimate_index_p (mode, xop1, strict_p) + || (!strict_p && will_be_in_index_register (xop1)))) + || (arm_address_register_rtx_p (xop1, strict_p) + && thumb2_legitimate_index_p (mode, xop0, strict_p))); + } + + /* Normally we can assign constant values to target registers without + the help of constant pool. But there are cases we have to use constant + pool like: + 1) assign a label to register. + 2) sign-extend a 8bit value to 32bit and then assign to register. + + Constant pool access in format: + (set (reg r0) (mem (symbol_ref (".LC0")))) + will cause the use of literal pool (later in function arm_reorg). 
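The Thumb-2 auto-increment rule in this function comes down to an offset-range check that depends only on the access size and whether LDRD is usable. Restated as a stand-alone predicate, with the machine mode reduced to its size in bytes:

#include <stdbool.h>

static bool
thumb2_autoinc_offset_ok (long offset, int mode_size, bool use_ldrd)
{
  if (mode_size <= 4)
    return offset > -256 && offset < 256;

  return use_ldrd && offset > -1024 && offset < 1024 && (offset & 3) == 0;
}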
+ So here we mark such format as an invalid format, then the compiler + will adjust it into: + (set (reg r0) (symbol_ref (".LC0"))) + (set (reg r0) (mem (reg r0))). + No extra register is required, and (mem (reg r0)) won't cause the use + of literal pools. */ + else if (arm_disable_literal_pool && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x)) + return 0; + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && code == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if INDEX is valid for an address index operand in + ARM state. */ +static int +arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, + int strict_p) +{ + HOST_WIDE_INT range; + enum rtx_code code = GET_CODE (index); + + /* Standard coprocessor addressing modes. */ + if (TARGET_HARD_FLOAT + && TARGET_VFP + && (mode == SFmode || mode == DFmode)) + return (code == CONST_INT && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* For quad modes, we restrict the constant offset to be slightly less + than what the instruction format permits. We do this because for + quad mode moves, we will actually decompose them into two separate + double-mode reads or writes. INDEX must therefore be a valid + (double-mode) offset and so should INDEX+8. */ + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1016 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* We have no such constraint on double mode offsets, so we permit the + full range of the instruction format. */ + if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (arm_address_register_rtx_p (index, strict_p) + && (GET_MODE_SIZE (mode) <= 4)) + return 1; + + if (mode == DImode || mode == DFmode) + { + if (code == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (index); + + if (TARGET_LDRD) + return val > -256 && val < 256; + else + return val > -4096 && val < 4092; + } + + return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); + } + + if (GET_MODE_SIZE (mode) <= 4 + && ! (arm_arch4 + && (mode == HImode + || mode == HFmode + || (mode == QImode && outer == SIGN_EXTEND)))) + { + if (code == MULT) + { + rtx xiop0 = XEXP (index, 0); + rtx xiop1 = XEXP (index, 1); + + return ((arm_address_register_rtx_p (xiop0, strict_p) + && power_of_two_operand (xiop1, SImode)) + || (arm_address_register_rtx_p (xiop1, strict_p) + && power_of_two_operand (xiop0, SImode))); + } + else if (code == LSHIFTRT || code == ASHIFTRT + || code == ASHIFT || code == ROTATERT) + { + rtx op = XEXP (index, 1); + + return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) + && CONST_INT_P (op) + && INTVAL (op) > 0 + && INTVAL (op) <= 31); + } + } + + /* For ARM v4 we may be doing a sign-extend operation during the + load. */ + if (arm_arch4) + { + if (mode == HImode + || mode == HFmode + || (outer == SIGN_EXTEND && mode == QImode)) + range = 256; + else + range = 4096; + } + else + range = (mode == HImode || mode == HFmode) ? 
4095 : 4096; + + return (code == CONST_INT + && INTVAL (index) < range + && INTVAL (index) > -range); +} + +/* Return true if OP is a valid index scaling factor for Thumb-2 address + index operand. i.e. 1, 2, 4 or 8. */ +static bool +thumb2_index_mul_operand (rtx op) +{ + HOST_WIDE_INT val; + + if (!CONST_INT_P (op)) + return false; + + val = INTVAL(op); + return (val == 1 || val == 2 || val == 4 || val == 8); +} + +/* Return nonzero if INDEX is a valid Thumb-2 address index operand. */ +static int +thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p) +{ + enum rtx_code code = GET_CODE (index); + + /* ??? Combine arm and thumb2 coprocessor addressing modes. */ + /* Standard coprocessor addressing modes. */ + if (TARGET_HARD_FLOAT + && TARGET_VFP + && (mode == SFmode || mode == DFmode)) + return (code == CONST_INT && INTVAL (index) < 1024 + /* Thumb-2 allows only > -256 index range for it's core register + load/stores. Since we allow SF/DF in core registers, we have + to use the intersection between -256~4096 (core) and -1024~1024 + (coprocessor). */ + && INTVAL (index) > -256 + && (INTVAL (index) & 3) == 0); + + if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) + { + /* For DImode assume values will usually live in core regs + and only allow LDRD addressing modes. */ + if (!TARGET_LDRD || mode != DImode) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + } + + /* For quad modes, we restrict the constant offset to be slightly less + than what the instruction format permits. We do this because for + quad mode moves, we will actually decompose them into two separate + double-mode reads or writes. INDEX must therefore be a valid + (double-mode) offset and so should INDEX+8. */ + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1016 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + /* We have no such constraint on double mode offsets, so we permit the + full range of the instruction format. */ + if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) + return (code == CONST_INT + && INTVAL (index) < 1024 + && INTVAL (index) > -1024 + && (INTVAL (index) & 3) == 0); + + if (arm_address_register_rtx_p (index, strict_p) + && (GET_MODE_SIZE (mode) <= 4)) + return 1; + + if (mode == DImode || mode == DFmode) + { + if (code == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (index); + /* ??? Can we assume ldrd for thumb2? */ + /* Thumb-2 ldrd only has reg+const addressing modes. */ + /* ldrd supports offsets of +-1020. + However the ldr fallback does not. */ + return val > -256 && val < 256 && (val & 3) == 0; + } + else + return 0; + } + + if (code == MULT) + { + rtx xiop0 = XEXP (index, 0); + rtx xiop1 = XEXP (index, 1); + + return ((arm_address_register_rtx_p (xiop0, strict_p) + && thumb2_index_mul_operand (xiop1)) + || (arm_address_register_rtx_p (xiop1, strict_p) + && thumb2_index_mul_operand (xiop0))); + } + else if (code == ASHIFT) + { + rtx op = XEXP (index, 1); + + return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) + && CONST_INT_P (op) + && INTVAL (op) > 0 + && INTVAL (op) <= 3); + } + + return (code == CONST_INT + && INTVAL (index) < 4096 + && INTVAL (index) > -256); +} + +/* Return nonzero if X is valid as a 16-bit Thumb state base register. 
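/* Illustrative sketch (standalone; not part of arm.c, helper names are
   hypothetical) of the quad-mode offset rule above: a Q-register access
   at OFFSET is decomposed into two D-register accesses at OFFSET and
   OFFSET + 8, and each half must satisfy the D-register constraint
   (-1024, 1024) with 4-byte alignment, which is why the Q-register
   bound tightens to 1016.  */
#include <assert.h>

static int
dreg_offset_ok (long off)
{
  return off > -1024 && off < 1024 && (off & 3) == 0;
}

static int
qreg_offset_ok (long off)
{
  return dreg_offset_ok (off) && dreg_offset_ok (off + 8);
}

int
main (void)
{
  assert (qreg_offset_ok (1008));   /* both halves (1008, 1016) fit  */
  assert (!qreg_offset_ok (1016));  /* second half would be at 1024  */
  return 0;
}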
*/ +static int +thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p) +{ + int regno; + + if (!REG_P (x)) + return 0; + + regno = REGNO (x); + + if (strict_p) + return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode); + + return (regno <= LAST_LO_REGNUM + || regno > LAST_VIRTUAL_REGISTER + || regno == FRAME_POINTER_REGNUM + || (GET_MODE_SIZE (mode) >= 4 + && (regno == STACK_POINTER_REGNUM + || regno >= FIRST_PSEUDO_REGISTER + || x == hard_frame_pointer_rtx + || x == arg_pointer_rtx))); +} + +/* Return nonzero if x is a legitimate index register. This is the case + for any base register that can access a QImode object. */ +inline static int +thumb1_index_register_rtx_p (rtx x, int strict_p) +{ + return thumb1_base_register_rtx_p (x, QImode, strict_p); +} + +/* Return nonzero if x is a legitimate 16-bit Thumb-state address. + + The AP may be eliminated to either the SP or the FP, so we use the + least common denominator, e.g. SImode, and offsets from 0 to 64. + + ??? Verify whether the above is the right approach. + + ??? Also, the FP may be eliminated to the SP, so perhaps that + needs special handling also. + + ??? Look at how the mips16 port solves this problem. It probably uses + better ways to solve some of these problems. + + Although it is not incorrect, we don't accept QImode and HImode + addresses based on the frame pointer or arg pointer until the + reload pass starts. This is so that eliminating such addresses + into stack based ones won't produce impossible code. */ +int +thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) +{ + /* ??? Not clear if this is right. Experiment. */ + if (GET_MODE_SIZE (mode) < 4 + && !(reload_in_progress || reload_completed) + && (reg_mentioned_p (frame_pointer_rtx, x) + || reg_mentioned_p (arg_pointer_rtx, x) + || reg_mentioned_p (virtual_incoming_args_rtx, x) + || reg_mentioned_p (virtual_outgoing_args_rtx, x) + || reg_mentioned_p (virtual_stack_dynamic_rtx, x) + || reg_mentioned_p (virtual_stack_vars_rtx, x))) + return 0; + + /* Accept any base register. SP only in SImode or larger. */ + else if (thumb1_base_register_rtx_p (x, mode, strict_p)) + return 1; + + /* This is PC relative data before arm_reorg runs. */ + else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x) + && GET_CODE (x) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic) + return 1; + + /* This is PC relative data after arm_reorg runs. */ + else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) + && reload_completed + && (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) + return 1; + + /* Post-inc indexing only supported for SImode and larger. */ + else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4 + && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)) + return 1; + + else if (GET_CODE (x) == PLUS) + { + /* REG+REG address can be any two index registers. */ + /* We disallow FRAME+REG addressing since we know that FRAME + will be replaced with STACK, and SP relative addressing only + permits SP+OFFSET. */ + if (GET_MODE_SIZE (mode) <= 4 + && XEXP (x, 0) != frame_pointer_rtx + && XEXP (x, 1) != frame_pointer_rtx + && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) + && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) + || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) + return 1; + + /* REG+const has 5-7 bit offset for non-SP registers. 
*/ + else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) + || XEXP (x, 0) == arg_pointer_rtx) + && CONST_INT_P (XEXP (x, 1)) + && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) + return 1; + + /* REG+const has 10-bit offset for SP, but only SImode and + larger is supported. */ + /* ??? Should probably check for DI/DFmode overflow here + just like GO_IF_LEGITIMATE_OFFSET does. */ + else if (REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM + && GET_MODE_SIZE (mode) >= 4 + && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) >= 0 + && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024 + && (INTVAL (XEXP (x, 1)) & 3) == 0) + return 1; + + else if (REG_P (XEXP (x, 0)) + && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM + || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM + || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER + && REGNO (XEXP (x, 0)) + <= LAST_VIRTUAL_POINTER_REGISTER)) + && GET_MODE_SIZE (mode) >= 4 + && CONST_INT_P (XEXP (x, 1)) + && (INTVAL (XEXP (x, 1)) & 3) == 0) + return 1; + } + + else if (GET_MODE_CLASS (mode) != MODE_FLOAT + && GET_MODE_SIZE (mode) == 4 + && GET_CODE (x) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (x) + && ! (flag_pic + && symbol_mentioned_p (get_pool_constant (x)) + && ! pcrel_constant_p (get_pool_constant (x)))) + return 1; + + return 0; +} + +/* Return nonzero if VAL can be used as an offset in a Thumb-state address + instruction of mode MODE. */ +int +thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val) +{ + switch (GET_MODE_SIZE (mode)) + { + case 1: + return val >= 0 && val < 32; + + case 2: + return val >= 0 && val < 64 && (val & 1) == 0; + + default: + return (val >= 0 + && (val + GET_MODE_SIZE (mode)) <= 128 + && (val & 3) == 0); + } +} + +bool +arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p) +{ + if (TARGET_ARM) + return arm_legitimate_address_outer_p (mode, x, SET, strict_p); + else if (TARGET_THUMB2) + return thumb2_legitimate_address_p (mode, x, strict_p); + else /* if (TARGET_THUMB1) */ + return thumb1_legitimate_address_p (mode, x, strict_p); +} + +/* Worker function for TARGET_PREFERRED_RELOAD_CLASS. + + Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS, but for the Thumb core registers and + immediate constants we prefer a LO_REGS class or a subset. */ + +static reg_class_t +arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) +{ + if (TARGET_32BIT) + return rclass; + else + { + if (rclass == GENERAL_REGS) + return LO_REGS; + else + return rclass; + } +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY(()) rtx tls_get_addr_libfunc; + +static rtx +get_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +rtx +arm_load_tp (rtx target) +{ + if (!target) + target = gen_reg_rtx (SImode); + + if (TARGET_HARD_TP) + { + /* Can return in any reg. */ + emit_insn (gen_load_tp_hard (target)); + } + else + { + /* Always returned in r0. Immediately copy the result into a pseudo, + otherwise other uses of r0 (e.g. setting up function arguments) may + clobber the value. 
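/* Illustrative sketch (standalone; not part of arm.c, names are
   hypothetical) of the Thumb-1 REG+const limits that
   thumb_legitimate_offset_p above encodes: the immediate scales with
   the access size, giving byte offsets 0..31, halfword offsets 0..62
   (even) and word offsets 0..124 (multiples of 4, so the whole access
   ends by byte 128).  */
#include <assert.h>

static int
thumb1_offset_ok (int size, long val)
{
  switch (size)
    {
    case 1:
      return val >= 0 && val < 32;
    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;
    default:
      return val >= 0 && val + size <= 128 && (val & 3) == 0;
    }
}

int
main (void)
{
  assert (thumb1_offset_ok (1, 31) && !thumb1_offset_ok (1, 32));
  assert (thumb1_offset_ok (2, 62) && !thumb1_offset_ok (2, 63));
  assert (thumb1_offset_ok (4, 124) && !thumb1_offset_ok (4, 128));
  return 0;
}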
*/ + + rtx tmp; + + emit_insn (gen_load_tp_soft ()); + + tmp = gen_rtx_REG (SImode, 0); + emit_move_insn (target, tmp); + } + return target; +} + +static rtx +load_tls_operand (rtx x, rtx reg) +{ + rtx tmp; + + if (reg == NULL_RTX) + reg = gen_reg_rtx (SImode); + + tmp = gen_rtx_CONST (SImode, x); + + emit_move_insn (reg, tmp); + + return reg; +} + +static rtx +arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc) +{ + rtx insns, label, labelno, sum; + + gcc_assert (reloc != TLS_DESCSEQ); + start_sequence (); + + labelno = GEN_INT (pic_labelno++); + label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + label = gen_rtx_CONST (VOIDmode, label); + + sum = gen_rtx_UNSPEC (Pmode, + gen_rtvec (4, x, GEN_INT (reloc), label, + GEN_INT (TARGET_ARM ? 8 : 4)), + UNSPEC_TLS); + reg = load_tls_operand (sum, reg); + + if (TARGET_ARM) + emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); + else + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + + *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, + LCT_PURE, /* LCT_CONST? */ + Pmode, 1, reg, Pmode); + + insns = get_insns (); + end_sequence (); + + return insns; +} + +static rtx +arm_tls_descseq_addr (rtx x, rtx reg) +{ + rtx labelno = GEN_INT (pic_labelno++); + rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + rtx sum = gen_rtx_UNSPEC (Pmode, + gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ), + gen_rtx_CONST (VOIDmode, label), + GEN_INT (!TARGET_ARM)), + UNSPEC_TLS); + rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0)); + + emit_insn (gen_tlscall (x, labelno)); + if (!reg) + reg = gen_reg_rtx (SImode); + else + gcc_assert (REGNO (reg) != 0); + + emit_move_insn (reg, reg0); + + return reg; +} + +rtx +legitimize_tls_address (rtx x, rtx reg) +{ + rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend; + unsigned int model = SYMBOL_REF_TLS_MODEL (x); + + switch (model) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + if (TARGET_GNU2_TLS) + { + reg = arm_tls_descseq_addr (x, reg); + + tp = arm_load_tp (NULL_RTX); + + dest = gen_rtx_PLUS (Pmode, tp, reg); + } + else + { + /* Original scheme */ + insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32); + dest = gen_reg_rtx (Pmode); + emit_libcall_block (insns, dest, ret, x); + } + return dest; + + case TLS_MODEL_LOCAL_DYNAMIC: + if (TARGET_GNU2_TLS) + { + reg = arm_tls_descseq_addr (x, reg); + + tp = arm_load_tp (NULL_RTX); + + dest = gen_rtx_PLUS (Pmode, tp, reg); + } + else + { + insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32); + + /* Attach a unique REG_EQUIV, to allow the RTL optimizers to + share the LDM result with other LD model accesses. */ + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), + UNSPEC_TLS); + dest = gen_reg_rtx (Pmode); + emit_libcall_block (insns, dest, ret, eqv); + + /* Load the addend. */ + addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, + GEN_INT (TLS_LDO32)), + UNSPEC_TLS); + addend = force_reg (SImode, gen_rtx_CONST (SImode, addend)); + dest = gen_rtx_PLUS (Pmode, dest, addend); + } + return dest; + + case TLS_MODEL_INITIAL_EXEC: + labelno = GEN_INT (pic_labelno++); + label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + label = gen_rtx_CONST (VOIDmode, label); + sum = gen_rtx_UNSPEC (Pmode, + gen_rtvec (4, x, GEN_INT (TLS_IE32), label, + GEN_INT (TARGET_ARM ? 
8 : 4)), + UNSPEC_TLS); + reg = load_tls_operand (sum, reg); + + if (TARGET_ARM) + emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); + else if (TARGET_THUMB2) + emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno)); + else + { + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); + emit_move_insn (reg, gen_const_mem (SImode, reg)); + } + + tp = arm_load_tp (NULL_RTX); + + return gen_rtx_PLUS (Pmode, tp, reg); + + case TLS_MODEL_LOCAL_EXEC: + tp = arm_load_tp (NULL_RTX); + + reg = gen_rtx_UNSPEC (Pmode, + gen_rtvec (2, x, GEN_INT (TLS_LE32)), + UNSPEC_TLS); + reg = force_reg (SImode, gen_rtx_CONST (SImode, reg)); + + return gen_rtx_PLUS (Pmode, tp, reg); + + default: + abort (); + } +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. */ +rtx +arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) +{ + if (arm_tls_referenced_p (x)) + { + rtx addend = NULL; + + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + { + addend = XEXP (XEXP (x, 0), 1); + x = XEXP (XEXP (x, 0), 0); + } + + if (GET_CODE (x) != SYMBOL_REF) + return x; + + gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0); + + x = legitimize_tls_address (x, NULL_RTX); + + if (addend) + { + x = gen_rtx_PLUS (SImode, x, addend); + orig_x = x; + } + else + return x; + } + + if (!TARGET_ARM) + { + /* TODO: legitimize_address for Thumb2. */ + if (TARGET_THUMB2) + return x; + return thumb_legitimize_address (x, orig_x, mode); + } + + if (GET_CODE (x) == PLUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0)) + xop0 = force_reg (SImode, xop0); + + if (CONSTANT_P (xop1) && !CONST_INT_P (xop1) + && !symbol_mentioned_p (xop1)) + xop1 = force_reg (SImode, xop1); + + if (ARM_BASE_REGISTER_RTX_P (xop0) + && CONST_INT_P (xop1)) + { + HOST_WIDE_INT n, low_n; + rtx base_reg, val; + n = INTVAL (xop1); + + /* VFP addressing modes actually allow greater offsets, but for + now we just stick with the lowest common denominator. */ + if (mode == DImode + || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode)) + { + low_n = n & 0x0f; + n &= ~0x0f; + if (low_n > 4) + { + n += 16; + low_n -= 16; + } + } + else + { + low_n = ((mode) == TImode ? 0 + : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff)); + n -= low_n; + } + + base_reg = gen_reg_rtx (SImode); + val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX); + emit_move_insn (base_reg, val); + x = plus_constant (Pmode, base_reg, low_n); + } + else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) + x = gen_rtx_PLUS (SImode, xop0, xop1); + } + + /* XXX We don't allow MINUS any more -- see comment in + arm_legitimate_address_outer_p (). */ + else if (GET_CODE (x) == MINUS) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + + if (CONSTANT_P (xop0)) + xop0 = force_reg (SImode, xop0); + + if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1)) + xop1 = force_reg (SImode, xop1); + + if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) + x = gen_rtx_MINUS (SImode, xop0, xop1); + } + + /* Make sure to take full advantage of the pre-indexed addressing mode + with absolute addresses which often allows for the base register to + be factorized for multiple adjacent memory references, and it might + even allows for the mini pool to be avoided entirely. 
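/* Illustrative sketch (standalone; not part of arm.c, names are
   hypothetical) of the DImode/DFmode displacement split used in
   arm_legitimize_address above: the low nibble stays as the addressing
   offset, and when it exceeds 4 the split is biased upward so the
   residue becomes a small negative offset instead.  */
#include <assert.h>

static void
split_di_offset (long n, long *hi, long *lo)
{
  *lo = n & 0x0f;
  *hi = n & ~0x0fL;
  if (*lo > 4)
    {
      *hi += 16;
      *lo -= 16;
    }
}

int
main (void)
{
  long hi, lo;

  split_di_offset (0x123, &hi, &lo);   /* 0x123 = 0x120 + 3  */
  assert (hi == 0x120 && lo == 3);
  split_di_offset (0x12f, &hi, &lo);   /* 0x12f = 0x130 - 1  */
  assert (hi == 0x130 && lo == -1);
  return 0;
}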
*/ + else if (CONST_INT_P (x) && optimize > 0) + { + unsigned int bits; + HOST_WIDE_INT mask, base, index; + rtx base_reg; + + /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only + use a 8-bit index. So let's use a 12-bit index for SImode only and + hope that arm_gen_constant will enable ldrb to use more bits. */ + bits = (mode == SImode) ? 12 : 8; + mask = (1 << bits) - 1; + base = INTVAL (x) & ~mask; + index = INTVAL (x) & mask; + if (bit_count (base & 0xffffffff) > (32 - bits)/2) + { + /* It'll most probably be more efficient to generate the base + with more bits set and use a negative index instead. */ + base |= mask; + index -= mask; + } + base_reg = force_reg (SImode, GEN_INT (base)); + x = plus_constant (Pmode, base_reg, index); + } + + if (flag_pic) + { + /* We need to find and carefully transform any SYMBOL and LABEL + references; so go back to the original address expression. */ + rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); + + if (new_x != orig_x) + x = new_x; + } + + return x; +} + + +/* Try machine-dependent ways of modifying an illegitimate Thumb address + to be legitimate. If we find one, return the new, valid address. */ +rtx +thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) +{ + if (GET_CODE (x) == PLUS + && CONST_INT_P (XEXP (x, 1)) + && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode) + || INTVAL (XEXP (x, 1)) < 0)) + { + rtx xop0 = XEXP (x, 0); + rtx xop1 = XEXP (x, 1); + HOST_WIDE_INT offset = INTVAL (xop1); + + /* Try and fold the offset into a biasing of the base register and + then offsetting that. Don't do this when optimizing for space + since it can cause too many CSEs. */ + if (optimize_size && offset >= 0 + && offset < 256 + 31 * GET_MODE_SIZE (mode)) + { + HOST_WIDE_INT delta; + + if (offset >= 256) + delta = offset - (256 - GET_MODE_SIZE (mode)); + else if (offset < 32 * GET_MODE_SIZE (mode) + 8) + delta = 31 * GET_MODE_SIZE (mode); + else + delta = offset & (~31 * GET_MODE_SIZE (mode)); + + xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta), + NULL_RTX); + x = plus_constant (Pmode, xop0, delta); + } + else if (offset < 0 && offset > -256) + /* Small negative offsets are best done with a subtract before the + dereference, forcing these into a register normally takes two + instructions. */ + x = force_operand (x, NULL_RTX); + else + { + /* For the remaining cases, force the constant into a register. */ + xop1 = force_reg (SImode, xop1); + x = gen_rtx_PLUS (SImode, xop0, xop1); + } + } + else if (GET_CODE (x) == PLUS + && s_register_operand (XEXP (x, 1), SImode) + && !s_register_operand (XEXP (x, 0), SImode)) + { + rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX); + + x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1)); + } + + if (flag_pic) + { + /* We need to find and carefully transform any SYMBOL and LABEL + references; so go back to the original address expression. */ + rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); + + if (new_x != orig_x) + x = new_x; + } + + return x; +} + +bool +arm_legitimize_reload_address (rtx *p, + enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + /* We must recognize output that we have already generated ourselves. 
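/* Illustrative sketch (standalone; not part of arm.c, names are
   hypothetical) of the constant-address split above: for SImode the
   load keeps a 12-bit index and the base is materialized separately;
   if the base would need too many set bits, flip to base|mask with a
   negative index so the base immediate is cheaper to build.  The sum
   base + index is preserved either way.  */
#include <assert.h>

static void
split_abs_address (long addr, int bits, long *base, long *index)
{
  long mask = (1L << bits) - 1;

  *base = addr & ~mask;
  *index = addr & mask;
  if (__builtin_popcountl (*base & 0xffffffff) > (32 - bits) / 2)
    {
      *base |= mask;
      *index -= mask;
    }
}

int
main (void)
{
  long base, index;

  split_abs_address (0x12345678, 12, &base, &index);
  assert (base + index == 0x12345678);
  return 0;
}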
*/ + if (GET_CODE (*p) == PLUS + && GET_CODE (XEXP (*p, 0)) == PLUS + && REG_P (XEXP (XEXP (*p, 0), 0)) + && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) + && CONST_INT_P (XEXP (*p, 1))) + { + push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, + MODE_BASE_REG_CLASS (mode), GET_MODE (*p), + VOIDmode, 0, 0, opnum, (enum reload_type) type); + return true; + } + + if (GET_CODE (*p) == PLUS + && REG_P (XEXP (*p, 0)) + && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) + /* If the base register is equivalent to a constant, let the generic + code handle it. Otherwise we will run into problems if a future + reload pass decides to rematerialize the constant. */ + && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0))) + && CONST_INT_P (XEXP (*p, 1))) + { + HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); + HOST_WIDE_INT low, high; + + /* Detect coprocessor load/stores. */ + bool coproc_p = ((TARGET_HARD_FLOAT + && TARGET_VFP + && (mode == SFmode || mode == DFmode)) + || (TARGET_REALLY_IWMMXT + && VALID_IWMMXT_REG_MODE (mode)) + || (TARGET_NEON + && (VALID_NEON_DREG_MODE (mode) + || VALID_NEON_QREG_MODE (mode)))); + + /* For some conditions, bail out when lower two bits are unaligned. */ + if ((val & 0x3) != 0 + /* Coprocessor load/store indexes are 8-bits + '00' appended. */ + && (coproc_p + /* For DI, and DF under soft-float: */ + || ((mode == DImode || mode == DFmode) + /* Without ldrd, we use stm/ldm, which does not + fair well with unaligned bits. */ + && (! TARGET_LDRD + /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */ + || TARGET_THUMB2)))) + return false; + + /* When breaking down a [reg+index] reload address into [(reg+high)+low], + of which the (reg+high) gets turned into a reload add insn, + we try to decompose the index into high/low values that can often + also lead to better reload CSE. + For example: + ldr r0, [r2, #4100] // Offset too large + ldr r1, [r2, #4104] // Offset too large + + is best reloaded as: + add t1, r2, #4096 + ldr r0, [t1, #4] + add t2, r2, #4096 + ldr r1, [t2, #8] + + which post-reload CSE can simplify in most cases to eliminate the + second add instruction: + add t1, r2, #4096 + ldr r0, [t1, #4] + ldr r1, [t1, #8] + + The idea here is that we want to split out the bits of the constant + as a mask, rather than as subtracting the maximum offset that the + respective type of load/store used can handle. + + When encountering negative offsets, we can still utilize it even if + the overall offset is positive; sometimes this may lead to an immediate + that can be constructed with fewer instructions. + For example: + ldr r0, [r2, #0x3FFFFC] + + This is best reloaded as: + add t1, r2, #0x400000 + ldr r0, [t1, #-4] + + The trick for spotting this for a load insn with N bits of offset + (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a + negative offset that is going to make bit N and all the bits below + it become zero in the remainder part. + + The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect + to sign-magnitude addressing (i.e. separate +- bit, or 1's complement), + used in most cases of ARM load/store instructions. */ + +#define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \ + (((VAL) & ((1 << (N)) - 1)) \ + ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \ + : 0) + + if (coproc_p) + { + low = SIGN_MAG_LOW_ADDR_BITS (val, 10); + + /* NEON quad-word load/stores are made of two double-word accesses, + so the valid index range is reduced by 8. Treat as 9-bit range if + we go over it. 
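/* Illustrative sketch (standalone; not part of arm.c) reproducing the
   0x3FFFFC example from the comment above with SIGN_MAG_LOW_ADDR_BITS:
   for a 12-bit sign-magnitude offset field, bit 12 of the value is
   set, so the macro chooses a small negative low part and the high
   part becomes a single easy immediate.  */
#include <assert.h>

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)                                  \
  (((VAL) & ((1 << (N)) - 1))                                           \
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))       \
   : 0)

int
main (void)
{
  long val = 0x3FFFFC;
  long low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
  long high = val - low;

  assert (low == -4 && high == 0x400000);  /* add #0x400000; ldr #-4  */
  return 0;
}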
*/ + if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016) + low = SIGN_MAG_LOW_ADDR_BITS (val, 9); + } + else if (GET_MODE_SIZE (mode) == 8) + { + if (TARGET_LDRD) + low = (TARGET_THUMB2 + ? SIGN_MAG_LOW_ADDR_BITS (val, 10) + : SIGN_MAG_LOW_ADDR_BITS (val, 8)); + else + /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib) + to access doublewords. The supported load/store offsets are + -8, -4, and 4, which we try to produce here. */ + low = ((val & 0xf) ^ 0x8) - 0x8; + } + else if (GET_MODE_SIZE (mode) < 8) + { + /* NEON element load/stores do not have an offset. */ + if (TARGET_NEON_FP16 && mode == HFmode) + return false; + + if (TARGET_THUMB2) + { + /* Thumb-2 has an asymmetrical index range of (-256,4096). + Try the wider 12-bit range first, and re-try if the result + is out of range. */ + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); + if (low < -255) + low = SIGN_MAG_LOW_ADDR_BITS (val, 8); + } + else + { + if (mode == HImode || mode == HFmode) + { + if (arm_arch4) + low = SIGN_MAG_LOW_ADDR_BITS (val, 8); + else + { + /* The storehi/movhi_bytes fallbacks can use only + [-4094,+4094] of the full ldrb/strb index range. */ + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); + if (low == 4095 || low == -4095) + return false; + } + } + else + low = SIGN_MAG_LOW_ADDR_BITS (val, 12); + } + } + else + return false; + + high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) + ^ (unsigned HOST_WIDE_INT) 0x80000000) + - (unsigned HOST_WIDE_INT) 0x80000000); + /* Check for overflow or zero */ + if (low == 0 || high == 0 || (high + low != val)) + return false; + + /* Reload the high part into a base reg; leave the low part + in the mem. + Note that replacing this gen_rtx_PLUS with plus_constant is + wrong in this case because we rely on the + (plus (plus reg c1) c2) structure being preserved so that + XEXP (*p, 0) in push_reload below uses the correct term. */ + *p = gen_rtx_PLUS (GET_MODE (*p), + gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), + GEN_INT (high)), + GEN_INT (low)); + push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, + MODE_BASE_REG_CLASS (mode), GET_MODE (*p), + VOIDmode, 0, 0, opnum, (enum reload_type) type); + return true; + } + + return false; +} + +rtx +thumb_legitimize_reload_address (rtx *x_p, + enum machine_mode mode, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + rtx x = *x_p; + + if (GET_CODE (x) == PLUS + && GET_MODE_SIZE (mode) < 4 + && REG_P (XEXP (x, 0)) + && XEXP (x, 0) == stack_pointer_rtx + && CONST_INT_P (XEXP (x, 1)) + && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) + { + rtx orig_x = x; + + x = copy_rtx (x); + push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); + return x; + } + + /* If both registers are hi-regs, then it's better to reload the + entire expression rather than each register individually. That + only requires one reload register rather than two. */ + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && REG_P (XEXP (x, 1)) + && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) + && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) + { + rtx orig_x = x; + + x = copy_rtx (x); + push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); + return x; + } + + return NULL; +} + +/* Test for various thread-local symbols. */ + +/* Helper for arm_tls_referenced_p. 
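/* Illustrative sketch (standalone; not part of arm.c) of the
   xor/subtract idiom used just above to form the high part: masking to
   32 bits, XORing with 0x80000000 and subtracting it again
   sign-extends a 32-bit quantity into a wider integer using only
   unsigned arithmetic.  */
#include <assert.h>

static long long
sign_extend_32 (unsigned long long x)
{
  return (long long) (((x & 0xffffffffULL) ^ 0x80000000ULL)
                      - 0x80000000ULL);
}

int
main (void)
{
  assert (sign_extend_32 (0x7fffffffULL) == 0x7fffffffLL);
  assert (sign_extend_32 (0xfffffffcULL) == -4LL);  /* bit 31 set  */
  return 0;
}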
*/ + +static int +arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == SYMBOL_REF) + return SYMBOL_REF_TLS_MODEL (*x) != 0; + + /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are + TLS offsets, not real symbol references. */ + if (GET_CODE (*x) == UNSPEC + && XINT (*x, 1) == UNSPEC_TLS) + return -1; + + return 0; +} + +/* Return TRUE if X contains any TLS symbol references. */ + +bool +arm_tls_referenced_p (rtx x) +{ + if (! TARGET_HAVE_TLS) + return false; + + return for_each_rtx (&x, arm_tls_operand_p_1, NULL); +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P. + + On the ARM, allow any integer (invalid ones are removed later by insn + patterns), nice doubles and symbol_refs which refer to the function's + constant pool XXX. + + When generating pic allow anything. */ + +static bool +arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x) +{ + /* At present, we have no support for Neon structure constants, so forbid + them here. It might be possible to handle simple cases like 0 and -1 + in future. */ + if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)) + return false; + + return flag_pic || !label_mentioned_p (x); +} + +static bool +thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + return (CONST_INT_P (x) + || CONST_DOUBLE_P (x) + || CONSTANT_ADDRESS_P (x) + || flag_pic); +} + +static bool +arm_legitimate_constant_p (enum machine_mode mode, rtx x) +{ + return (!arm_cannot_force_const_mem (mode, x) + && (TARGET_32BIT + ? arm_legitimate_constant_p_1 (mode, x) + : thumb_legitimate_constant_p (mode, x))); +} + +/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ + +static bool +arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + rtx base, offset; + + if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + { + split_const (x, &base, &offset); + if (GET_CODE (base) == SYMBOL_REF + && !offset_within_block_p (base, INTVAL (offset))) + return true; + } + return arm_tls_referenced_p (x); +} + +#define REG_OR_SUBREG_REG(X) \ + (REG_P (X) \ + || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X)))) + +#define REG_OR_SUBREG_RTX(X) \ + (REG_P (X) ? (X) : SUBREG_REG (X)) + +static inline int +thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) +{ + enum machine_mode mode = GET_MODE (x); + int total, words; + + switch (code) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2); + + case PLUS: + case MINUS: + case COMPARE: + case NEG: + case NOT: + return COSTS_N_INSNS (1); + + case MULT: + if (CONST_INT_P (XEXP (x, 1))) + { + int cycles = 0; + unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); + + while (i) + { + i >>= 2; + cycles++; + } + return COSTS_N_INSNS (2) + cycles; + } + return COSTS_N_INSNS (1) + 16; + + case SET: + /* A SET doesn't have a mode, so let's look at the SET_DEST to get + the mode. 
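/* Illustrative sketch (standalone; not part of arm.c, name is
   hypothetical) of the cycle estimate used above for Thumb-1
   multiplication by a constant: the multiplier is consumed two bits
   per step, so the count is roughly one cycle per pair of significant
   bits.  */
#include <assert.h>

static int
thumb1_mul_cycles (unsigned long long i)
{
  int cycles = 0;

  while (i)
    {
      i >>= 2;
      cycles++;
    }
  return cycles;
}

int
main (void)
{
  assert (thumb1_mul_cycles (255) == 4);      /* 8 significant bits  */
  assert (thumb1_mul_cycles (0x10000) == 9);  /* 17 significant bits */
  return 0;
}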
*/ + words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); + return (COSTS_N_INSNS (words) + + 4 * ((MEM_P (SET_SRC (x))) + + MEM_P (SET_DEST (x)))); + + case CONST_INT: + if (outer == SET) + { + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) + return 0; + if (thumb_shiftable_const (INTVAL (x))) + return COSTS_N_INSNS (2); + return COSTS_N_INSNS (3); + } + else if ((outer == PLUS || outer == COMPARE) + && INTVAL (x) < 256 && INTVAL (x) > -256) + return 0; + else if ((outer == IOR || outer == XOR || outer == AND) + && INTVAL (x) < 256 && INTVAL (x) >= -256) + return COSTS_N_INSNS (1); + else if (outer == AND) + { + int i; + /* This duplicates the tests in the andsi3 expander. */ + for (i = 9; i <= 31; i++) + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) + return COSTS_N_INSNS (2); + } + else if (outer == ASHIFT || outer == ASHIFTRT + || outer == LSHIFTRT) + return 0; + return COSTS_N_INSNS (2); + + case CONST: + case CONST_DOUBLE: + case LABEL_REF: + case SYMBOL_REF: + return COSTS_N_INSNS (3); + + case UDIV: + case UMOD: + case DIV: + case MOD: + return 100; + + case TRUNCATE: + return 99; + + case AND: + case XOR: + case IOR: + /* XXX guess. */ + return 8; + + case MEM: + /* XXX another guess. */ + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) + + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + ? 4 : 0)); + + case IF_THEN_ELSE: + /* XXX a guess. */ + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + return 14; + return 2; + + case SIGN_EXTEND: + case ZERO_EXTEND: + total = mode == DImode ? COSTS_N_INSNS (1) : 0; + total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code); + + if (mode == SImode) + return total; + + if (arm_arch6) + return total + COSTS_N_INSNS (1); + + /* Assume a two-shift sequence. Increase the cost slightly so + we prefer actual shifts over an extend operation. */ + return total + 1 + COSTS_N_INSNS (2); + + default: + return 99; + } +} + +static inline bool +arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + enum rtx_code subcode; + rtx operand; + enum rtx_code code = GET_CODE (x); + *total = 0; + + switch (code) + { + case MEM: + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + if (TARGET_HARD_FLOAT && mode == SFmode) + *total = COSTS_N_INSNS (2); + else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) + *total = COSTS_N_INSNS (4); + else + *total = COSTS_N_INSNS (20); + return false; + + case ROTATE: + if (REG_P (XEXP (x, 1))) + *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */ + else if (!CONST_INT_P (XEXP (x, 1))) + *total = rtx_cost (XEXP (x, 1), code, 1, speed); + + /* Fall through */ + case ROTATERT: + if (mode != SImode) + { + *total += COSTS_N_INSNS (4); + return true; + } + + /* Fall through */ + case ASHIFT: case LSHIFTRT: case ASHIFTRT: + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + if (mode == DImode) + { + *total += COSTS_N_INSNS (3); + return true; + } + + *total += COSTS_N_INSNS (1); + /* Increase the cost of complex shifts because they aren't any faster, + and reduce dual issue opportunities. 
*/ + if (arm_tune_cortex_a9 + && outer != SET && !CONST_INT_P (XEXP (x, 1))) + ++*total; + + return true; + + case MINUS: + if (mode == DImode) + { + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + if (CONST_INT_P (XEXP (x, 0)) + && const_ok_for_arm (INTVAL (XEXP (x, 0)))) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_arm (INTVAL (XEXP (x, 1)))) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + if (CONST_DOUBLE_P (XEXP (x, 0)) + && arm_const_double_rtx (XEXP (x, 0))) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + if (CONST_DOUBLE_P (XEXP (x, 1)) + && arm_const_double_rtx (XEXP (x, 1))) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + return false; + } + *total = COSTS_N_INSNS (20); + return false; + } + + *total = COSTS_N_INSNS (1); + if (CONST_INT_P (XEXP (x, 0)) + && const_ok_for_arm (INTVAL (XEXP (x, 0)))) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + subcode = GET_CODE (XEXP (x, 1)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed); + return true; + } + + /* A shift as a part of RSB costs no more than RSB itself. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed); + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed); + return true; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); + if (REG_P (XEXP (XEXP (x, 1), 0)) + && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM) + *total += COSTS_N_INSNS (1); + + return true; + } + + /* Fall through */ + + case PLUS: + if (code == PLUS && arm_arch6 && mode == SImode + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (1); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)), + 0, speed); + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + } + + /* MLA: All arguments must be registers. We filter out + multiplication by a power of two, so that we fall down into + the code below. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + /* The cost comes from the cost of the multiply. 
*/ + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + if (CONST_DOUBLE_P (XEXP (x, 1)) + && arm_const_double_rtx (XEXP (x, 1))) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + return false; + } + + *total = COSTS_N_INSNS (20); + return false; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed); + if (REG_P (XEXP (XEXP (x, 0), 0)) + && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM) + *total += COSTS_N_INSNS (1); + return true; + } + + /* Fall through */ + + case AND: case XOR: case IOR: + + /* Normally the frame registers will be spilt into reg+const during + reload, so it is a bad idea to combine them with other instructions, + since then they might not be moved outside of loops. As a compromise + we allow integration with ops that have a constant as their second + operand. */ + if (REG_OR_SUBREG_REG (XEXP (x, 0)) + && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) + && !CONST_INT_P (XEXP (x, 1))) + *total = COSTS_N_INSNS (1); + + if (mode == DImode) + { + *total += COSTS_N_INSNS (2); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + return false; + } + + *total += COSTS_N_INSNS (1); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + return true; + } + + if (subcode == UMIN || subcode == UMAX + || subcode == SMIN || subcode == SMAX) + { + *total = COSTS_N_INSNS (3); + return true; + } + + return false; + + case MULT: + /* This should have been handled by the CPU specific routines. 
*/ + gcc_unreachable (); + + case TRUNCATE: + if (arm_arch3m && mode == SImode + && GET_CODE (XEXP (x, 0)) == LSHIFTRT + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) + == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))) + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND + || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND)) + { + *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed); + return true; + } + *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */ + return false; + + case NEG: + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + *total = COSTS_N_INSNS (2); + return false; + } + + /* Fall through */ + case NOT: + *total = COSTS_N_INSNS (ARM_NUM_REGS(mode)); + if (mode == SImode && code == NOT) + { + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT + || (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))) + { + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + /* Register shifts cost an extra cycle. */ + if (!CONST_INT_P (XEXP (XEXP (x, 0), 1))) + *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1), + subcode, 1, speed); + return true; + } + } + + return false; + + case IF_THEN_ELSE: + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + { + *total = COSTS_N_INSNS (4); + return true; + } + + operand = XEXP (x, 0); + + if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE) + && REG_P (XEXP (operand, 0)) + && REGNO (XEXP (operand, 0)) == CC_REGNUM)) + *total += COSTS_N_INSNS (1); + *total += (rtx_cost (XEXP (x, 1), code, 1, speed) + + rtx_cost (XEXP (x, 2), code, 2, speed)); + return true; + + case NE: + if (mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + goto scc_insn; + + case GE: + if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM) + && mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + goto scc_insn; + + case LT: + if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM) + && mode == SImode && XEXP (x, 1) == const0_rtx) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + goto scc_insn; + + case EQ: + case GT: + case LE: + case GEU: + case LTU: + case GTU: + case LEU: + case UNORDERED: + case ORDERED: + case UNEQ: + case UNGE: + case UNLT: + case UNGT: + case UNLE: + scc_insn: + /* SCC insns. In the case where the comparison has already been + performed, then they cost 2 instructions. Otherwise they need + an additional comparison before them. 
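/* Illustrative sketch (standalone; not part of arm.c) of the operation
   the TRUNCATE case above recognizes: the high 32 bits of a widening
   32x32 multiply, which ARMv3M and later produce with a single
   smull/umull, so only the multiply itself needs to be costed.  */
#include <assert.h>
#include <stdint.h>

static int32_t
mul_high_signed (int32_t a, int32_t b)
{
  return (int32_t) (((int64_t) a * (int64_t) b) >> 32);
}

int
main (void)
{
  assert (mul_high_signed (1 << 16, 1 << 16) == 1);      /*  2^32 >> 32 */
  assert (mul_high_signed (-(1 << 16), 1 << 16) == -1);  /* -2^32 >> 32 */
  return 0;
}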
*/ + *total = COSTS_N_INSNS (2); + if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM) + { + return true; + } + + /* Fall through */ + case COMPARE: + if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM) + { + *total = 0; + return true; + } + + *total += COSTS_N_INSNS (1); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) + { + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + subcode = GET_CODE (XEXP (x, 0)); + if (subcode == ASHIFT || subcode == ASHIFTRT + || subcode == LSHIFTRT + || subcode == ROTATE || subcode == ROTATERT) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + return true; + } + + if (subcode == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); + return true; + } + + return false; + + case UMIN: + case UMAX: + case SMIN: + case SMAX: + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); + if (!CONST_INT_P (XEXP (x, 1)) + || !const_ok_for_arm (INTVAL (XEXP (x, 1)))) + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + return true; + + case ABS: + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + *total = COSTS_N_INSNS (20); + return false; + } + *total = COSTS_N_INSNS (1); + if (mode == DImode) + *total += COSTS_N_INSNS (3); + return false; + + case SIGN_EXTEND: + case ZERO_EXTEND: + *total = 0; + if (GET_MODE_CLASS (mode) == MODE_INT) + { + rtx op = XEXP (x, 0); + enum machine_mode opmode = GET_MODE (op); + + if (mode == DImode) + *total += COSTS_N_INSNS (1); + + if (opmode != SImode) + { + if (MEM_P (op)) + { + /* If !arm_arch4, we use one of the extendhisi2_mem + or movhi_bytes patterns for HImode. For a QImode + sign extension, we first zero-extend from memory + and then perform a shift sequence. */ + if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND)) + *total += COSTS_N_INSNS (2); + } + else if (arm_arch6) + *total += COSTS_N_INSNS (1); + + /* We don't have the necessary insn, so we need to perform some + other operation. */ + else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode) + /* An and with constant 255. */ + *total += COSTS_N_INSNS (1); + else + /* A shift sequence. Increase costs slightly to avoid + combining two shifts into an extend operation. 
*/ + *total += COSTS_N_INSNS (2) + 1; + } + + return false; + } + + switch (GET_MODE (XEXP (x, 0))) + { + case V8QImode: + case V4HImode: + case V2SImode: + case V4QImode: + case V2HImode: + *total = COSTS_N_INSNS (1); + return false; + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + + case CONST_INT: + if (const_ok_for_arm (INTVAL (x)) + || const_ok_for_arm (~INTVAL (x))) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX, + INTVAL (x), NULL_RTX, + NULL_RTX, 0, 0)); + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (3); + return true; + + case HIGH: + *total = COSTS_N_INSNS (1); + return true; + + case LO_SUM: + *total = COSTS_N_INSNS (1); + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + + case CONST_DOUBLE: + if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + + case SET: + /* The vec_extract patterns accept memory operands that require an + address reload. Account for the cost of that reload to give the + auto-inc-dec pass an incentive to try to replace them. */ + if (TARGET_NEON && MEM_P (SET_DEST (x)) + && GET_CODE (SET_SRC (x)) == VEC_SELECT) + { + *total = rtx_cost (SET_DEST (x), code, 0, speed); + if (!neon_vector_mem_operand (SET_DEST (x), 2, true)) + *total += COSTS_N_INSNS (1); + return true; + } + /* Likewise for the vec_set patterns. */ + if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE + && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE + && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0))) + { + rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0); + *total = rtx_cost (mem, code, 0, speed); + if (!neon_vector_mem_operand (mem, 2, true)) + *total += COSTS_N_INSNS (1); + return true; + } + return false; + + case UNSPEC: + /* We cost this as high as our memory costs to allow this to + be hoisted from loops. */ + if (XINT (x, 1) == UNSPEC_PIC_UNIFIED) + { + *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); + } + return true; + + case CONST_VECTOR: + if (TARGET_NEON + && TARGET_HARD_FLOAT + && outer == SET + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + + default: + *total = COSTS_N_INSNS (4); + return false; + } +} + +/* Estimates the size cost of thumb1 instructions. + For now most of the code is copied from thumb1_rtx_costs. We need more + fine grain tuning when we have more related test cases. */ +static inline int +thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) +{ + enum machine_mode mode = GET_MODE (x); + int words; + + switch (code) + { + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2); + + case PLUS: + case MINUS: + /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1 + defined by RTL expansion, especially for the expansion of + multiplication. */ + if ((GET_CODE (XEXP (x, 0)) == MULT + && power_of_two_operand (XEXP (XEXP (x,0),1), SImode)) + || (GET_CODE (XEXP (x, 1)) == MULT + && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))) + return COSTS_N_INSNS (2); + /* On purpose fall through for normal RTX. 
*/ + case COMPARE: + case NEG: + case NOT: + return COSTS_N_INSNS (1); + + case MULT: + if (CONST_INT_P (XEXP (x, 1))) + { + /* Thumb1 mul instruction can't operate on const. We must Load it + into a register first. */ + int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET); + return COSTS_N_INSNS (1) + const_size; + } + return COSTS_N_INSNS (1); + + case SET: + /* A SET doesn't have a mode, so let's look at the SET_DEST to get + the mode. */ + words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); + return (COSTS_N_INSNS (words) + + 4 * ((MEM_P (SET_SRC (x))) + + MEM_P (SET_DEST (x)))); + + case CONST_INT: + if (outer == SET) + { + if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) + return COSTS_N_INSNS (1); + /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ + if (INTVAL (x) >= -255 && INTVAL (x) <= -1) + return COSTS_N_INSNS (2); + /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ + if (thumb_shiftable_const (INTVAL (x))) + return COSTS_N_INSNS (2); + return COSTS_N_INSNS (3); + } + else if ((outer == PLUS || outer == COMPARE) + && INTVAL (x) < 256 && INTVAL (x) > -256) + return 0; + else if ((outer == IOR || outer == XOR || outer == AND) + && INTVAL (x) < 256 && INTVAL (x) >= -256) + return COSTS_N_INSNS (1); + else if (outer == AND) + { + int i; + /* This duplicates the tests in the andsi3 expander. */ + for (i = 9; i <= 31; i++) + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) + return COSTS_N_INSNS (2); + } + else if (outer == ASHIFT || outer == ASHIFTRT + || outer == LSHIFTRT) + return 0; + return COSTS_N_INSNS (2); + + case CONST: + case CONST_DOUBLE: + case LABEL_REF: + case SYMBOL_REF: + return COSTS_N_INSNS (3); + + case UDIV: + case UMOD: + case DIV: + case MOD: + return 100; + + case TRUNCATE: + return 99; + + case AND: + case XOR: + case IOR: + /* XXX guess. */ + return 8; + + case MEM: + /* XXX another guess. */ + /* Memory costs quite a lot for the first word, but subsequent words + load at the equivalent of a single insn each. */ + return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) + + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + ? 4 : 0)); + + case IF_THEN_ELSE: + /* XXX a guess. */ + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + return 14; + return 2; + + case ZERO_EXTEND: + /* XXX still guessing. */ + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: + return (1 + (mode == DImode ? 4 : 0) + + (MEM_P (XEXP (x, 0)) ? 10 : 0)); + + case HImode: + return (4 + (mode == DImode ? 4 : 0) + + (MEM_P (XEXP (x, 0)) ? 10 : 0)); + + case SImode: + return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0)); + + default: + return 99; + } + + default: + return 99; + } +} + +/* RTX costs when optimizing for size. */ +static bool +arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total) +{ + enum machine_mode mode = GET_MODE (x); + if (TARGET_THUMB1) + { + *total = thumb1_size_rtx_costs (x, code, outer_code); + return true; + } + + /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ + switch (code) + { + case MEM: + /* A memory access costs 1 insn if the mode is small, or the address is + a single register, otherwise it costs one insn per word. */ + if (REG_P (XEXP (x, 0))) + *total = COSTS_N_INSNS (1); + else if (flag_pic + && GET_CODE (XEXP (x, 0)) == PLUS + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) + /* This will be split into two instructions. + See arm.md:calculate_pic_address. 
*/ + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + /* Needs a libcall, so it costs about this. */ + *total = COSTS_N_INSNS (2); + return false; + + case ROTATE: + if (mode == SImode && REG_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false); + return true; + } + /* Fall through */ + case ROTATERT: + case ASHIFT: + case LSHIFTRT: + case ASHIFTRT: + if (mode == DImode && CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false); + return true; + } + else if (mode == SImode) + { + *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false); + /* Slightly disparage register shifts, but not by much. */ + if (!CONST_INT_P (XEXP (x, 1))) + *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false); + return true; + } + + /* Needs a libcall. */ + *total = COSTS_N_INSNS (2); + return false; + + case MINUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + if (mode == SImode) + { + enum rtx_code subcode0 = GET_CODE (XEXP (x, 0)); + enum rtx_code subcode1 = GET_CODE (XEXP (x, 1)); + + if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT + || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT + || subcode1 == ROTATE || subcode1 == ROTATERT + || subcode1 == ASHIFT || subcode1 == LSHIFTRT + || subcode1 == ASHIFTRT) + { + /* It's just the cost of the two operands. */ + *total = 0; + return false; + } + + *total = COSTS_N_INSNS (1); + return false; + } + + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case PLUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + /* A shift as a part of ADD costs nothing. */ + if (GET_CODE (XEXP (x, 0)) == MULT + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) + { + *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1); + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false); + *total += rtx_cost (XEXP (x, 1), code, 1, false); + return true; + } + + /* Fall through */ + case AND: case XOR: case IOR: + if (mode == SImode) + { + enum rtx_code subcode = GET_CODE (XEXP (x, 0)); + + if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT + || subcode == LSHIFTRT || subcode == ASHIFTRT + || (code == AND && subcode == NOT)) + { + /* It's just the cost of the two operands. 
*/ + *total = 0; + return false; + } + } + + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case MULT: + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + return false; + + case NEG: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *total = COSTS_N_INSNS (1); + return false; + } + + /* Fall through */ + case NOT: + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + + return false; + + case IF_THEN_ELSE: + *total = 0; + return false; + + case COMPARE: + if (cc_register (XEXP (x, 0), VOIDmode)) + * total = 0; + else + *total = COSTS_N_INSNS (1); + return false; + + case ABS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); + return false; + + case SIGN_EXTEND: + case ZERO_EXTEND: + return arm_rtx_costs_1 (x, outer_code, total, 0); + + case CONST_INT: + if (const_ok_for_arm (INTVAL (x))) + /* A multiplication by a constant requires another instruction + to load the constant to a register. */ + *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT) + ? 1 : 0); + else if (const_ok_for_arm (~INTVAL (x))) + *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1); + else if (const_ok_for_arm (-INTVAL (x))) + { + if (outer_code == COMPARE || outer_code == PLUS + || outer_code == MINUS) + *total = 0; + else + *total = COSTS_N_INSNS (1); + } + else + *total = COSTS_N_INSNS (2); + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS (2); + return true; + + case CONST_DOUBLE: + *total = COSTS_N_INSNS (4); + return true; + + case CONST_VECTOR: + if (TARGET_NEON + && TARGET_HARD_FLOAT + && outer_code == SET + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + return true; + + case HIGH: + case LO_SUM: + /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the + cost of these slightly. */ + *total = COSTS_N_INSNS (1) + 1; + return true; + + case SET: + return false; + + default: + if (mode != VOIDmode) + *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + else + *total = COSTS_N_INSNS (4); /* How knows? */ + return false; + } +} + +/* Helper function for arm_rtx_costs. If the operand is a valid shift + operand, then return the operand that is being shifted. If the shift + is not by a constant, then set SHIFT_REG to point to the operand. + Return NULL if OP is not a shifter operand. */ +static rtx +shifter_op_p (rtx op, rtx *shift_reg) +{ + enum rtx_code code = GET_CODE (op); + + if (code == MULT && CONST_INT_P (XEXP (op, 1)) + && exact_log2 (INTVAL (XEXP (op, 1))) > 0) + return XEXP (op, 0); + else if (code == ROTATE && CONST_INT_P (XEXP (op, 1))) + return XEXP (op, 0); + else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT + || code == ASHIFTRT) + { + if (!CONST_INT_P (XEXP (op, 1))) + *shift_reg = XEXP (op, 1); + return XEXP (op, 0); + } + + return NULL; +} + +static bool +arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) +{ + const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; + gcc_assert (GET_CODE (x) == UNSPEC); + + switch (XINT (x, 1)) + { + case UNSPEC_UNALIGNED_LOAD: + /* We can only do unaligned loads into the integer unit, and we can't + use LDM or LDRD. 
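/* Illustrative sketch (standalone; not part of arm.c, name is
   hypothetical) of the idea behind shifter_op_p above: a multiply by a
   power of two greater than one folds into the shifter operand of an
   ARM data-processing instruction, and the shift amount is the base-2
   logarithm of the constant.  */
#include <assert.h>

static int
power_of_two_shift (unsigned long c)
{
  /* Return the shift amount, or -1 if C is not a power of two > 1.  */
  if (c < 2 || (c & (c - 1)) != 0)
    return -1;
  return __builtin_ctzl (c);
}

int
main (void)
{
  assert (power_of_two_shift (8) == 3);   /* x * 8  ->  x << 3  */
  assert (power_of_two_shift (6) == -1);  /* needs a real multiply  */
  return 0;
}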
*/ + *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); + if (speed_p) + *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load + + extra_cost->ldst.load_unaligned); + +#ifdef NOT_YET + *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), + ADDR_SPACE_GENERIC, speed_p); +#endif + return true; + + case UNSPEC_UNALIGNED_STORE: + *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); + if (speed_p) + *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store + + extra_cost->ldst.store_unaligned); + + *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p); +#ifdef NOT_YET + *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), + ADDR_SPACE_GENERIC, speed_p); +#endif + return true; + + case UNSPEC_VRINTZ: + case UNSPEC_VRINTP: + case UNSPEC_VRINTM: + case UNSPEC_VRINTR: + case UNSPEC_VRINTX: + case UNSPEC_VRINTA: + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint; + + return true; + default: + *cost = COSTS_N_INSNS (2); + break; + } + return false; +} + +/* Cost of a libcall. We assume one insn per argument, an amount for the + call (one insn for -Os) and then one for processing the result. */ +#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2)) + +#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \ + do \ + { \ + shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \ + if (shift_op != NULL \ + && arm_rtx_shift_left_p (XEXP (x, IDX))) \ + { \ + if (shift_reg) \ + { \ + if (speed_p) \ + *cost += extra_cost->alu.arith_shift_reg; \ + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \ + } \ + else if (speed_p) \ + *cost += extra_cost->alu.arith_shift; \ + \ + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \ + + rtx_cost (XEXP (x, 1 - IDX), \ + OP, 1, speed_p)); \ + return true; \ + } \ + } \ + while (0); + +/* RTX costs. Make an estimate of the cost of executing the operation + X, which is contained with an operation with code OUTER_CODE. + SPEED_P indicates whether the cost desired is the performance cost, + or the size cost. The estimate is stored in COST and the return + value is TRUE if the cost calculation is final, or FALSE if the + caller should recurse through the operands of X to add additional + costs. + + We currently make no attempt to model the size savings of Thumb-2 + 16-bit instructions. At the normal points in compilation where + this code is called we have no measure of whether the condition + flags are live or not, and thus no realistic way to determine what + the size will eventually be. */ +static bool +arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + const struct cpu_cost_table *extra_cost, + int *cost, bool speed_p) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + if (speed_p) + *cost = thumb1_rtx_costs (x, code, outer_code); + else + *cost = thumb1_size_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case SET: + *cost = 0; + /* SET RTXs don't have a mode so we get it from the destination. */ + mode = GET_MODE (SET_DEST (x)); + + if (REG_P (SET_SRC (x)) + && REG_P (SET_DEST (x))) + { + /* Assume that most copies can be done with a single insn, + unless we don't have HW FP, in which case everything + larger than word mode will require two insns. */ + *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT + && GET_MODE_SIZE (mode) > 4) + || mode == DImode) + ? 2 : 1); + /* Conditional register moves can be encoded + in 16 bits in Thumb mode. 
*/ + if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC) + *cost >>= 1; + + return true; + } + + if (CONST_INT_P (SET_SRC (x))) + { + /* Handle CONST_INT here, since the value doesn't have a mode + and we would otherwise be unable to work out the true cost. */ + *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p); + outer_code = SET; + /* Slightly lower the cost of setting a core reg to a constant. + This helps break up chains and allows for better scheduling. */ + if (REG_P (SET_DEST (x)) + && REGNO (SET_DEST (x)) <= LR_REGNUM) + *cost -= 1; + x = SET_SRC (x); + /* Immediate moves with an immediate in the range [0, 255] can be + encoded in 16 bits in Thumb mode. */ + if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode + && INTVAL (x) >= 0 && INTVAL (x) <=255) + *cost >>= 1; + goto const_int_cost; + } + + return false; + + case MEM: + /* A memory access costs 1 insn if the mode is small, or the address is + a single register, otherwise it costs one insn per word. */ + if (REG_P (XEXP (x, 0))) + *cost = COSTS_N_INSNS (1); + else if (flag_pic + && GET_CODE (XEXP (x, 0)) == PLUS + && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) + /* This will be split into two instructions. + See arm.md:calculate_pic_address. */ + *cost = COSTS_N_INSNS (2); + else + *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + + /* For speed optimizations, add the costs of the address and + accessing memory. */ + if (speed_p) +#ifdef NOT_YET + *cost += (extra_cost->ldst.load + + arm_address_cost (XEXP (x, 0), mode, + ADDR_SPACE_GENERIC, speed_p)); +#else + *cost += extra_cost->ldst.load; +#endif + return true; + + case PARALLEL: + { + /* Calculations of LDM costs are complex. We assume an initial cost + (ldm_1st) which will load the number of registers mentioned in + ldm_regs_per_insn_1st registers; then each additional + ldm_regs_per_insn_subsequent registers cost one more insn. The + formula for N regs is thus: + + ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0) + + ldm_regs_per_insn_subsequent - 1) + / ldm_regs_per_insn_subsequent). + + Additional costs may also be added for addressing. A similar + formula is used for STM. */ + + bool is_ldm = load_multiple_operation (x, SImode); + bool is_stm = store_multiple_operation (x, SImode); + + *cost = COSTS_N_INSNS (1); + + if (is_ldm || is_stm) + { + if (speed_p) + { + HOST_WIDE_INT nregs = XVECLEN (x, 0); + HOST_WIDE_INT regs_per_insn_1st = is_ldm + ? extra_cost->ldst.ldm_regs_per_insn_1st + : extra_cost->ldst.stm_regs_per_insn_1st; + HOST_WIDE_INT regs_per_insn_sub = is_ldm + ? extra_cost->ldst.ldm_regs_per_insn_subsequent + : extra_cost->ldst.stm_regs_per_insn_subsequent; + + *cost += regs_per_insn_1st + + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0)) + + regs_per_insn_sub - 1) + / regs_per_insn_sub); + return true; + } + + } + return false; + } + case DIV: + case UDIV: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + *cost = COSTS_N_INSNS (speed_p + ? extra_cost->fp[mode != SFmode].div : 1); + else if (mode == SImode && TARGET_IDIV) + *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1); + else + *cost = LIBCALL_COST (2); + return false; /* All arguments must be in registers. */ + + case MOD: + case UMOD: + *cost = LIBCALL_COST (2); + return false; /* All arguments must be in registers. 
*/ + + case ROTATE: + if (mode == SImode && REG_P (XEXP (x, 1))) + { + *cost = (COSTS_N_INSNS (2) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + if (speed_p) + *cost += extra_cost->alu.shift_reg; + return true; + } + /* Fall through */ + case ROTATERT: + case ASHIFT: + case LSHIFTRT: + case ASHIFTRT: + if (mode == DImode && CONST_INT_P (XEXP (x, 1))) + { + *cost = (COSTS_N_INSNS (3) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + return true; + } + else if (mode == SImode) + { + *cost = (COSTS_N_INSNS (1) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + /* Slightly disparage register shifts at -Os, but not by much. */ + if (!CONST_INT_P (XEXP (x, 1))) + *cost += (speed_p ? extra_cost->alu.shift_reg : 1 + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + if (code == ASHIFT) + { + *cost = (COSTS_N_INSNS (1) + + rtx_cost (XEXP (x, 0), code, 0, speed_p)); + /* Slightly disparage register shifts at -Os, but not by + much. */ + if (!CONST_INT_P (XEXP (x, 1))) + *cost += (speed_p ? extra_cost->alu.shift_reg : 1 + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + } + else if (code == LSHIFTRT || code == ASHIFTRT) + { + if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1))) + { + /* Can use SBFX/UBFX. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.bfx; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + } + else + { + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + { + if (CONST_INT_P (XEXP (x, 1))) + *cost += 2 * extra_cost->alu.shift; + else + *cost += (extra_cost->alu.shift + + extra_cost->alu.shift_reg); + } + else + /* Slightly disparage register shifts. */ + *cost += !CONST_INT_P (XEXP (x, 1)); + } + } + else /* Rotates. */ + { + *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1))); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + { + if (CONST_INT_P (XEXP (x, 1))) + *cost += (2 * extra_cost->alu.shift + + extra_cost->alu.log_shift); + else + *cost += (extra_cost->alu.shift + + extra_cost->alu.shift_reg + + extra_cost->alu.log_shift_reg); + } + } + return true; + } + + *cost = LIBCALL_COST (2); + return false; + + case MINUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == MULT + || GET_CODE (XEXP (x, 1)) == MULT) + { + rtx mul_op0, mul_op1, sub_op; + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult_addsub; + + if (GET_CODE (XEXP (x, 0)) == MULT) + { + mul_op0 = XEXP (XEXP (x, 0), 0); + mul_op1 = XEXP (XEXP (x, 0), 1); + sub_op = XEXP (x, 1); + } + else + { + mul_op0 = XEXP (XEXP (x, 1), 0); + mul_op1 = XEXP (XEXP (x, 1), 1); + sub_op = XEXP (x, 0); + } + + /* The first operand of the multiply may be optionally + negated. 
*/ + if (GET_CODE (mul_op0) == NEG) + mul_op0 = XEXP (mul_op0, 0); + + *cost += (rtx_cost (mul_op0, code, 0, speed_p) + + rtx_cost (mul_op1, code, 0, speed_p) + + rtx_cost (sub_op, code, 0, speed_p)); + + return true; + } + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].addsub; + return false; + } + + if (mode == SImode) + { + rtx shift_by_reg = NULL; + rtx shift_op; + rtx non_shift_op; + + *cost = COSTS_N_INSNS (1); + + shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg); + if (shift_op == NULL) + { + shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg); + non_shift_op = XEXP (x, 0); + } + else + non_shift_op = XEXP (x, 1); + + if (shift_op != NULL) + { + if (shift_by_reg != NULL) + { + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + *cost += rtx_cost (shift_by_reg, code, 0, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + + *cost += (rtx_cost (shift_op, code, 0, speed_p) + + rtx_cost (non_shift_op, code, 0, speed_p)); + return true; + } + + if (arm_arch_thumb2 + && GET_CODE (XEXP (x, 1)) == MULT) + { + /* MLS. */ + if (speed_p) + *cost += extra_cost->mult[0].add; + *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p)); + return true; + } + + if (CONST_INT_P (XEXP (x, 0))) + { + int insns = arm_gen_constant (MINUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 0)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); + return true; + } + + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + rtx shift_op, shift_reg; + shift_reg = NULL; + + /* We check both sides of the MINUS for shifter operands since, + unlike PLUS, it's not commutative. */ + + HANDLE_NARROW_SHIFT_ARITH (MINUS, 0) + HANDLE_NARROW_SHIFT_ARITH (MINUS, 1) + + /* Slightly disparage, as we might need to widen the result. */ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + + if (CONST_INT_P (XEXP (x, 0))) + { + *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); + return true; + } + + return false; + } + + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) + { + rtx op1 = XEXP (x, 1); + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + + if (GET_CODE (op1) == ZERO_EXTEND) + *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p); + else + *cost += rtx_cost (op1, MINUS, 1, speed_p); + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, + 0, speed_p); + return true; + } + else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + { + if (speed_p) + *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND, + 0, speed_p) + + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p)); + return true; + } + else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND) + { + if (speed_p) + *cost += (extra_cost->alu.arith + + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND + ? extra_cost->alu.arith + : extra_cost->alu.arith_shift)); + *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), + GET_CODE (XEXP (x, 1)), 0, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? 
*/ + + *cost = LIBCALL_COST (2); + return false; + + case PLUS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 0)) == MULT) + { + rtx mul_op0, mul_op1, add_op; + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult_addsub; + + mul_op0 = XEXP (XEXP (x, 0), 0); + mul_op1 = XEXP (XEXP (x, 0), 1); + add_op = XEXP (x, 1); + + *cost += (rtx_cost (mul_op0, code, 0, speed_p) + + rtx_cost (mul_op1, code, 0, speed_p) + + rtx_cost (add_op, code, 0, speed_p)); + + return true; + } + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].addsub; + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + /* Narrow modes can be synthesized in SImode, but the range + of useful sub-operations is limited. Check for shift operations + on one of the operands. Only left shifts can be used in the + narrow modes. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + rtx shift_op, shift_reg; + shift_reg = NULL; + + HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) + + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + /* Slightly penalize a narrow operation as the result may + need widening. */ + *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); + return true; + } + + /* Slightly penalize a narrow operation as the result may + need widening. */ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + + return false; + } + + if (mode == SImode) + { + rtx shift_op, shift_reg; + + *cost = COSTS_N_INSNS (1); + if (TARGET_INT_SIMD + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + /* UXTA[BH] or SXTA[BH]. */ + if (speed_p) + *cost += extra_cost->alu.extend_arith; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p)); + return true; + } + + shift_reg = NULL; + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + if (shift_op != NULL) + { + if (shift_reg) + { + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + if (GET_CODE (XEXP (x, 0)) == MULT) + { + rtx mul_op = XEXP (x, 0); + + *cost = COSTS_N_INSNS (1); + + if (TARGET_DSP_MULTIPLY + && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND + && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) + && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16))) + || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1)) + && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16 + && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) + && (INTVAL (XEXP (XEXP (mul_op, 1), 1)) + == 16)))))) + { + /* SMLA[BT][BT]. 
*/ + if (speed_p) + *cost += extra_cost->mult[0].extend_add; + *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), + SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (mul_op, 1), 0), + SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + if (speed_p) + *cost += extra_cost->mult[0].add; + *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p) + + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.arith; + *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); + return true; + } + return false; + } + + if (mode == DImode) + { + if (arm_arch3m + && GET_CODE (XEXP (x, 0)) == MULT + && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend_add; + *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + *cost = COSTS_N_INSNS (2); + + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) + { + if (speed_p) + *cost += (extra_cost->alu.arith + + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + ? extra_cost->alu.arith + : extra_cost->alu.arith_shift)); + + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? 
*/ + *cost = LIBCALL_COST (2); + return false; + + case AND: case XOR: case IOR: + if (mode == SImode) + { + enum rtx_code subcode = GET_CODE (XEXP (x, 0)); + rtx op0 = XEXP (x, 0); + rtx shift_op, shift_reg; + + *cost = COSTS_N_INSNS (1); + + if (subcode == NOT + && (code == AND + || (code == IOR && TARGET_THUMB2))) + op0 = XEXP (op0, 0); + + shift_reg = NULL; + shift_op = shifter_op_p (op0, &shift_reg); + if (shift_op != NULL) + { + if (shift_reg) + { + if (speed_p) + *cost += extra_cost->alu.log_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.log_shift; + + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + int insns = arm_gen_constant (code, SImode, NULL_RTX, + INTVAL (XEXP (x, 1)), NULL_RTX, + NULL_RTX, 1, 0); + + *cost = COSTS_N_INSNS (insns); + if (speed_p) + *cost += insns * extra_cost->alu.logical; + *cost += rtx_cost (op0, code, 0, speed_p); + return true; + } + + if (speed_p) + *cost += extra_cost->alu.logical; + *cost += (rtx_cost (op0, code, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 1, speed_p)); + return true; + } + + if (mode == DImode) + { + rtx op0 = XEXP (x, 0); + enum rtx_code subcode = GET_CODE (op0); + + *cost = COSTS_N_INSNS (2); + + if (subcode == NOT + && (code == AND + || (code == IOR && TARGET_THUMB2))) + op0 = XEXP (op0, 0); + + if (GET_CODE (op0) == ZERO_EXTEND) + { + if (speed_p) + *cost += 2 * extra_cost->alu.logical; + + *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 0, speed_p)); + return true; + } + else if (GET_CODE (op0) == SIGN_EXTEND) + { + if (speed_p) + *cost += extra_cost->alu.logical + extra_cost->alu.log_shift; + + *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), code, 0, speed_p)); + return true; + } + + if (speed_p) + *cost += 2 * extra_cost->alu.logical; + + return true; + } + /* Vector mode? */ + + *cost = LIBCALL_COST (2); + return false; + + case MULT: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + rtx op0 = XEXP (x, 0); + + *cost = COSTS_N_INSNS (1); + + if (GET_CODE (op0) == NEG) + op0 = XEXP (op0, 0); + + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].mult; + + *cost += (rtx_cost (op0, MULT, 0, speed_p) + + rtx_cost (XEXP (x, 1), MULT, 1, speed_p)); + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + if (mode == SImode) + { + *cost = COSTS_N_INSNS (1); + if (TARGET_DSP_MULTIPLY + && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (x, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && INTVAL (XEXP (XEXP (x, 1), 1)) == 16))) + || (GET_CODE (XEXP (x, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && INTVAL (XEXP (XEXP (x, 0), 1)) == 16 + && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND + || (GET_CODE (XEXP (x, 1)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 1), 1)) + && (INTVAL (XEXP (XEXP (x, 1), 1)) + == 16)))))) + { + /* SMUL[TB][TB]. 
*/ + if (speed_p) + *cost += extra_cost->mult[0].extend; + *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p) + + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p)); + return true; + } + if (speed_p) + *cost += extra_cost->mult[0].simple; + return false; + } + + if (mode == DImode) + { + if (arm_arch3m + && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend; + *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), + ZERO_EXTEND, 0, speed_p) + + rtx_cost (XEXP (XEXP (x, 1), 0), + ZERO_EXTEND, 0, speed_p)); + return true; + } + + *cost = LIBCALL_COST (2); + return false; + } + + /* Vector mode? */ + *cost = LIBCALL_COST (2); + return false; + + case NEG: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].neg; + + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (1); + return false; + } + + if (mode == SImode) + { + if (GET_CODE (XEXP (x, 0)) == ABS) + { + *cost = COSTS_N_INSNS (2); + /* Assume the non-flag-changing variant. */ + if (speed_p) + *cost += (extra_cost->alu.log_shift + + extra_cost->alu.arith_shift); + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p); + return true; + } + + if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) + { + *cost = COSTS_N_INSNS (2); + /* No extra cost for MOV imm and MVN imm. */ + /* If the comparison op is using the flags, there's no further + cost, otherwise we need to add the cost of the comparison. */ + if (!(REG_P (XEXP (XEXP (x, 0), 0)) + && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM + && XEXP (XEXP (x, 0), 1) == const0_rtx)) + { + *cost += (COSTS_N_INSNS (1) + + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0, + speed_p) + + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1, + speed_p)); + if (speed_p) + *cost += extra_cost->alu.arith; + } + return true; + } + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + { + /* Slightly disparage, as we might need an extend operation. */ + *cost = 1 + COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + return false; + } + + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + /* Vector mode? */ + *cost = LIBCALL_COST (1); + return false; + + case NOT: + if (mode == SImode) + { + rtx shift_op; + rtx shift_reg = NULL; + + *cost = COSTS_N_INSNS (1); + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + + if (shift_op) + { + if (shift_reg != NULL) + { + if (speed_p) + *cost += extra_cost->alu.log_shift_reg; + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + } + else if (speed_p) + *cost += extra_cost->alu.log_shift; + *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p); + return true; + } + + if (speed_p) + *cost += extra_cost->alu.logical; + return false; + } + if (mode == DImode) + { + *cost = COSTS_N_INSNS (2); + return false; + } + + /* Vector mode? 
*/ + + *cost += LIBCALL_COST (1); + return false; + + case IF_THEN_ELSE: + { + if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) + { + *cost = COSTS_N_INSNS (4); + return true; + } + int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p); + int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p); + + *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p); + /* Assume that if one arm of the if_then_else is a register, + that it will be tied with the result and eliminate the + conditional insn. */ + if (REG_P (XEXP (x, 1))) + *cost += op2cost; + else if (REG_P (XEXP (x, 2))) + *cost += op1cost; + else + { + if (speed_p) + { + if (extra_cost->alu.non_exec_costs_exec) + *cost += op1cost + op2cost + extra_cost->alu.non_exec; + else + *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec; + } + else + *cost += op1cost + op2cost; + } + } + return true; + + case COMPARE: + if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx) + *cost = 0; + else + { + enum machine_mode op0mode; + /* We'll mostly assume that the cost of a compare is the cost of the + LHS. However, there are some notable exceptions. */ + + /* Floating point compares are never done as side-effects. */ + op0mode = GET_MODE (XEXP (x, 0)); + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT + && (op0mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[op0mode != SFmode].compare; + + if (XEXP (x, 1) == CONST0_RTX (op0mode)) + { + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + return false; + } + else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (2); + return false; + } + + /* DImode compares normally take two insns. */ + if (op0mode == DImode) + { + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.arith; + return false; + } + + if (op0mode == SImode) + { + rtx shift_op; + rtx shift_reg; + + if (XEXP (x, 1) == const0_rtx + && !(REG_P (XEXP (x, 0)) + || (GET_CODE (XEXP (x, 0)) == SUBREG + && REG_P (SUBREG_REG (XEXP (x, 0)))))) + { + *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p); + + /* Multiply operations that set the flags are often + significantly more expensive. */ + if (speed_p + && GET_CODE (XEXP (x, 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode)) + *cost += extra_cost->mult[0].flag_setting; + + if (speed_p + && GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0), + 0), 1), mode)) + *cost += extra_cost->mult[0].flag_setting; + return true; + } + + shift_reg = NULL; + shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); + if (shift_op != NULL) + { + *cost = COSTS_N_INSNS (1); + if (shift_reg != NULL) + { + *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); + if (speed_p) + *cost += extra_cost->alu.arith_shift_reg; + } + else if (speed_p) + *cost += extra_cost->alu.arith_shift; + *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) + + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p)); + return true; + } + + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) + { + *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p); + return true; + } + return false; + } + + /* Vector mode? 
*/ + + *cost = LIBCALL_COST (2); + return false; + } + return true; + + case EQ: + case NE: + case LT: + case LE: + case GT: + case GE: + case LTU: + case LEU: + case GEU: + case GTU: + case ORDERED: + case UNORDERED: + case UNEQ: + case UNLE: + case UNLT: + case UNGE: + case UNGT: + case LTGT: + if (outer_code == SET) + { + /* Is it a store-flag operation? */ + if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM + && XEXP (x, 1) == const0_rtx) + { + /* Thumb also needs an IT insn. */ + *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2); + return true; + } + if (XEXP (x, 1) == const0_rtx) + { + switch (code) + { + case LT: + /* LSR Rd, Rn, #31. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.shift; + break; + + case EQ: + /* RSBS T1, Rn, #0 + ADC Rd, Rn, T1. */ + + case NE: + /* SUBS T1, Rn, #1 + SBC Rd, Rn, T1. */ + *cost = COSTS_N_INSNS (2); + break; + + case LE: + /* RSBS T1, Rn, Rn, LSR #31 + ADC Rd, Rn, T1. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += extra_cost->alu.arith_shift; + break; + + case GT: + /* RSB Rd, Rn, Rn, ASR #1 + LSR Rd, Rd, #31. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += (extra_cost->alu.arith_shift + + extra_cost->alu.shift); + break; + + case GE: + /* ASR Rd, Rn, #31 + ADD Rd, Rn, #1. */ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += extra_cost->alu.shift; + break; + + default: + /* Remaining cases are either meaningless or would take + three insns anyway. */ + *cost = COSTS_N_INSNS (3); + break; + } + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + else + { + *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3); + if (CONST_INT_P (XEXP (x, 1)) + && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) + { + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + return false; + } + } + /* Not directly inside a set. If it involves the condition code + register it must be the condition for a branch, cond_exec or + I_T_E operation. Since the comparison is performed elsewhere + this is just the control part which has no additional + cost. */ + else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM + && XEXP (x, 1) == const0_rtx) + { + *cost = 0; + return true; + } + return false; + + case ABS: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode != SFmode].neg; + + return false; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + *cost = LIBCALL_COST (1); + return false; + } + + if (mode == SImode) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift; + return false; + } + /* Vector mode? */ + *cost = LIBCALL_COST (1); + return false; + + case SIGN_EXTEND: + if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode) + && MEM_P (XEXP (x, 0))) + { + *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p); + + if (mode == DImode) + *cost += COSTS_N_INSNS (1); + + if (!speed_p) + return true; + + if (GET_MODE (XEXP (x, 0)) == SImode) + *cost += extra_cost->ldst.load; + else + *cost += extra_cost->ldst.load_sign_extend; + + if (mode == DImode) + *cost += extra_cost->alu.shift; + + return true; + } + + /* Widening from less than 32-bits requires an extend operation. */ + if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) + { + /* We have SXTB/SXTH. 
*/ + *cost = COSTS_N_INSNS (1); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += extra_cost->alu.extend; + } + else if (GET_MODE (XEXP (x, 0)) != SImode) + { + /* Needs two shifts. */ + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + } + + /* Widening beyond 32-bits requires one more insn. */ + if (mode == DImode) + { + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.shift; + } + + return true; + + case ZERO_EXTEND: + if ((arm_arch4 + || GET_MODE (XEXP (x, 0)) == SImode + || GET_MODE (XEXP (x, 0)) == QImode) + && MEM_P (XEXP (x, 0))) + { + *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p); + + if (mode == DImode) + *cost += COSTS_N_INSNS (1); /* No speed penalty. */ + + return true; + } + + /* Widening from less than 32-bits requires an extend operation. */ + if (GET_MODE (XEXP (x, 0)) == QImode) + { + /* UXTB can be a shorter instruction in Thumb2, but it might + be slower than the AND Rd, Rn, #255 alternative. When + optimizing for speed it should never be slower to use + AND, and we don't really model 16-bit vs 32-bit insns + here. */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.logical; + } + else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) + { + /* We have UXTB/UXTH. */ + *cost = COSTS_N_INSNS (1); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += extra_cost->alu.extend; + } + else if (GET_MODE (XEXP (x, 0)) != SImode) + { + /* Needs two shifts. It's marginally preferable to use + shifts rather than two BIC instructions as the second + shift may merge with a subsequent insn as a shifter + op. */ + *cost = COSTS_N_INSNS (2); + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + } + else /* GET_MODE (XEXP (x, 0)) == SImode. */ + *cost = COSTS_N_INSNS (1); + + /* Widening beyond 32-bits requires one more insn. */ + if (mode == DImode) + { + *cost += COSTS_N_INSNS (1); /* No speed penalty. */ + } + + return true; + + case CONST_INT: + *cost = 0; + /* CONST_INT has no mode, so we cannot tell for sure how many + insns are really going to be needed. The best we can do is + look at the value passed. If it fits in SImode, then assume + that's the mode it will be used for. Otherwise assume it + will be used in DImode. */ + if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode)) + mode = SImode; + else + mode = DImode; + + /* Avoid blowing up in arm_gen_constant (). */ + if (!(outer_code == PLUS + || outer_code == AND + || outer_code == IOR + || outer_code == XOR + || outer_code == MINUS)) + outer_code = SET; + + const_int_cost: + if (mode == SImode) + { + *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL, + INTVAL (x), NULL, NULL, + 0, 0)); + /* Extra costs? */ + } + else + { + *cost += COSTS_N_INSNS (arm_gen_constant + (outer_code, SImode, NULL, + trunc_int_for_mode (INTVAL (x), SImode), + NULL, NULL, 0, 0) + + arm_gen_constant (outer_code, SImode, NULL, + INTVAL (x) >> 32, NULL, + NULL, 0, 0)); + /* Extra costs? 
*/ + } + + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + if (speed_p) + { + if (arm_arch_thumb2 && !flag_pic) + *cost = COSTS_N_INSNS (2); + else + *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load; + } + else + *cost = COSTS_N_INSNS (2); + + if (flag_pic) + { + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.arith; + } + + return true; + + case CONST_FIXED: + *cost = COSTS_N_INSNS (4); + /* Fixme. */ + return true; + + case CONST_DOUBLE: + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) + { + if (vfp3_const_double_rtx (x)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].fpconst; + return true; + } + + if (speed_p) + { + *cost = COSTS_N_INSNS (1); + if (mode == DFmode) + *cost += extra_cost->ldst.loadd; + else + *cost += extra_cost->ldst.loadf; + } + else + *cost = COSTS_N_INSNS (2 + (mode == DFmode)); + + return true; + } + *cost = COSTS_N_INSNS (4); + return true; + + case CONST_VECTOR: + /* Fixme. */ + if (TARGET_NEON + && TARGET_HARD_FLOAT + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && neon_immediate_valid_for_move (x, mode, NULL, NULL)) + *cost = COSTS_N_INSNS (1); + else + *cost = COSTS_N_INSNS (4); + return true; + + case HIGH: + case LO_SUM: + *cost = COSTS_N_INSNS (1); + /* When optimizing for size, we prefer constant pool entries to + MOVW/MOVT pairs, so bump the cost of these slightly. */ + if (!speed_p) + *cost += 1; + return true; + + case CLZ: + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.clz; + return false; + + case SMIN: + if (XEXP (x, 1) == const0_rtx) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.log_shift; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + /* Fall through. */ + case SMAX: + case UMIN: + case UMAX: + *cost = COSTS_N_INSNS (2); + return false; + + case TRUNCATE: + if (GET_CODE (XEXP (x, 0)) == ASHIFTRT + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && INTVAL (XEXP (XEXP (x, 0), 1)) == 32 + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND) + || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND + && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) + == ZERO_EXTEND)))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->mult[1].extend; + *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0, + speed_p) + + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND, + 0, speed_p)); + return true; + } + *cost = LIBCALL_COST (1); + return false; + + case UNSPEC: + return arm_unspec_cost (x, outer_code, speed_p, cost); + + case PC: + /* Reading the PC is like reading any other register. Writing it + is more expensive, but we take that into account elsewhere. */ + *cost = 0; + return true; + + case ZERO_EXTRACT: + /* TODO: Simple zero_extract of bottom bits using AND. */ + /* Fall through. */ + case SIGN_EXTRACT: + if (arm_arch6 + && mode == SImode + && CONST_INT_P (XEXP (x, 1)) + && CONST_INT_P (XEXP (x, 2))) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.bfx; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + /* Without UBFX/SBFX, need to resort to shift operations. 
*/ + *cost = COSTS_N_INSNS (2); + if (speed_p) + *cost += 2 * extra_cost->alu.shift; + *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p); + return true; + + case FLOAT_EXTEND: + if (TARGET_HARD_FLOAT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].widen; + if (!TARGET_FPU_ARMV8 + && GET_MODE (XEXP (x, 0)) == HFmode) + { + /* Pre v8, widening HF->DF is a two-step process, first + widening to SFmode. */ + *cost += COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[0].widen; + } + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + } + + *cost = LIBCALL_COST (1); + return false; + + case FLOAT_TRUNCATE: + if (TARGET_HARD_FLOAT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].narrow; + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + return true; + /* Vector modes? */ + } + *cost = LIBCALL_COST (1); + return false; + + case FIX: + case UNSIGNED_FIX: + if (TARGET_HARD_FLOAT) + { + if (GET_MODE_CLASS (mode) == MODE_INT) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint; + /* Strip of the 'cost' of rounding towards zero. */ + if (GET_CODE (XEXP (x, 0)) == FIX) + *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p); + else + *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); + /* ??? Increase the cost to deal with transferring from + FP -> CORE registers? */ + return true; + } + else if (GET_MODE_CLASS (mode) == MODE_FLOAT + && TARGET_FPU_ARMV8) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].roundint; + return false; + } + /* Vector costs? */ + } + *cost = LIBCALL_COST (1); + return false; + + case FLOAT: + case UNSIGNED_FLOAT: + if (TARGET_HARD_FLOAT) + { + /* ??? Increase the cost to deal with transferring from CORE + -> FP registers? */ + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->fp[mode == DFmode].fromint; + return false; + } + *cost = LIBCALL_COST (1); + return false; + + case CALL: + *cost = COSTS_N_INSNS (1); + return true; + + case ASM_OPERANDS: + /* Just a guess. Cost one insn per input. */ + *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); + return true; + + default: + if (mode != VOIDmode) + *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); + else + *cost = COSTS_N_INSNS (4); /* Who knows? */ + return false; + } +} + +#undef HANDLE_NARROW_SHIFT_ARITH + +/* RTX costs when optimizing for size. */ +static bool +arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, + int *total, bool speed) +{ + bool result; + + if (TARGET_OLD_RTX_COSTS + || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS)) + { + /* Old way. (Deprecated.) */ + if (!speed) + result = arm_size_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, total); + else + result = current_tune->rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, total, + speed); + } + else + { + /* New way. 
*/ + if (current_tune->insn_extra_cost) + result = arm_new_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, + current_tune->insn_extra_cost, + total, speed); + /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS + && current_tune->insn_extra_cost != NULL */ + else + result = arm_new_rtx_costs (x, (enum rtx_code) code, + (enum rtx_code) outer_code, + &generic_extra_costs, total, speed); + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + print_rtl_single (dump_file, x); + fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold", + *total, result ? "final" : "partial"); + } + return result; +} + +/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not + supported on any "slowmul" cores, so it can be ignored. */ + +static bool +arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case MULT: + if (GET_MODE_CLASS (mode) == MODE_FLOAT + || mode == DImode) + { + *total = COSTS_N_INSNS (20); + return false; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) + & (unsigned HOST_WIDE_INT) 0xffffffff); + int cost, const_ok = const_ok_for_arm (i); + int j, booth_unit_size; + + /* Tune as appropriate. */ + cost = const_ok ? 4 : 8; + booth_unit_size = 2; + for (j = 0; i && j < 32; j += booth_unit_size) + { + i >>= booth_unit_size; + cost++; + } + + *total = COSTS_N_INSNS (cost); + *total += rtx_cost (XEXP (x, 0), code, 0, speed); + return true; + } + + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed);; + } +} + + +/* RTX cost for cores with a fast multiply unit (M variants). */ + +static bool +arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + /* ??? should thumb2 use different costs? */ + switch (code) + { + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. */ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS(2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) + & (unsigned HOST_WIDE_INT) 0xffffffff); + int cost, const_ok = const_ok_for_arm (i); + int j, booth_unit_size; + + /* Tune as appropriate. */ + cost = const_ok ? 4 : 8; + booth_unit_size = 8; + for (j = 0; i && j < 32; j += booth_unit_size) + { + i >>= booth_unit_size; + cost++; + } + + *total = COSTS_N_INSNS(cost); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (4); + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + } + + /* Requires a lib call */ + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} + + +/* RTX cost for XScale CPUs. 
Thumb-2 is not supported on any xscale cores, + so it can be ignored. */ + +static bool +arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB) + { + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + + switch (code) + { + case COMPARE: + if (GET_CODE (XEXP (x, 0)) != MULT) + return arm_rtx_costs_1 (x, outer_code, total, speed); + + /* A COMPARE of a MULT is slow on XScale; the muls instruction + will stall until the multiplication is complete. */ + *total = COSTS_N_INSNS (3); + return false; + + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. */ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (CONST_INT_P (XEXP (x, 1))) + { + /* If operand 1 is a constant we can more accurately + calculate the cost of the multiply. The multiplier can + retire 15 bits on the first cycle and a further 12 on the + second. We do, of course, have to load the constant into + a register first. */ + unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); + /* There's a general overhead of one cycle. */ + int cost = 1; + unsigned HOST_WIDE_INT masked_const; + + if (i & 0x80000000) + i = ~i; + + i &= (unsigned HOST_WIDE_INT) 0xffffffff; + + masked_const = i & 0xffff8000; + if (masked_const != 0) + { + cost++; + masked_const = i & 0xf8000000; + if (masked_const != 0) + cost++; + } + *total = COSTS_N_INSNS (cost); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (3); + return false; + } + + /* Requires a lib call */ + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} + + +/* RTX costs for 9e (and later) cores. */ + +static bool +arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, + int *total, bool speed) +{ + enum machine_mode mode = GET_MODE (x); + + if (TARGET_THUMB1) + { + switch (code) + { + case MULT: + *total = COSTS_N_INSNS (3); + return true; + + default: + *total = thumb1_rtx_costs (x, code, outer_code); + return true; + } + } + + switch (code) + { + case MULT: + /* There is no point basing this on the tuning, since it is always the + fast variant if it exists at all. */ + if (mode == DImode + && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) + && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (2); + return false; + } + + + if (mode == DImode) + { + *total = COSTS_N_INSNS (5); + return false; + } + + if (mode == SImode) + { + *total = COSTS_N_INSNS (2); + return false; + } + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + if (TARGET_HARD_FLOAT + && (mode == SFmode + || (mode == DFmode && !TARGET_VFP_SINGLE))) + { + *total = COSTS_N_INSNS (1); + return false; + } + } + + *total = COSTS_N_INSNS (20); + return false; + + default: + return arm_rtx_costs_1 (x, outer_code, total, speed); + } +} +/* All address computations that can be done are free, but rtx cost returns + the same for practically all of them. So we weight the different types + of address here in the order (most pref first): + PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. 
*/
+static inline int
+arm_arm_address_cost (rtx x)
+{
+  enum rtx_code c = GET_CODE (x);
+
+  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
+    return 0;
+  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
+    return 10;
+
+  if (c == PLUS)
+    {
+      if (CONST_INT_P (XEXP (x, 1)))
+        return 2;
+
+      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
+        return 3;
+
+      return 4;
+    }
+
+  return 6;
+}
+
+static inline int
+arm_thumb_address_cost (rtx x)
+{
+  enum rtx_code c = GET_CODE (x);
+
+  if (c == REG)
+    return 1;
+  if (c == PLUS
+      && REG_P (XEXP (x, 0))
+      && CONST_INT_P (XEXP (x, 1)))
+    return 1;
+
+  return 2;
+}
+
+static int
+arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
+                  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
+{
+  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
+}
+
+/* Adjust cost hook for XScale.  */
+static bool
+xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+  /* Some true dependencies can have a higher cost depending
+     on precisely how certain input operands are used.  */
+  if (REG_NOTE_KIND(link) == 0
+      && recog_memoized (insn) >= 0
+      && recog_memoized (dep) >= 0)
+    {
+      int shift_opnum = get_attr_shift (insn);
+      enum attr_type attr_type = get_attr_type (dep);
+
+      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
+         operand for INSN.  If we have a shifted input operand and the
+         instruction we depend on is another ALU instruction, then we may
+         have to account for an additional stall.  */
+      if (shift_opnum != 0
+          && (attr_type == TYPE_ALU_SHIFT_IMM
+              || attr_type == TYPE_ALUS_SHIFT_IMM
+              || attr_type == TYPE_LOGIC_SHIFT_IMM
+              || attr_type == TYPE_LOGICS_SHIFT_IMM
+              || attr_type == TYPE_ALU_SHIFT_REG
+              || attr_type == TYPE_ALUS_SHIFT_REG
+              || attr_type == TYPE_LOGIC_SHIFT_REG
+              || attr_type == TYPE_LOGICS_SHIFT_REG
+              || attr_type == TYPE_MOV_SHIFT
+              || attr_type == TYPE_MVN_SHIFT
+              || attr_type == TYPE_MOV_SHIFT_REG
+              || attr_type == TYPE_MVN_SHIFT_REG))
+        {
+          rtx shifted_operand;
+          int opno;
+
+          /* Get the shifted operand.  */
+          extract_insn (insn);
+          shifted_operand = recog_data.operand[shift_opnum];
+
+          /* Iterate over all the operands in DEP.  If we write an operand
+             that overlaps with SHIFTED_OPERAND, then we have to increase
+             the cost of this dependency.  */
+          extract_insn (dep);
+          preprocess_constraints ();
+          for (opno = 0; opno < recog_data.n_operands; opno++)
+            {
+              /* We can ignore strict inputs.  */
+              if (recog_data.operand_type[opno] == OP_IN)
+                continue;
+
+              if (reg_overlap_mentioned_p (recog_data.operand[opno],
+                                           shifted_operand))
+                {
+                  *cost = 2;
+                  return false;
+                }
+            }
+        }
+    }
+  return true;
+}
+
+/* Adjust cost hook for Cortex A9.  */
+static bool
+cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+  switch (REG_NOTE_KIND (link))
+    {
+    case REG_DEP_ANTI:
+      *cost = 0;
+      return false;
+
+    case REG_DEP_TRUE:
+    case REG_DEP_OUTPUT:
+      if (recog_memoized (insn) >= 0
+          && recog_memoized (dep) >= 0)
+        {
+          if (GET_CODE (PATTERN (insn)) == SET)
+            {
+              if (GET_MODE_CLASS
+                  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
+                  || GET_MODE_CLASS
+                  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
+                {
+                  enum attr_type attr_type_insn = get_attr_type (insn);
+                  enum attr_type attr_type_dep = get_attr_type (dep);
+
+                  /* By default all dependencies of the form
+                     s0 = s0 <op> s1
+                     s0 = s0 <op> s2
+                     have an extra latency of 1 cycle because
+                     of the input and output dependency in this
+                     case.  However, this gets modeled as a true
+                     dependency and hence all these checks.  */
+                  if (REG_P (SET_DEST (PATTERN (insn)))
+                      && REG_P (SET_DEST (PATTERN (dep)))
+                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
+                                                  SET_DEST (PATTERN (dep))))
+                    {
+                      /* FMACS is a special case where the dependent
+                         instruction can be issued 3 cycles before
+                         the normal latency in case of an output
+                         dependency.  */
+                      if ((attr_type_insn == TYPE_FMACS
+                           || attr_type_insn == TYPE_FMACD)
+                          && (attr_type_dep == TYPE_FMACS
+                              || attr_type_dep == TYPE_FMACD))
+                        {
+                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+                            *cost = insn_default_latency (dep) - 3;
+                          else
+                            *cost = insn_default_latency (dep);
+                          return false;
+                        }
+                      else
+                        {
+                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
+                            *cost = insn_default_latency (dep) + 1;
+                          else
+                            *cost = insn_default_latency (dep);
+                        }
+                      return false;
+                    }
+                }
+            }
+        }
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return true;
+}
+
+/* Adjust cost hook for FA726TE.  */
+static bool
+fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
+{
+  /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by
+     predicated) has a penalty of 3.  */
+  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
+      && recog_memoized (insn) >= 0
+      && recog_memoized (dep) >= 0
+      && get_attr_conds (dep) == CONDS_SET)
+    {
+      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
+      if (get_attr_conds (insn) == CONDS_USE
+          && get_attr_type (insn) != TYPE_BRANCH)
+        {
+          *cost = 3;
+          return false;
+        }
+
+      if (GET_CODE (PATTERN (insn)) == COND_EXEC
+          || get_attr_conds (insn) == CONDS_USE)
+        {
+          *cost = 0;
+          return false;
+        }
+    }
+
+  return true;
+}
+
+/* Implement TARGET_REGISTER_MOVE_COST.
+
+   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
+   are typically more expensive than a single memory access.  We set
+   the cost to less than two memory accesses so that floating
+   point to integer conversion does not go through memory.  */
+
+int
+arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+                        reg_class_t from, reg_class_t to)
+{
+  if (TARGET_32BIT)
+    {
+      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
+          || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
+        return 15;
+      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
+               || (from != IWMMXT_REGS && to == IWMMXT_REGS))
+        return 4;
+      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
+        return 20;
+      else
+        return 2;
+    }
+  else
+    {
+      if (from == HI_REGS || to == HI_REGS)
+        return 4;
+      else
+        return 2;
+    }
+}
+
+/* Implement TARGET_MEMORY_MOVE_COST.  */
+
+int
+arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
+                      bool in ATTRIBUTE_UNUSED)
+{
+  if (TARGET_32BIT)
+    return 10;
+  else
+    {
+      if (GET_MODE_SIZE (mode) < 4)
+        return 8;
+      else
+        return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
+    }
+}
+
+/* Vectorizer cost model implementation.  */
+
+/* Implement targetm.vectorize.builtin_vectorization_cost.
*/ +static int +arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, + int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + + switch (type_of_cost) + { + case scalar_stmt: + return current_tune->vec_costs->scalar_stmt_cost; + + case scalar_load: + return current_tune->vec_costs->scalar_load_cost; + + case scalar_store: + return current_tune->vec_costs->scalar_store_cost; + + case vector_stmt: + return current_tune->vec_costs->vec_stmt_cost; + + case vector_load: + return current_tune->vec_costs->vec_align_load_cost; + + case vector_store: + return current_tune->vec_costs->vec_store_cost; + + case vec_to_scalar: + return current_tune->vec_costs->vec_to_scalar_cost; + + case scalar_to_vec: + return current_tune->vec_costs->scalar_to_vec_cost; + + case unaligned_load: + return current_tune->vec_costs->vec_unalign_load_cost; + + case unaligned_store: + return current_tune->vec_costs->vec_unalign_store_cost; + + case cond_branch_taken: + return current_tune->vec_costs->cond_taken_branch_cost; + + case cond_branch_not_taken: + return current_tune->vec_costs->cond_not_taken_branch_cost; + + case vec_perm: + case vec_promote_demote: + return current_tune->vec_costs->vec_stmt_cost; + + case vec_construct: + elements = TYPE_VECTOR_SUBPARTS (vectype); + return elements / 2 + 1; + + default: + gcc_unreachable (); + } +} + +/* Implement targetm.vectorize.add_stmt_cost. */ + +static unsigned +arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, + struct _stmt_vec_info *stmt_info, int misalign, + enum vect_cost_model_location where) +{ + unsigned *cost = (unsigned *) data; + unsigned retval = 0; + + if (flag_vect_cost_model) + { + tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; + int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign); + + /* Statements in an inner loop relative to the loop being + vectorized are weighted more heavily. The value here is + arbitrary and could potentially be improved with analysis. */ + if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) + count *= 50; /* FIXME. */ + + retval = (unsigned) (count * stmt_cost); + cost[where] += retval; + } + + return retval; +} + +/* Return true if and only if this insn can dual-issue only as older. */ +static bool +cortexa7_older_only (rtx insn) +{ + if (recog_memoized (insn) < 0) + return false; + + switch (get_attr_type (insn)) + { + case TYPE_ALU_REG: + case TYPE_ALUS_REG: + case TYPE_LOGIC_REG: + case TYPE_LOGICS_REG: + case TYPE_ADC_REG: + case TYPE_ADCS_REG: + case TYPE_ADR: + case TYPE_BFM: + case TYPE_REV: + case TYPE_MVN_REG: + case TYPE_SHIFT_IMM: + case TYPE_SHIFT_REG: + case TYPE_LOAD_BYTE: + case TYPE_LOAD1: + case TYPE_STORE1: + case TYPE_FFARITHS: + case TYPE_FADDS: + case TYPE_FFARITHD: + case TYPE_FADDD: + case TYPE_FMOV: + case TYPE_F_CVT: + case TYPE_FCMPS: + case TYPE_FCMPD: + case TYPE_FCONSTS: + case TYPE_FCONSTD: + case TYPE_FMULS: + case TYPE_FMACS: + case TYPE_FMULD: + case TYPE_FMACD: + case TYPE_FDIVS: + case TYPE_FDIVD: + case TYPE_F_MRC: + case TYPE_F_MRRC: + case TYPE_F_FLAG: + case TYPE_F_LOADS: + case TYPE_F_STORES: + return true; + default: + return false; + } +} + +/* Return true if and only if this insn can dual-issue as younger. 
*/ +static bool +cortexa7_younger (FILE *file, int verbose, rtx insn) +{ + if (recog_memoized (insn) < 0) + { + if (verbose > 5) + fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn)); + return false; + } + + switch (get_attr_type (insn)) + { + case TYPE_ALU_IMM: + case TYPE_ALUS_IMM: + case TYPE_LOGIC_IMM: + case TYPE_LOGICS_IMM: + case TYPE_EXTEND: + case TYPE_MVN_IMM: + case TYPE_MOV_IMM: + case TYPE_MOV_REG: + case TYPE_MOV_SHIFT: + case TYPE_MOV_SHIFT_REG: + case TYPE_BRANCH: + case TYPE_CALL: + return true; + default: + return false; + } +} + + +/* Look for an instruction that can dual issue only as an older + instruction, and move it in front of any instructions that can + dual-issue as younger, while preserving the relative order of all + other instructions in the ready list. This is a hueuristic to help + dual-issue in later cycles, by postponing issue of more flexible + instructions. This heuristic may affect dual issue opportunities + in the current cycle. */ +static void +cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, + int clock) +{ + int i; + int first_older_only = -1, first_younger = -1; + + if (verbose > 5) + fprintf (file, + ";; sched_reorder for cycle %d with %d insns in ready list\n", + clock, + *n_readyp); + + /* Traverse the ready list from the head (the instruction to issue + first), and looking for the first instruction that can issue as + younger and the first instruction that can dual-issue only as + older. */ + for (i = *n_readyp - 1; i >= 0; i--) + { + rtx insn = ready[i]; + if (cortexa7_older_only (insn)) + { + first_older_only = i; + if (verbose > 5) + fprintf (file, ";; reorder older found %d\n", INSN_UID (insn)); + break; + } + else if (cortexa7_younger (file, verbose, insn) && first_younger == -1) + first_younger = i; + } + + /* Nothing to reorder because either no younger insn found or insn + that can dual-issue only as older appears before any insn that + can dual-issue as younger. */ + if (first_younger == -1) + { + if (verbose > 5) + fprintf (file, ";; sched_reorder nothing to reorder as no younger\n"); + return; + } + + /* Nothing to reorder because no older-only insn in the ready list. */ + if (first_older_only == -1) + { + if (verbose > 5) + fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n"); + return; + } + + /* Move first_older_only insn before first_younger. */ + if (verbose > 5) + fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n", + INSN_UID(ready [first_older_only]), + INSN_UID(ready [first_younger])); + rtx first_older_only_insn = ready [first_older_only]; + for (i = first_older_only; i < first_younger; i++) + { + ready[i] = ready[i+1]; + } + + ready[i] = first_older_only_insn; + return; +} + +/* Implement TARGET_SCHED_REORDER. */ +static int +arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp, + int clock) +{ + switch (arm_tune) + { + case cortexa7: + cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock); + break; + default: + /* Do nothing for other cores. */ + break; + } + + return arm_issue_rate (); +} + +/* This function implements the target macro TARGET_SCHED_ADJUST_COST. + It corrects the value of COST based on the relationship between + INSN and DEP through the dependence LINK. It returns the new + value. There is a per-core adjust_cost hook to adjust scheduler costs + and the per-core hook can choose to completely override the generic + adjust_cost function. 
Only put bits of code into arm_adjust_cost that + are common across all cores. */ +static int +arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) +{ + rtx i_pat, d_pat; + + /* When generating Thumb-1 code, we want to place flag-setting operations + close to a conditional branch which depends on them, so that we can + omit the comparison. */ + if (TARGET_THUMB1 + && REG_NOTE_KIND (link) == 0 + && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn + && recog_memoized (dep) >= 0 + && get_attr_conds (dep) == CONDS_SET) + return 0; + + if (current_tune->sched_adjust_cost != NULL) + { + if (!current_tune->sched_adjust_cost (insn, link, dep, &cost)) + return cost; + } + + /* XXX Is this strictly true? */ + if (REG_NOTE_KIND (link) == REG_DEP_ANTI + || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) + return 0; + + /* Call insns don't incur a stall, even if they follow a load. */ + if (REG_NOTE_KIND (link) == 0 + && CALL_P (insn)) + return 1; + + if ((i_pat = single_set (insn)) != NULL + && MEM_P (SET_SRC (i_pat)) + && (d_pat = single_set (dep)) != NULL + && MEM_P (SET_DEST (d_pat))) + { + rtx src_mem = XEXP (SET_SRC (i_pat), 0); + /* This is a load after a store, there is no conflict if the load reads + from a cached area. Assume that loads from the stack, and from the + constant pool are cached, and that others will miss. This is a + hack. */ + + if ((GET_CODE (src_mem) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (src_mem)) + || reg_mentioned_p (stack_pointer_rtx, src_mem) + || reg_mentioned_p (frame_pointer_rtx, src_mem) + || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) + return 1; + } + + return cost; +} + +int +arm_max_conditional_execute (void) +{ + return max_insns_skipped; +} + +static int +arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) +{ + if (TARGET_32BIT) + return (TARGET_THUMB2 && !speed_p) ? 1 : 4; + else + return (optimize > 0) ? 2 : 0; +} + +static int +arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) +{ + return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); +} + +/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles" + on Cortex-M4, where P varies from 1 to 3 according to some criteria), since + sequences of non-executed instructions in IT blocks probably take the same + amount of time as executed instructions (and the IT instruction itself takes + space in icache). This function was experimentally determined to give good + results on a popular embedded benchmark. */ + +static int +arm_cortex_m_branch_cost (bool speed_p, bool predictable_p) +{ + return (TARGET_32BIT && speed_p) ? 1 + : arm_default_branch_cost (speed_p, predictable_p); +} + +static bool fp_consts_inited = false; + +static REAL_VALUE_TYPE value_fp0; + +static void +init_fp_table (void) +{ + REAL_VALUE_TYPE r; + + r = REAL_VALUE_ATOF ("0", DFmode); + value_fp0 = r; + fp_consts_inited = true; +} + +/* Return TRUE if rtx X is a valid immediate FP constant. */ +int +arm_const_double_rtx (rtx x) +{ + REAL_VALUE_TYPE r; + + if (!fp_consts_inited) + init_fp_table (); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + if (REAL_VALUE_MINUS_ZERO (r)) + return 0; + + if (REAL_VALUES_EQUAL (r, value_fp0)) + return 1; + + return 0; +} + +/* VFPv3 has a fairly wide range of representable immediates, formed from + "quarter-precision" floating-point values. These can be evaluated using this + formula (with ^ for exponentiation): + + -1^s * n * 2^-r + + Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that + 16 <= n <= 31 and 0 <= r <= 7. 
+ + These values are mapped onto an 8-bit integer ABCDEFGH s.t. + + - A (most-significant) is the sign bit. + - BCD are the exponent (encoded as r XOR 3). + - EFGH are the mantissa (encoded as n - 16). +*/ + +/* Return an integer index for a VFPv3 immediate operand X suitable for the + fconst[sd] instruction, or -1 if X isn't suitable. */ +static int +vfp3_const_double_index (rtx x) +{ + REAL_VALUE_TYPE r, m; + int sign, exponent; + unsigned HOST_WIDE_INT mantissa, mant_hi; + unsigned HOST_WIDE_INT mask; + HOST_WIDE_INT m1, m2; + int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; + + if (!TARGET_VFP3 || !CONST_DOUBLE_P (x)) + return -1; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + + /* We can't represent these things, so detect them first. */ + if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) + return -1; + + /* Extract sign, exponent and mantissa. */ + sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0; + r = real_value_abs (&r); + exponent = REAL_EXP (&r); + /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the + highest (sign) bit, with a fixed binary point at bit point_pos. + WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 + bits for the mantissa, this may fail (low bits would be lost). */ + real_ldexp (&m, &r, point_pos - exponent); + REAL_VALUE_TO_INT (&m1, &m2, m); + mantissa = m1; + mant_hi = m2; + + /* If there are bits set in the low part of the mantissa, we can't + represent this value. */ + if (mantissa != 0) + return -1; + + /* Now make it so that mantissa contains the most-significant bits, and move + the point_pos to indicate that the least-significant bits have been + discarded. */ + point_pos -= HOST_BITS_PER_WIDE_INT; + mantissa = mant_hi; + + /* We can permit four significant bits of mantissa only, plus a high bit + which is always 1. */ + mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; + if ((mantissa & mask) != 0) + return -1; + + /* Now we know the mantissa is in range, chop off the unneeded bits. */ + mantissa >>= point_pos - 5; + + /* The mantissa may be zero. Disallow that case. (It's possible to load the + floating-point immediate zero with Neon using an integer-zero load, but + that case is handled elsewhere.) */ + if (mantissa == 0) + return -1; + + gcc_assert (mantissa >= 16 && mantissa <= 31); + + /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where + normalized significands are in the range [1, 2). (Our mantissa is shifted + left 4 places at this point relative to normalized IEEE754 values). GCC + internally uses [0.5, 1) (see real.c), so the exponent returned from + REAL_EXP must be altered. */ + exponent = 5 - exponent; + + if (exponent < 0 || exponent > 7) + return -1; + + /* Sign, mantissa and exponent are now in the correct form to plug into the + formula described in the comment above. */ + return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16); +} + +/* Return TRUE if rtx X is a valid immediate VFPv3 constant. */ +int +vfp3_const_double_rtx (rtx x) +{ + if (!TARGET_VFP3) + return 0; + + return vfp3_const_double_index (x) != -1; +} + +/* Recognize immediates which can be used in various Neon instructions. Legal + immediates are described by the following table (for VMVN variants, the + bitwise inverse of the constant shown is recognized. In either case, VMOV + is output and the correct instruction to use for a given constant is chosen + by the assembler). The constant shown is replicated across all elements of + the destination vector. 
+ + insn elems variant constant (binary) + ---- ----- ------- ----------------- + vmov i32 0 00000000 00000000 00000000 abcdefgh + vmov i32 1 00000000 00000000 abcdefgh 00000000 + vmov i32 2 00000000 abcdefgh 00000000 00000000 + vmov i32 3 abcdefgh 00000000 00000000 00000000 + vmov i16 4 00000000 abcdefgh + vmov i16 5 abcdefgh 00000000 + vmvn i32 6 00000000 00000000 00000000 abcdefgh + vmvn i32 7 00000000 00000000 abcdefgh 00000000 + vmvn i32 8 00000000 abcdefgh 00000000 00000000 + vmvn i32 9 abcdefgh 00000000 00000000 00000000 + vmvn i16 10 00000000 abcdefgh + vmvn i16 11 abcdefgh 00000000 + vmov i32 12 00000000 00000000 abcdefgh 11111111 + vmvn i32 13 00000000 00000000 abcdefgh 11111111 + vmov i32 14 00000000 abcdefgh 11111111 11111111 + vmvn i32 15 00000000 abcdefgh 11111111 11111111 + vmov i8 16 abcdefgh + vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd + eeeeeeee ffffffff gggggggg hhhhhhhh + vmov f32 18 aBbbbbbc defgh000 00000000 00000000 + vmov f32 19 00000000 00000000 00000000 00000000 + + For case 18, B = !b. Representable values are exactly those accepted by + vfp3_const_double_index, but are output as floating-point numbers rather + than indices. + + For case 19, we will change it to vmov.i32 when assembling. + + Variants 0-5 (inclusive) may also be used as immediates for the second + operand of VORR/VBIC instructions. + + The INVERSE argument causes the bitwise inverse of the given operand to be + recognized instead (used for recognizing legal immediates for the VAND/VORN + pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is + *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be + output, rather than the real insns vbic/vorr). + + INVERSE makes no difference to the recognition of float vectors. + + The return value is the variant of immediate as shown in the above table, or + -1 if the given value doesn't match any of the listed patterns. +*/ +static int +neon_valid_immediate (rtx op, enum machine_mode mode, int inverse, + rtx *modconst, int *elementwidth) +{ +#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \ + matches = 1; \ + for (i = 0; i < idx; i += (STRIDE)) \ + if (!(TEST)) \ + matches = 0; \ + if (matches) \ + { \ + immtype = (CLASS); \ + elsize = (ELSIZE); \ + break; \ + } + + unsigned int i, elsize = 0, idx = 0, n_elts; + unsigned int innersize; + unsigned char bytes[16]; + int immtype = -1, matches; + unsigned int invmask = inverse ? 0xff : 0; + bool vector = GET_CODE (op) == CONST_VECTOR; + + if (vector) + { + n_elts = CONST_VECTOR_NUNITS (op); + innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); + } + else + { + n_elts = 1; + if (mode == VOIDmode) + mode = DImode; + innersize = GET_MODE_SIZE (mode); + } + + /* Vectors of float constants. */ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + rtx el0 = CONST_VECTOR_ELT (op, 0); + REAL_VALUE_TYPE r0; + + if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0))) + return -1; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, el0); + + for (i = 1; i < n_elts; i++) + { + rtx elt = CONST_VECTOR_ELT (op, i); + REAL_VALUE_TYPE re; + + REAL_VALUE_FROM_CONST_DOUBLE (re, elt); + + if (!REAL_VALUES_EQUAL (r0, re)) + return -1; + } + + if (modconst) + *modconst = CONST_VECTOR_ELT (op, 0); + + if (elementwidth) + *elementwidth = 0; + + if (el0 == CONST0_RTX (GET_MODE (el0))) + return 19; + else + return 18; + } + + /* Splat vector constant out into a byte vector. */ + for (i = 0; i < n_elts; i++) + { + rtx el = vector ? 
CONST_VECTOR_ELT (op, i) : op; + unsigned HOST_WIDE_INT elpart; + unsigned int part, parts; + + if (CONST_INT_P (el)) + { + elpart = INTVAL (el); + parts = 1; + } + else if (CONST_DOUBLE_P (el)) + { + elpart = CONST_DOUBLE_LOW (el); + parts = 2; + } + else + gcc_unreachable (); + + for (part = 0; part < parts; part++) + { + unsigned int byte; + for (byte = 0; byte < innersize; byte++) + { + bytes[idx++] = (elpart & 0xff) ^ invmask; + elpart >>= BITS_PER_UNIT; + } + if (CONST_DOUBLE_P (el)) + elpart = CONST_DOUBLE_HIGH (el); + } + } + + /* Sanity check. */ + gcc_assert (idx == GET_MODE_SIZE (mode)); + + do + { + CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); + + CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]); + + CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0); + + CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]); + + CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); + + CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]); + + CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff); + + CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]); + + CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0); + + CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); + + CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); + + CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); + + CHECK (1, 8, 16, bytes[i] == bytes[0]); + + CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) + && bytes[i] == bytes[(i + 8) % idx]); + } + while (0); + + if (immtype == -1) + return -1; + + if (elementwidth) + *elementwidth = elsize; + + if (modconst) + { + unsigned HOST_WIDE_INT imm = 0; + + /* Un-invert bytes of recognized vector, if necessary. */ + if (invmask != 0) + for (i = 0; i < idx; i++) + bytes[i] ^= invmask; + + if (immtype == 17) + { + /* FIXME: Broken on 32-bit H_W_I hosts. */ + gcc_assert (sizeof (HOST_WIDE_INT) == 8); + + for (i = 0; i < 8; i++) + imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) + << (i * BITS_PER_UNIT); + + *modconst = GEN_INT (imm); + } + else + { + unsigned HOST_WIDE_INT imm = 0; + + for (i = 0; i < elsize / BITS_PER_UNIT; i++) + imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); + + *modconst = GEN_INT (imm); + } + } + + return immtype; +#undef CHECK +} + +/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly, + VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for + float elements), and a modified constant (whatever should be output for a + VMOV) in *MODCONST. 
 */
+
+int
+neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
+                               rtx *modconst, int *elementwidth)
+{
+  rtx tmpconst;
+  int tmpwidth;
+  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
+
+  if (retval == -1)
+    return 0;
+
+  if (modconst)
+    *modconst = tmpconst;
+
+  if (elementwidth)
+    *elementwidth = tmpwidth;
+
+  return 1;
+}
+
+/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
+   the immediate is valid, write a constant suitable for using as an operand
+   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
+   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */
+
+int
+neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
+                                rtx *modconst, int *elementwidth)
+{
+  rtx tmpconst;
+  int tmpwidth;
+  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
+
+  if (retval < 0 || retval > 5)
+    return 0;
+
+  if (modconst)
+    *modconst = tmpconst;
+
+  if (elementwidth)
+    *elementwidth = tmpwidth;
+
+  return 1;
+}
+
+/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
+   the immediate is valid, write a constant suitable for using as an operand
+   to VSHR/VSHL to *MODCONST and the corresponding element width to
+   *ELEMENTWIDTH.  ISLEFTSHIFT selects between a left and a right shift,
+   because they have different limitations.  */
+
+int
+neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
+                                rtx *modconst, int *elementwidth,
+                                bool isleftshift)
+{
+  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
+  unsigned HOST_WIDE_INT last_elt = 0;
+  unsigned HOST_WIDE_INT maxshift;
+
+  /* Split vector constant out into a byte vector.  */
+  for (i = 0; i < n_elts; i++)
+    {
+      rtx el = CONST_VECTOR_ELT (op, i);
+      unsigned HOST_WIDE_INT elpart;
+
+      if (CONST_INT_P (el))
+        elpart = INTVAL (el);
+      else if (CONST_DOUBLE_P (el))
+        return 0;
+      else
+        gcc_unreachable ();
+
+      if (i != 0 && elpart != last_elt)
+        return 0;
+
+      last_elt = elpart;
+    }
+
+  /* Shift less than element size.  */
+  maxshift = innersize * 8;
+
+  if (isleftshift)
+    {
+      /* Left shift immediate value can be from 0 to <size> - 1.  */
+      if (last_elt >= maxshift)
+        return 0;
+    }
+  else
+    {
+      /* Right shift immediate value can be from 1 to <size>.  */
+      if (last_elt == 0 || last_elt > maxshift)
+        return 0;
+    }
+
+  if (elementwidth)
+    *elementwidth = innersize * 8;
+
+  if (modconst)
+    *modconst = CONST_VECTOR_ELT (op, 0);
+
+  return 1;
+}
+
+/* Return a string suitable for output of Neon immediate logic operation
+   MNEM.  */
+
+char *
+neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
+                             int inverse, int quad)
+{
+  int width, is_valid;
+  static char templ[40];
+
+  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
+
+  gcc_assert (is_valid != 0);
+
+  if (quad)
+    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
+  else
+    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
+
+  return templ;
+}
+
+/* Return a string suitable for output of Neon immediate shift operation
+   (VSHR or VSHL) MNEM.
*/ + +char * +neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, + enum machine_mode mode, int quad, + bool isleftshift) +{ + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift); + gcc_assert (is_valid != 0); + + if (quad) + sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width); + else + sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width); + + return templ; +} + +/* Output a sequence of pairwise operations to implement a reduction. + NOTE: We do "too much work" here, because pairwise operations work on two + registers-worth of operands in one go. Unfortunately we can't exploit those + extra calculations to do the full operation in fewer steps, I don't think. + Although all vector elements of the result but the first are ignored, we + actually calculate the same result in each of the elements. An alternative + such as initially loading a vector with zero to use as each of the second + operands would use up an additional register and take an extra instruction, + for no particular gain. */ + +void +neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, + rtx (*reduc) (rtx, rtx, rtx)) +{ + enum machine_mode inner = GET_MODE_INNER (mode); + unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner); + rtx tmpsum = op1; + + for (i = parts / 2; i >= 1; i /= 2) + { + rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode); + emit_insn (reduc (dest, tmpsum, tmpsum)); + tmpsum = dest; + } +} + +/* If VALS is a vector constant that can be loaded into a register + using VDUP, generate instructions to do so and return an RTX to + assign to the register. Otherwise return NULL_RTX. */ + +static rtx +neon_vdup_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + bool all_same = true; + rtx x; + int i; + + if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) + return NULL_RTX; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (!all_same) + /* The elements are not all the same. We could handle repeating + patterns of a mode larger than INNER_MODE here (e.g. int8x8_t + {0, C, 0, C, 0, C, 0, C} which can be loaded using + vdup.i16). */ + return NULL_RTX; + + /* We can load this constant by using VDUP and a constant in a + single ARM register. This will be cheaper than a vector + load. */ + + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + return gen_rtx_VEC_DUPLICATE (mode, x); +} + +/* Generate code to load VALS, which is a PARALLEL containing only + constants (for vec_init) or CONST_VECTOR, efficiently into a + register. Returns an RTX to copy into the register, or NULL_RTX + for a PARALLEL that can not be converted into a CONST_VECTOR. */ + +rtx +neon_make_constant (rtx vals) +{ + enum machine_mode mode = GET_MODE (vals); + rtx target; + rtx const_vec = NULL_RTX; + int n_elts = GET_MODE_NUNITS (mode); + int n_const = 0; + int i; + + if (GET_CODE (vals) == CONST_VECTOR) + const_vec = vals; + else if (GET_CODE (vals) == PARALLEL) + { + /* A CONST_VECTOR must contain only CONST_INTs and + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). + Only store valid constants in a CONST_VECTOR. 
*/ + for (i = 0; i < n_elts; ++i) + { + rtx x = XVECEXP (vals, 0, i); + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + n_const++; + } + if (n_const == n_elts) + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + } + else + gcc_unreachable (); + + if (const_vec != NULL + && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL)) + /* Load using VMOV. On Cortex-A8 this takes one cycle. */ + return const_vec; + else if ((target = neon_vdup_constant (vals)) != NULL_RTX) + /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON + pipeline cycle; creating the constant takes one or two ARM + pipeline cycles. */ + return target; + else if (const_vec != NULL_RTX) + /* Load from constant pool. On Cortex-A8 this takes two cycles + (for either double or quad vectors). We can not take advantage + of single-cycle VLD1 because we need a PC-relative addressing + mode. */ + return const_vec; + else + /* A PARALLEL containing something not valid inside CONST_VECTOR. + We can not construct an initializer. */ + return NULL_RTX; +} + +/* Initialize vector TARGET to VALS. */ + +void +neon_expand_vector_init (rtx target, rtx vals) +{ + enum machine_mode mode = GET_MODE (target); + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int n_elts = GET_MODE_NUNITS (mode); + int n_var = 0, one_var = -1; + bool all_same = true; + rtx x, mem; + int i; + + for (i = 0; i < n_elts; ++i) + { + x = XVECEXP (vals, 0, i); + if (!CONSTANT_P (x)) + ++n_var, one_var = i; + + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) + all_same = false; + } + + if (n_var == 0) + { + rtx constant = neon_make_constant (vals); + if (constant != NULL_RTX) + { + emit_move_insn (target, constant); + return; + } + } + + /* Splat a single non-constant element if we can. */ + if (all_same && GET_MODE_SIZE (inner_mode) <= 4) + { + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_VEC_DUPLICATE (mode, x))); + return; + } + + /* One field is non-constant. Load constant then overwrite varying + field. This is more efficient than using the stack. */ + if (n_var == 1) + { + rtx copy = copy_rtx (vals); + rtx index = GEN_INT (one_var); + + /* Load constant part of vector, substitute neighboring value for + varying element. */ + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); + neon_expand_vector_init (target, copy); + + /* Insert variable. */ + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); + switch (mode) + { + case V8QImode: + emit_insn (gen_neon_vset_lanev8qi (target, x, target, index)); + break; + case V16QImode: + emit_insn (gen_neon_vset_lanev16qi (target, x, target, index)); + break; + case V4HImode: + emit_insn (gen_neon_vset_lanev4hi (target, x, target, index)); + break; + case V8HImode: + emit_insn (gen_neon_vset_lanev8hi (target, x, target, index)); + break; + case V2SImode: + emit_insn (gen_neon_vset_lanev2si (target, x, target, index)); + break; + case V4SImode: + emit_insn (gen_neon_vset_lanev4si (target, x, target, index)); + break; + case V2SFmode: + emit_insn (gen_neon_vset_lanev2sf (target, x, target, index)); + break; + case V4SFmode: + emit_insn (gen_neon_vset_lanev4sf (target, x, target, index)); + break; + case V2DImode: + emit_insn (gen_neon_vset_lanev2di (target, x, target, index)); + break; + default: + gcc_unreachable (); + } + return; + } + + /* Construct the vector in memory one field at a time + and load the whole vector. 
*/ + mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); + for (i = 0; i < n_elts; i++) + emit_move_insn (adjust_address_nv (mem, inner_mode, + i * GET_MODE_SIZE (inner_mode)), + XVECEXP (vals, 0, i)); + emit_move_insn (target, mem); +} + +/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise + ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so + reported source locations are bogus. */ + +static void +bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, + const char *err) +{ + HOST_WIDE_INT lane; + + gcc_assert (CONST_INT_P (operand)); + + lane = INTVAL (operand); + + if (lane < low || lane >= high) + error (err); +} + +/* Bounds-check lanes. */ + +void +neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + bounds_check (operand, low, high, "lane out of range"); +} + +/* Bounds-check constants. */ + +void +neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) +{ + bounds_check (operand, low, high, "constant out of range"); +} + +HOST_WIDE_INT +neon_element_bits (enum machine_mode mode) +{ + if (mode == DImode) + return GET_MODE_BITSIZE (mode); + else + return GET_MODE_BITSIZE (GET_MODE_INNER (mode)); +} + + +/* Predicates for `match_operand' and `match_operator'. */ + +/* Return TRUE if OP is a valid coprocessor memory address pattern. + WB is true if full writeback address modes are allowed and is false + if limited writeback address modes (POST_INC and PRE_DEC) are + allowed. */ + +int +arm_coproc_mem_operand (rtx op, bool wb) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed || lra_in_progress) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return FALSE; + + /* Constants are converted into offsets from labels. */ + if (!MEM_P (op)) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) + return TRUE; + + /* Match: (mem (reg)). */ + if (REG_P (ind)) + return arm_address_register_rtx_p (ind, 0); + + /* Autoincremment addressing modes. POST_INC and PRE_DEC are + acceptable in any case (subject to verification by + arm_address_register_rtx_p). We need WB to be true to accept + PRE_INC and POST_DEC. */ + if (GET_CODE (ind) == POST_INC + || GET_CODE (ind) == PRE_DEC + || (wb + && (GET_CODE (ind) == PRE_INC + || GET_CODE (ind) == POST_DEC))) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + if (wb + && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) + && arm_address_register_rtx_p (XEXP (ind, 0), 0) + && GET_CODE (XEXP (ind, 1)) == PLUS + && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) + ind = XEXP (ind, 1); + + /* Match: + (plus (reg) + (const)). */ + if (GET_CODE (ind) == PLUS + && REG_P (XEXP (ind, 0)) + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) + && CONST_INT_P (XEXP (ind, 1)) + && INTVAL (XEXP (ind, 1)) > -1024 + && INTVAL (XEXP (ind, 1)) < 1024 + && (INTVAL (XEXP (ind, 1)) & 3) == 0) + return TRUE; + + return FALSE; +} + +/* Return TRUE if OP is a memory operand which we can load or store a vector + to/from. 
TYPE is one of the following values: + 0 - Vector load/stor (vldr) + 1 - Core registers (ldm) + 2 - Element/structure loads (vld1) + */ +int +neon_vector_mem_operand (rtx op, int type, bool strict) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return !strict; + + /* Constants are converted into offsets from labels. */ + if (!MEM_P (op)) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) + return TRUE; + + /* Match: (mem (reg)). */ + if (REG_P (ind)) + return arm_address_register_rtx_p (ind, 0); + + /* Allow post-increment with Neon registers. */ + if ((type != 1 && GET_CODE (ind) == POST_INC) + || (type == 0 && GET_CODE (ind) == PRE_DEC)) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + /* FIXME: vld1 allows register post-modify. */ + + /* Match: + (plus (reg) + (const)). */ + if (type == 0 + && GET_CODE (ind) == PLUS + && REG_P (XEXP (ind, 0)) + && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) + && CONST_INT_P (XEXP (ind, 1)) + && INTVAL (XEXP (ind, 1)) > -1024 + /* For quad modes, we restrict the constant offset to be slightly less + than what the instruction format permits. We have no such constraint + on double mode offsets. (This must match arm_legitimate_index_p.) */ + && (INTVAL (XEXP (ind, 1)) + < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024)) + && (INTVAL (XEXP (ind, 1)) & 3) == 0) + return TRUE; + + return FALSE; +} + +/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct + type. */ +int +neon_struct_mem_operand (rtx op) +{ + rtx ind; + + /* Reject eliminable registers. */ + if (! (reload_in_progress || reload_completed) + && ( reg_mentioned_p (frame_pointer_rtx, op) + || reg_mentioned_p (arg_pointer_rtx, op) + || reg_mentioned_p (virtual_incoming_args_rtx, op) + || reg_mentioned_p (virtual_outgoing_args_rtx, op) + || reg_mentioned_p (virtual_stack_dynamic_rtx, op) + || reg_mentioned_p (virtual_stack_vars_rtx, op))) + return FALSE; + + /* Constants are converted into offsets from labels. */ + if (!MEM_P (op)) + return FALSE; + + ind = XEXP (op, 0); + + if (reload_completed + && (GET_CODE (ind) == LABEL_REF + || (GET_CODE (ind) == CONST + && GET_CODE (XEXP (ind, 0)) == PLUS + && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) + return TRUE; + + /* Match: (mem (reg)). */ + if (REG_P (ind)) + return arm_address_register_rtx_p (ind, 0); + + /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */ + if (GET_CODE (ind) == POST_INC + || GET_CODE (ind) == PRE_DEC) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + return FALSE; +} + +/* Return true if X is a register that will be eliminated later on. */ +int +arm_eliminable_register (rtx x) +{ + return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM + || REGNO (x) == ARG_POINTER_REGNUM + || (REGNO (x) >= FIRST_VIRTUAL_REGISTER + && REGNO (x) <= LAST_VIRTUAL_REGISTER)); +} + +/* Return GENERAL_REGS if a scratch register required to reload x to/from + coprocessor registers. 
Otherwise return NO_REGS. */ + +enum reg_class +coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) +{ + if (mode == HFmode) + { + if (!TARGET_NEON_FP16) + return GENERAL_REGS; + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true)) + return NO_REGS; + return GENERAL_REGS; + } + + /* The neon move patterns handle all legitimate vector and struct + addresses. */ + if (TARGET_NEON + && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR) + && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || VALID_NEON_STRUCT_MODE (mode))) + return NO_REGS; + + if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) + return NO_REGS; + + return GENERAL_REGS; +} + +/* Values which must be returned in the most-significant end of the return + register. */ + +static bool +arm_return_in_msb (const_tree valtype) +{ + return (TARGET_AAPCS_BASED + && BYTES_BIG_ENDIAN + && (AGGREGATE_TYPE_P (valtype) + || TREE_CODE (valtype) == COMPLEX_TYPE + || FIXED_POINT_TYPE_P (valtype))); +} + +/* Return TRUE if X references a SYMBOL_REF. */ +int +symbol_mentioned_p (rtx x) +{ + const char * fmt; + int i; + + if (GET_CODE (x) == SYMBOL_REF) + return 1; + + /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they + are constant offsets, not symbols. */ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return 0; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (symbol_mentioned_p (XVECEXP (x, i, j))) + return 1; + } + else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i))) + return 1; + } + + return 0; +} + +/* Return TRUE if X references a LABEL_REF. */ +int +label_mentioned_p (rtx x) +{ + const char * fmt; + int i; + + if (GET_CODE (x) == LABEL_REF) + return 1; + + /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing + instruction, but they are constant offsets, not symbols. */ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return 0; + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if (label_mentioned_p (XVECEXP (x, i, j))) + return 1; + } + else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i))) + return 1; + } + + return 0; +} + +int +tls_mentioned_p (rtx x) +{ + switch (GET_CODE (x)) + { + case CONST: + return tls_mentioned_p (XEXP (x, 0)); + + case UNSPEC: + if (XINT (x, 1) == UNSPEC_TLS) + return 1; + + default: + return 0; + } +} + +/* Must not copy any rtx that uses a pc-relative address. */ + +static int +arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED) +{ + if (GET_CODE (*x) == UNSPEC + && (XINT (*x, 1) == UNSPEC_PIC_BASE + || XINT (*x, 1) == UNSPEC_PIC_UNIFIED)) + return 1; + return 0; +} + +static bool +arm_cannot_copy_insn_p (rtx insn) +{ + /* The tls call insn cannot be copied, as it is paired with a data + word. */ + if (recog_memoized (insn) == CODE_FOR_tlscall) + return true; + + return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL); +} + +enum rtx_code +minmax_code (rtx x) +{ + enum rtx_code code = GET_CODE (x); + + switch (code) + { + case SMAX: + return GE; + case SMIN: + return LE; + case UMIN: + return LEU; + case UMAX: + return GEU; + default: + gcc_unreachable (); + } +} + +/* Match pair of min/max operators that can be implemented via usat/ssat. 
 */
+
+bool
+arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
+                        int *mask, bool *signed_sat)
+{
+  /* The high bound must be a power of two minus one.  */
+  int log = exact_log2 (INTVAL (hi_bound) + 1);
+  if (log == -1)
+    return false;
+
+  /* The low bound is either zero (for usat) or one less than the
+     negation of the high bound (for ssat).  */
+  if (INTVAL (lo_bound) == 0)
+    {
+      if (mask)
+        *mask = log;
+      if (signed_sat)
+        *signed_sat = false;
+
+      return true;
+    }
+
+  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
+    {
+      if (mask)
+        *mask = log + 1;
+      if (signed_sat)
+        *signed_sat = true;
+
+      return true;
+    }
+
+  return false;
+}
+
+/* Return 1 if memory locations are adjacent.  */
+int
+adjacent_mem_locations (rtx a, rtx b)
+{
+  /* We don't guarantee to preserve the order of these memory refs.  */
+  if (volatile_refs_p (a) || volatile_refs_p (b))
+    return 0;
+
+  if ((REG_P (XEXP (a, 0))
+       || (GET_CODE (XEXP (a, 0)) == PLUS
+           && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
+      && (REG_P (XEXP (b, 0))
+          || (GET_CODE (XEXP (b, 0)) == PLUS
+              && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
+    {
+      HOST_WIDE_INT val0 = 0, val1 = 0;
+      rtx reg0, reg1;
+      int val_diff;
+
+      if (GET_CODE (XEXP (a, 0)) == PLUS)
+        {
+          reg0 = XEXP (XEXP (a, 0), 0);
+          val0 = INTVAL (XEXP (XEXP (a, 0), 1));
+        }
+      else
+        reg0 = XEXP (a, 0);
+
+      if (GET_CODE (XEXP (b, 0)) == PLUS)
+        {
+          reg1 = XEXP (XEXP (b, 0), 0);
+          val1 = INTVAL (XEXP (XEXP (b, 0), 1));
+        }
+      else
+        reg1 = XEXP (b, 0);
+
+      /* Don't accept any offset that will require multiple
+         instructions to handle, since this would cause the
+         arith_adjacentmem pattern to output an overlong sequence.  */
+      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
+        return 0;
+
+      /* Don't allow an eliminable register: register elimination can make
+         the offset too large.  */
+      if (arm_eliminable_register (reg0))
+        return 0;
+
+      val_diff = val1 - val0;
+
+      if (arm_ld_sched)
+        {
+          /* If the target has load delay slots, then there's no benefit
+             to using an ldm instruction unless the offset is zero and
+             we are optimizing for size.  */
+          return (optimize_size && (REGNO (reg0) == REGNO (reg1))
+                  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
+                  && (val_diff == 4 || val_diff == -4));
+        }
+
+      return ((REGNO (reg0) == REGNO (reg1))
+              && (val_diff == 4 || val_diff == -4));
+    }
+
+  return 0;
+}
+
+/* Return true if OP is a valid load or store multiple operation.  LOAD is true
+   for load operations, false for store operations.  CONSECUTIVE is true
+   if the register numbers in the operation must be consecutive in the register
+   bank.  RETURN_PC is true if value is to be loaded in PC.
+   The pattern we are trying to match for load is:
+     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
+      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
+       :
+       :
+      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
+     ]
+     where
+       1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
+       2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+       3.  If consecutive is TRUE, then for kth register being loaded,
+           REGNO (R_dk) = REGNO (R_d0) + k.
+   The pattern for store is similar.
*/ +bool +ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode, + bool consecutive, bool return_pc) +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + rtx reg, mem, addr; + unsigned regno; + unsigned first_regno; + HOST_WIDE_INT i = 1, base = 0, offset = 0; + rtx elt; + bool addr_reg_in_reglist = false; + bool update = false; + int reg_increment; + int offset_adj; + int regs_per_val; + + /* If not in SImode, then registers must be consecutive + (e.g., VLDM instructions for DFmode). */ + gcc_assert ((mode == SImode) || consecutive); + /* Setting return_pc for stores is illegal. */ + gcc_assert (!return_pc || load); + + /* Set up the increments and the regs per val based on the mode. */ + reg_increment = GET_MODE_SIZE (mode); + regs_per_val = reg_increment / 4; + offset_adj = return_pc ? 1 : 0; + + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET + || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))) + return false; + + /* Check if this is a write-back. */ + elt = XVECEXP (op, 0, offset_adj); + if (GET_CODE (SET_SRC (elt)) == PLUS) + { + i++; + base = 1; + update = true; + + /* The offset adjustment must be the number of registers being + popped times the size of a single register. */ + if (!REG_P (SET_DEST (elt)) + || !REG_P (XEXP (SET_SRC (elt), 0)) + || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0))) + || !CONST_INT_P (XEXP (SET_SRC (elt), 1)) + || INTVAL (XEXP (SET_SRC (elt), 1)) != + ((count - 1 - offset_adj) * reg_increment)) + return false; + } + + i = i + offset_adj; + base = base + offset_adj; + /* Perform a quick check so we don't blow up below. If only one reg is loaded, + success depends on the type: VLDM can do just one reg, + LDM must do at least two. */ + if ((count <= i) && (mode == SImode)) + return false; + + elt = XVECEXP (op, 0, i - 1); + if (GET_CODE (elt) != SET) + return false; + + if (load) + { + reg = SET_DEST (elt); + mem = SET_SRC (elt); + } + else + { + reg = SET_SRC (elt); + mem = SET_DEST (elt); + } + + if (!REG_P (reg) || !MEM_P (mem)) + return false; + + regno = REGNO (reg); + first_regno = regno; + addr = XEXP (mem, 0); + if (GET_CODE (addr) == PLUS) + { + if (!CONST_INT_P (XEXP (addr, 1))) + return false; + + offset = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + if (!REG_P (addr)) + return false; + + /* Don't allow SP to be loaded unless it is also the base register. It + guarantees that SP is reset correctly when an LDM instruction + is interrupted. Otherwise, we might end up with a corrupt stack. */ + if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM)) + return false; + + for (; i < count; i++) + { + elt = XVECEXP (op, 0, i); + if (GET_CODE (elt) != SET) + return false; + + if (load) + { + reg = SET_DEST (elt); + mem = SET_SRC (elt); + } + else + { + reg = SET_SRC (elt); + mem = SET_DEST (elt); + } + + if (!REG_P (reg) + || GET_MODE (reg) != mode + || REGNO (reg) <= regno + || (consecutive + && (REGNO (reg) != + (unsigned int) (first_regno + regs_per_val * (i - base)))) + /* Don't allow SP to be loaded unless it is also the base register. It + guarantees that SP is reset correctly when an LDM instruction + is interrupted. Otherwise, we might end up with a corrupt stack. 
*/ + || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM)) + || !MEM_P (mem) + || GET_MODE (mem) != mode + || ((GET_CODE (XEXP (mem, 0)) != PLUS + || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr) + || !CONST_INT_P (XEXP (XEXP (mem, 0), 1)) + || (INTVAL (XEXP (XEXP (mem, 0), 1)) != + offset + (i - base) * reg_increment)) + && (!REG_P (XEXP (mem, 0)) + || offset + (i - base) * reg_increment != 0))) + return false; + + regno = REGNO (reg); + if (regno == REGNO (addr)) + addr_reg_in_reglist = true; + } + + if (load) + { + if (update && addr_reg_in_reglist) + return false; + + /* For Thumb-1, address register is always modified - either by write-back + or by explicit load. If the pattern does not describe an update, + then the address register must be in the list of loaded registers. */ + if (TARGET_THUMB1) + return update || addr_reg_in_reglist; + } + + return true; +} + +/* Return true iff it would be profitable to turn a sequence of NOPS loads + or stores (depending on IS_STORE) into a load-multiple or store-multiple + instruction. ADD_OFFSET is nonzero if the base address register needs + to be modified with an add instruction before we can use it. */ + +static bool +multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED, + int nops, HOST_WIDE_INT add_offset) + { + /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm + if the offset isn't small enough. The reason 2 ldrs are faster + is because these ARMs are able to do more than one cache access + in a single cycle. The ARM9 and StrongARM have Harvard caches, + whilst the ARM8 has a double bandwidth cache. This means that + these cores can do both an instruction fetch and a data fetch in + a single cycle, so the trick of calculating the address into a + scratch register (one of the result regs) and then doing a load + multiple actually becomes slower (and no smaller in code size). + That is the transformation + + ldr rd1, [rbase + offset] + ldr rd2, [rbase + offset + 4] + + to + + add rd1, rbase, offset + ldmia rd1, {rd1, rd2} + + produces worse code -- '3 cycles + any stalls on rd2' instead of + '2 cycles + any stalls on rd2'. On ARMs with only one cache + access per cycle, the first sequence could never complete in less + than 6 cycles, whereas the ldm sequence would only take 5 and + would make better use of sequential accesses if not hitting the + cache. + + We cheat here and test 'arm_ld_sched' which we currently know to + only be true for the ARM8, ARM9 and StrongARM. If this ever + changes, then the test below needs to be reworked. */ + if (nops == 2 && arm_ld_sched && add_offset != 0) + return false; + + /* XScale has load-store double instructions, but they have stricter + alignment requirements than load-store multiple, so we cannot + use them. + + For XScale ldm requires 2 + NREGS cycles to complete and blocks + the pipeline until completion. + + NREGS CYCLES + 1 3 + 2 4 + 3 5 + 4 6 + + An ldr instruction takes 1-3 cycles, but does not block the + pipeline. + + NREGS CYCLES + 1 1-3 + 2 2-6 + 3 3-9 + 4 4-12 + + Best case ldr will always win. However, the more ldr instructions + we issue, the less likely we are to be able to schedule them well. + Using ldr instructions also increases code size. + + As a compromise, we use ldr for counts of 1 or 2 regs, and ldm + for counts of 3 or 4 regs. */ + if (nops <= 2 && arm_tune_xscale && !optimize_size) + return false; + return true; +} + +/* Subroutine of load_multiple_sequence and store_multiple_sequence. 
+ Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute + an array ORDER which describes the sequence to use when accessing the + offsets that produces an ascending order. In this sequence, each + offset must be larger by exactly 4 than the previous one. ORDER[0] + must have been filled in with the lowest offset by the caller. + If UNSORTED_REGS is nonnull, it is an array of register numbers that + we use to verify that ORDER produces an ascending order of registers. + Return true if it was possible to construct such an order, false if + not. */ + +static bool +compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order, + int *unsorted_regs) +{ + int i; + for (i = 1; i < nops; i++) + { + int j; + + order[i] = order[i - 1]; + for (j = 0; j < nops; j++) + if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4) + { + /* We must find exactly one offset that is higher than the + previous one by 4. */ + if (order[i] != order[i - 1]) + return false; + order[i] = j; + } + if (order[i] == order[i - 1]) + return false; + /* The register numbers must be ascending. */ + if (unsorted_regs != NULL + && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]]) + return false; + } + return true; +} + +/* Used to determine in a peephole whether a sequence of load + instructions can be changed into a load-multiple instruction. + NOPS is the number of separate load instructions we are examining. The + first NOPS entries in OPERANDS are the destination registers, the + next NOPS entries are memory operands. If this function is + successful, *BASE is set to the common base register of the memory + accesses; *LOAD_OFFSET is set to the first memory location's offset + from that base register. + REGS is an array filled in with the destination register numbers. + SAVED_ORDER (if nonnull), is an array filled in with an order that maps + insn numbers to an ascending order of stores. If CHECK_REGS is true, + the sequence of registers in REGS matches the loads from ascending memory + locations, and the function verifies that the register numbers are + themselves ascending. If CHECK_REGS is false, the register numbers + are stored in the order they are found in the operands. */ +static int +load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order, + int *base, HOST_WIDE_INT *load_offset, bool check_regs) +{ + int unsorted_regs[MAX_LDM_STM_OPS]; + HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; + int order[MAX_LDM_STM_OPS]; + rtx base_reg_rtx = NULL; + int base_reg = -1; + int i, ldm_case; + + /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be + easily extended if required. */ + gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); + + memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); + + /* Loop over the operands and check that the memory references are + suitable (i.e. immediate offsets from the same base register). At + the same time, extract the target register, and the memory + offsets. */ + for (i = 0; i < nops; i++) + { + rtx reg; + rtx offset; + + /* Convert a subreg of a mem into the mem itself. */ + if (GET_CODE (operands[nops + i]) == SUBREG) + operands[nops + i] = alter_subreg (operands + (nops + i), true); + + gcc_assert (MEM_P (operands[nops + i])); + + /* Don't reorder volatile memory references; it doesn't seem worth + looking for the case where the order is ok anyway. 
*/ + if (MEM_VOLATILE_P (operands[nops + i])) + return 0; + + offset = const0_rtx; + + if ((REG_P (reg = XEXP (operands[nops + i], 0)) + || (GET_CODE (reg) == SUBREG + && REG_P (reg = SUBREG_REG (reg)))) + || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS + && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0))) + || (GET_CODE (reg) == SUBREG + && REG_P (reg = SUBREG_REG (reg)))) + && (CONST_INT_P (offset + = XEXP (XEXP (operands[nops + i], 0), 1))))) + { + if (i == 0) + { + base_reg = REGNO (reg); + base_reg_rtx = reg; + if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) + return 0; + } + else if (base_reg != (int) REGNO (reg)) + /* Not addressed from the same base register. */ + return 0; + + unsorted_regs[i] = (REG_P (operands[i]) + ? REGNO (operands[i]) + : REGNO (SUBREG_REG (operands[i]))); + + /* If it isn't an integer register, or if it overwrites the + base register but isn't the last insn in the list, then + we can't do this. */ + if (unsorted_regs[i] < 0 + || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) + || unsorted_regs[i] > 14 + || (i != nops - 1 && unsorted_regs[i] == base_reg)) + return 0; + + /* Don't allow SP to be loaded unless it is also the base + register. It guarantees that SP is reset correctly when + an LDM instruction is interrupted. Otherwise, we might + end up with a corrupt stack. */ + if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM) + return 0; + + unsorted_offsets[i] = INTVAL (offset); + if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) + order[0] = i; + } + else + /* Not a suitable memory address. */ + return 0; + } + + /* All the useful information has now been extracted from the + operands into unsorted_regs and unsorted_offsets; additionally, + order[0] has been set to the lowest offset in the list. Sort + the offsets into order, verifying that they are adjacent, and + check that the register numbers are ascending. */ + if (!compute_offset_order (nops, unsorted_offsets, order, + check_regs ? unsorted_regs : NULL)) + return 0; + + if (saved_order) + memcpy (saved_order, order, sizeof order); + + if (base) + { + *base = base_reg; + + for (i = 0; i < nops; i++) + regs[i] = unsorted_regs[check_regs ? order[i] : i]; + + *load_offset = unsorted_offsets[order[0]]; + } + + if (TARGET_THUMB1 + && !peep2_reg_dead_p (nops, base_reg_rtx)) + return 0; + + if (unsorted_offsets[order[0]] == 0) + ldm_case = 1; /* ldmia */ + else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) + ldm_case = 2; /* ldmib */ + else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) + ldm_case = 3; /* ldmda */ + else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) + ldm_case = 4; /* ldmdb */ + else if (const_ok_for_arm (unsorted_offsets[order[0]]) + || const_ok_for_arm (-unsorted_offsets[order[0]])) + ldm_case = 5; + else + return 0; + + if (!multiple_operation_profitable_p (false, nops, + ldm_case == 5 + ? unsorted_offsets[order[0]] : 0)) + return 0; + + return ldm_case; +} + +/* Used to determine in a peephole whether a sequence of store instructions can + be changed into a store-multiple instruction. + NOPS is the number of separate store instructions we are examining. + NOPS_TOTAL is the total number of instructions recognized by the peephole + pattern. + The first NOPS entries in OPERANDS are the source registers, the next + NOPS entries are memory operands. 
If this function is successful, *BASE is + set to the common base register of the memory accesses; *LOAD_OFFSET is set + to the first memory location's offset from that base register. REGS is an + array filled in with the source register numbers, REG_RTXS (if nonnull) is + likewise filled with the corresponding rtx's. + SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn + numbers to an ascending order of stores. + If CHECK_REGS is true, the sequence of registers in *REGS matches the stores + from ascending memory locations, and the function verifies that the register + numbers are themselves ascending. If CHECK_REGS is false, the register + numbers are stored in the order they are found in the operands. */ +static int +store_multiple_sequence (rtx *operands, int nops, int nops_total, + int *regs, rtx *reg_rtxs, int *saved_order, int *base, + HOST_WIDE_INT *load_offset, bool check_regs) +{ + int unsorted_regs[MAX_LDM_STM_OPS]; + rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS]; + HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; + int order[MAX_LDM_STM_OPS]; + int base_reg = -1; + rtx base_reg_rtx = NULL; + int i, stm_case; + + /* Write back of base register is currently only supported for Thumb 1. */ + int base_writeback = TARGET_THUMB1; + + /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be + easily extended if required. */ + gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); + + memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); + + /* Loop over the operands and check that the memory references are + suitable (i.e. immediate offsets from the same base register). At + the same time, extract the target register, and the memory + offsets. */ + for (i = 0; i < nops; i++) + { + rtx reg; + rtx offset; + + /* Convert a subreg of a mem into the mem itself. */ + if (GET_CODE (operands[nops + i]) == SUBREG) + operands[nops + i] = alter_subreg (operands + (nops + i), true); + + gcc_assert (MEM_P (operands[nops + i])); + + /* Don't reorder volatile memory references; it doesn't seem worth + looking for the case where the order is ok anyway. */ + if (MEM_VOLATILE_P (operands[nops + i])) + return 0; + + offset = const0_rtx; + + if ((REG_P (reg = XEXP (operands[nops + i], 0)) + || (GET_CODE (reg) == SUBREG + && REG_P (reg = SUBREG_REG (reg)))) + || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS + && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0))) + || (GET_CODE (reg) == SUBREG + && REG_P (reg = SUBREG_REG (reg)))) + && (CONST_INT_P (offset + = XEXP (XEXP (operands[nops + i], 0), 1))))) + { + unsorted_reg_rtxs[i] = (REG_P (operands[i]) + ? operands[i] : SUBREG_REG (operands[i])); + unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]); + + if (i == 0) + { + base_reg = REGNO (reg); + base_reg_rtx = reg; + if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) + return 0; + } + else if (base_reg != (int) REGNO (reg)) + /* Not addressed from the same base register. */ + return 0; + + /* If it isn't an integer register, then we can't do this. */ + if (unsorted_regs[i] < 0 + || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) + /* The effects are unpredictable if the base register is + both updated and stored. */ + || (base_writeback && unsorted_regs[i] == base_reg) + || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM) + || unsorted_regs[i] > 14) + return 0; + + unsorted_offsets[i] = INTVAL (offset); + if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) + order[0] = i; + } + else + /* Not a suitable memory address. 
*/ + return 0; + } + + /* All the useful information has now been extracted from the + operands into unsorted_regs and unsorted_offsets; additionally, + order[0] has been set to the lowest offset in the list. Sort + the offsets into order, verifying that they are adjacent, and + check that the register numbers are ascending. */ + if (!compute_offset_order (nops, unsorted_offsets, order, + check_regs ? unsorted_regs : NULL)) + return 0; + + if (saved_order) + memcpy (saved_order, order, sizeof order); + + if (base) + { + *base = base_reg; + + for (i = 0; i < nops; i++) + { + regs[i] = unsorted_regs[check_regs ? order[i] : i]; + if (reg_rtxs) + reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i]; + } + + *load_offset = unsorted_offsets[order[0]]; + } + + if (TARGET_THUMB1 + && !peep2_reg_dead_p (nops_total, base_reg_rtx)) + return 0; + + if (unsorted_offsets[order[0]] == 0) + stm_case = 1; /* stmia */ + else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) + stm_case = 2; /* stmib */ + else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) + stm_case = 3; /* stmda */ + else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) + stm_case = 4; /* stmdb */ + else + return 0; + + if (!multiple_operation_profitable_p (false, nops, 0)) + return 0; + + return stm_case; +} + +/* Routines for use in generating RTL. */ + +/* Generate a load-multiple instruction. COUNT is the number of loads in + the instruction; REGS and MEMS are arrays containing the operands. + BASEREG is the base register to be used in addressing the memory operands. + WBACK_OFFSET is nonzero if the instruction should update the base + register. */ + +static rtx +arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, + HOST_WIDE_INT wback_offset) +{ + int i = 0, j; + rtx result; + + if (!multiple_operation_profitable_p (false, count, 0)) + { + rtx seq; + + start_sequence (); + + for (i = 0; i < count; i++) + emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]); + + if (wback_offset != 0) + emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset)); + + seq = get_insns (); + end_sequence (); + + return seq; + } + + result = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (count + (wback_offset != 0 ? 1 : 0))); + if (wback_offset != 0) + { + XVECEXP (result, 0, 0) + = gen_rtx_SET (VOIDmode, basereg, + plus_constant (Pmode, basereg, wback_offset)); + i = 1; + count++; + } + + for (j = 0; i < count; i++, j++) + XVECEXP (result, 0, i) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]); + + return result; +} + +/* Generate a store-multiple instruction. COUNT is the number of stores in + the instruction; REGS and MEMS are arrays containing the operands. + BASEREG is the base register to be used in addressing the memory operands. + WBACK_OFFSET is nonzero if the instruction should update the base + register. */ + +static rtx +arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, + HOST_WIDE_INT wback_offset) +{ + int i = 0, j; + rtx result; + + if (GET_CODE (basereg) == PLUS) + basereg = XEXP (basereg, 0); + + if (!multiple_operation_profitable_p (false, count, 0)) + { + rtx seq; + + start_sequence (); + + for (i = 0; i < count; i++) + emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i])); + + if (wback_offset != 0) + emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset)); + + seq = get_insns (); + end_sequence (); + + return seq; + } + + result = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (count + (wback_offset != 0 ? 
1 : 0))); + if (wback_offset != 0) + { + XVECEXP (result, 0, 0) + = gen_rtx_SET (VOIDmode, basereg, + plus_constant (Pmode, basereg, wback_offset)); + i = 1; + count++; + } + + for (j = 0; i < count; i++, j++) + XVECEXP (result, 0, i) + = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j])); + + return result; +} + +/* Generate either a load-multiple or a store-multiple instruction. This + function can be used in situations where we can start with a single MEM + rtx and adjust its address upwards. + COUNT is the number of operations in the instruction, not counting a + possible update of the base register. REGS is an array containing the + register operands. + BASEREG is the base register to be used in addressing the memory operands, + which are constructed from BASEMEM. + WRITE_BACK specifies whether the generated instruction should include an + update of the base register. + OFFSETP is used to pass an offset to and from this function; this offset + is not used when constructing the address (instead BASEMEM should have an + appropriate offset in its address), it is used only for setting + MEM_OFFSET. It is updated only if WRITE_BACK is true.*/ + +static rtx +arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg, + bool write_back, rtx basemem, HOST_WIDE_INT *offsetp) +{ + rtx mems[MAX_LDM_STM_OPS]; + HOST_WIDE_INT offset = *offsetp; + int i; + + gcc_assert (count <= MAX_LDM_STM_OPS); + + if (GET_CODE (basereg) == PLUS) + basereg = XEXP (basereg, 0); + + for (i = 0; i < count; i++) + { + rtx addr = plus_constant (Pmode, basereg, i * 4); + mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset); + offset += 4; + } + + if (write_back) + *offsetp = offset; + + if (is_load) + return arm_gen_load_multiple_1 (count, regs, mems, basereg, + write_back ? 4 * count : 0); + else + return arm_gen_store_multiple_1 (count, regs, mems, basereg, + write_back ? 4 * count : 0); +} + +rtx +arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back, + rtx basemem, HOST_WIDE_INT *offsetp) +{ + return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem, + offsetp); +} + +rtx +arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back, + rtx basemem, HOST_WIDE_INT *offsetp) +{ + return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem, + offsetp); +} + +/* Called from a peephole2 expander to turn a sequence of loads into an + LDM instruction. OPERANDS are the operands found by the peephole matcher; + NOPS indicates how many separate loads we are trying to combine. SORT_REGS + is true if we can reorder the registers because they are used commutatively + subsequently. + Returns true iff we could generate a new instruction. 
*/ + +bool +gen_ldm_seq (rtx *operands, int nops, bool sort_regs) +{ + int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int i, j, base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int ldm_case; + rtx addr; + + ldm_case = load_multiple_sequence (operands, nops, regs, mem_order, + &base_reg, &offset, !sort_regs); + + if (ldm_case == 0) + return false; + + if (sort_regs) + for (i = 0; i < nops - 1; i++) + for (j = i + 1; j < nops; j++) + if (regs[i] > regs[j]) + { + int t = regs[i]; + regs[i] = regs[j]; + regs[j] = t; + } + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + if (TARGET_THUMB1) + { + gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx)); + gcc_assert (ldm_case == 1 || ldm_case == 5); + write_back = TRUE; + } + + if (ldm_case == 5) + { + rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]); + emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset))); + offset = 0; + if (!TARGET_THUMB1) + { + base_reg = regs[0]; + base_reg_rtx = newbase; + } + } + + for (i = 0; i < nops; i++) + { + addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx, + write_back ? offset + i * 4 : 0)); + return true; +} + +/* Called from a peephole2 expander to turn a sequence of stores into an + STM instruction. OPERANDS are the operands found by the peephole matcher; + NOPS indicates how many separate stores we are trying to combine. + Returns true iff we could generate a new instruction. */ + +bool +gen_stm_seq (rtx *operands, int nops) +{ + int i; + int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int stm_case; + rtx addr; + bool base_reg_dies; + + stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL, + mem_order, &base_reg, &offset, true); + + if (stm_case == 0) + return false; + + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx); + if (TARGET_THUMB1) + { + gcc_assert (base_reg_dies); + write_back = TRUE; + } + + if (stm_case == 5) + { + gcc_assert (base_reg_dies); + emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); + offset = 0; + } + + addr = plus_constant (Pmode, base_reg_rtx, offset); + + for (i = 0; i < nops; i++) + { + addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx, + write_back ? offset + i * 4 : 0)); + return true; +} + +/* Called from a peephole2 expander to turn a sequence of stores that are + preceded by constant loads into an STM instruction. OPERANDS are the + operands found by the peephole matcher; NOPS indicates how many + separate stores we are trying to combine; there are 2 * NOPS + instructions in the peephole. + Returns true iff we could generate a new instruction. 
*/ + +bool +gen_const_stm_seq (rtx *operands, int nops) +{ + int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS]; + int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; + rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS]; + rtx mems[MAX_LDM_STM_OPS]; + int base_reg; + rtx base_reg_rtx; + HOST_WIDE_INT offset; + int write_back = FALSE; + int stm_case; + rtx addr; + bool base_reg_dies; + int i, j; + HARD_REG_SET allocated; + + stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs, + mem_order, &base_reg, &offset, false); + + if (stm_case == 0) + return false; + + memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs); + + /* If the same register is used more than once, try to find a free + register. */ + CLEAR_HARD_REG_SET (allocated); + for (i = 0; i < nops; i++) + { + for (j = i + 1; j < nops; j++) + if (regs[i] == regs[j]) + { + rtx t = peep2_find_free_register (0, nops * 2, + TARGET_THUMB1 ? "l" : "r", + SImode, &allocated); + if (t == NULL_RTX) + return false; + reg_rtxs[i] = t; + regs[i] = REGNO (t); + } + } + + /* Compute an ordering that maps the register numbers to an ascending + sequence. */ + reg_order[0] = 0; + for (i = 0; i < nops; i++) + if (regs[i] < regs[reg_order[0]]) + reg_order[0] = i; + + for (i = 1; i < nops; i++) + { + int this_order = reg_order[i - 1]; + for (j = 0; j < nops; j++) + if (regs[j] > regs[reg_order[i - 1]] + && (this_order == reg_order[i - 1] + || regs[j] < regs[this_order])) + this_order = j; + reg_order[i] = this_order; + } + + /* Ensure that registers that must be live after the instruction end + up with the correct value. */ + for (i = 0; i < nops; i++) + { + int this_order = reg_order[i]; + if ((this_order != mem_order[i] + || orig_reg_rtxs[this_order] != reg_rtxs[this_order]) + && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order])) + return false; + } + + /* Load the constants. */ + for (i = 0; i < nops; i++) + { + rtx op = operands[2 * nops + mem_order[i]]; + sorted_regs[i] = regs[reg_order[i]]; + emit_move_insn (reg_rtxs[reg_order[i]], op); + } + + base_reg_rtx = gen_rtx_REG (Pmode, base_reg); + + base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx); + if (TARGET_THUMB1) + { + gcc_assert (base_reg_dies); + write_back = TRUE; + } + + if (stm_case == 5) + { + gcc_assert (base_reg_dies); + emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); + offset = 0; + } + + addr = plus_constant (Pmode, base_reg_rtx, offset); + + for (i = 0; i < nops; i++) + { + addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); + mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], + SImode, addr, 0); + } + emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx, + write_back ? offset + i * 4 : 0)); + return true; +} + +/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit + unaligned copies on processors which support unaligned semantics for those + instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency + (using more registers) by doing e.g. load/load/store/store for a factor of 2. + An interleave factor of 1 (the minimum) will perform no interleaving. + Load/store multiple are used for aligned addresses where possible. 
*/ + +static void +arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, + HOST_WIDE_INT length, + unsigned int interleave_factor) +{ + rtx *regs = XALLOCAVEC (rtx, interleave_factor); + int *regnos = XALLOCAVEC (int, interleave_factor); + HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD; + HOST_WIDE_INT i, j; + HOST_WIDE_INT remaining = length, words; + rtx halfword_tmp = NULL, byte_tmp = NULL; + rtx dst, src; + bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD; + bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD; + HOST_WIDE_INT srcoffset, dstoffset; + HOST_WIDE_INT src_autoinc, dst_autoinc; + rtx mem, addr; + + gcc_assert (1 <= interleave_factor && interleave_factor <= 4); + + /* Use hard registers if we have aligned source or destination so we can use + load/store multiple with contiguous registers. */ + if (dst_aligned || src_aligned) + for (i = 0; i < interleave_factor; i++) + regs[i] = gen_rtx_REG (SImode, i); + else + for (i = 0; i < interleave_factor; i++) + regs[i] = gen_reg_rtx (SImode); + + dst = copy_addr_to_reg (XEXP (dstbase, 0)); + src = copy_addr_to_reg (XEXP (srcbase, 0)); + + srcoffset = dstoffset = 0; + + /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. + For copying the last bytes we want to subtract this offset again. */ + src_autoinc = dst_autoinc = 0; + + for (i = 0; i < interleave_factor; i++) + regnos[i] = i; + + /* Copy BLOCK_SIZE_BYTES chunks. */ + + for (i = 0; i + block_size_bytes <= length; i += block_size_bytes) + { + /* Load words. */ + if (src_aligned && interleave_factor > 1) + { + emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src, + TRUE, srcbase, &srcoffset)); + src_autoinc += UNITS_PER_WORD * interleave_factor; + } + else + { + for (j = 0; j < interleave_factor; j++) + { + addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD + - src_autoinc)); + mem = adjust_automodify_address (srcbase, SImode, addr, + srcoffset + j * UNITS_PER_WORD); + emit_insn (gen_unaligned_loadsi (regs[j], mem)); + } + srcoffset += block_size_bytes; + } + + /* Store words. */ + if (dst_aligned && interleave_factor > 1) + { + emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst, + TRUE, dstbase, &dstoffset)); + dst_autoinc += UNITS_PER_WORD * interleave_factor; + } + else + { + for (j = 0; j < interleave_factor; j++) + { + addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD + - dst_autoinc)); + mem = adjust_automodify_address (dstbase, SImode, addr, + dstoffset + j * UNITS_PER_WORD); + emit_insn (gen_unaligned_storesi (mem, regs[j])); + } + dstoffset += block_size_bytes; + } + + remaining -= block_size_bytes; + } + + /* Copy any whole words left (note these aren't interleaved with any + subsequent halfword/byte load/stores in the interests of simplicity). 
*/ + + words = remaining / UNITS_PER_WORD; + + gcc_assert (words < interleave_factor); + + if (src_aligned && words > 1) + { + emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, + &srcoffset)); + src_autoinc += UNITS_PER_WORD * words; + } + else + { + for (j = 0; j < words; j++) + { + addr = plus_constant (Pmode, src, + srcoffset + j * UNITS_PER_WORD - src_autoinc); + mem = adjust_automodify_address (srcbase, SImode, addr, + srcoffset + j * UNITS_PER_WORD); + emit_insn (gen_unaligned_loadsi (regs[j], mem)); + } + srcoffset += words * UNITS_PER_WORD; + } + + if (dst_aligned && words > 1) + { + emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase, + &dstoffset)); + dst_autoinc += words * UNITS_PER_WORD; + } + else + { + for (j = 0; j < words; j++) + { + addr = plus_constant (Pmode, dst, + dstoffset + j * UNITS_PER_WORD - dst_autoinc); + mem = adjust_automodify_address (dstbase, SImode, addr, + dstoffset + j * UNITS_PER_WORD); + emit_insn (gen_unaligned_storesi (mem, regs[j])); + } + dstoffset += words * UNITS_PER_WORD; + } + + remaining -= words * UNITS_PER_WORD; + + gcc_assert (remaining < 4); + + /* Copy a halfword if necessary. */ + + if (remaining >= 2) + { + halfword_tmp = gen_reg_rtx (SImode); + + addr = plus_constant (Pmode, src, srcoffset - src_autoinc); + mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset); + emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem)); + + /* Either write out immediately, or delay until we've loaded the last + byte, depending on interleave factor. */ + if (interleave_factor == 1) + { + addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); + mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); + emit_insn (gen_unaligned_storehi (mem, + gen_lowpart (HImode, halfword_tmp))); + halfword_tmp = NULL; + dstoffset += 2; + } + + remaining -= 2; + srcoffset += 2; + } + + gcc_assert (remaining < 2); + + /* Copy last byte. */ + + if ((remaining & 1) != 0) + { + byte_tmp = gen_reg_rtx (SImode); + + addr = plus_constant (Pmode, src, srcoffset - src_autoinc); + mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset); + emit_move_insn (gen_lowpart (QImode, byte_tmp), mem); + + if (interleave_factor == 1) + { + addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); + mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); + emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); + byte_tmp = NULL; + dstoffset++; + } + + remaining--; + srcoffset++; + } + + /* Store last halfword if we haven't done so already. */ + + if (halfword_tmp) + { + addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); + mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); + emit_insn (gen_unaligned_storehi (mem, + gen_lowpart (HImode, halfword_tmp))); + dstoffset += 2; + } + + /* Likewise for last byte. */ + + if (byte_tmp) + { + addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); + mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); + emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); + dstoffset++; + } + + gcc_assert (remaining == 0 && srcoffset == dstoffset); +} + +/* From mips_adjust_block_mem: + + Helper function for doing a loop-based block operation on memory + reference MEM. Each iteration of the loop will operate on LENGTH + bytes of MEM. + + Create a new base register for use within the loop and point it to + the start of MEM. Create a new memory reference that uses this + register. Store them in *LOOP_REG and *LOOP_MEM respectively. 
*/ + +static void +arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, + rtx *loop_mem) +{ + *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); + + /* Although the new mem does not refer to a known location, + it does keep up to LENGTH bytes of alignment. */ + *loop_mem = change_address (mem, BLKmode, *loop_reg); + set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); +} + +/* From mips_block_move_loop: + + Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER + bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that + the memory regions do not overlap. */ + +static void +arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, + unsigned int interleave_factor, + HOST_WIDE_INT bytes_per_iter) +{ + rtx label, src_reg, dest_reg, final_src, test; + HOST_WIDE_INT leftover; + + leftover = length % bytes_per_iter; + length -= leftover; + + /* Create registers and memory references for use within the loop. */ + arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); + arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); + + /* Calculate the value that SRC_REG should have after the last iteration of + the loop. */ + final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), + 0, 0, OPTAB_WIDEN); + + /* Emit the start of the loop. */ + label = gen_label_rtx (); + emit_label (label); + + /* Emit the loop body. */ + arm_block_move_unaligned_straight (dest, src, bytes_per_iter, + interleave_factor); + + /* Move on to the next block. */ + emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); + emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); + + /* Emit the loop condition. */ + test = gen_rtx_NE (VOIDmode, src_reg, final_src); + emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); + + /* Mop up any left-over bytes. */ + if (leftover) + arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); +} + +/* Emit a block move when either the source or destination is unaligned (not + aligned to a four-byte boundary). This may need further tuning depending on + core type, optimize_size setting, etc. */ + +static int +arm_movmemqi_unaligned (rtx *operands) +{ + HOST_WIDE_INT length = INTVAL (operands[2]); + + if (optimize_size) + { + bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; + bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD; + /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit + size of code if optimizing for size. We'll use ldm/stm if src_aligned + or dst_aligned though: allow more interleaving in those cases since the + resulting code can be smaller. */ + unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; + HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; + + if (length > 12) + arm_block_move_unaligned_loop (operands[0], operands[1], length, + interleave_factor, bytes_per_iter); + else + arm_block_move_unaligned_straight (operands[0], operands[1], length, + interleave_factor); + } + else + { + /* Note that the loop created by arm_block_move_unaligned_loop may be + subject to loop unrolling, which makes tuning this condition a little + redundant. 
*/ + if (length > 32) + arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16); + else + arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); + } + + return 1; +} + +int +arm_gen_movmemqi (rtx *operands) +{ + HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes; + HOST_WIDE_INT srcoffset, dstoffset; + int i; + rtx src, dst, srcbase, dstbase; + rtx part_bytes_reg = NULL; + rtx mem; + + if (!CONST_INT_P (operands[2]) + || !CONST_INT_P (operands[3]) + || INTVAL (operands[2]) > 64) + return 0; + + if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) + return arm_movmemqi_unaligned (operands); + + if (INTVAL (operands[3]) & 3) + return 0; + + dstbase = operands[0]; + srcbase = operands[1]; + + dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0)); + src = copy_to_mode_reg (SImode, XEXP (srcbase, 0)); + + in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2])); + out_words_to_go = INTVAL (operands[2]) / 4; + last_bytes = INTVAL (operands[2]) & 3; + dstoffset = srcoffset = 0; + + if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0) + part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3); + + for (i = 0; in_words_to_go >= 2; i+=4) + { + if (in_words_to_go > 4) + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src, + TRUE, srcbase, &srcoffset)); + else + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go, + src, FALSE, srcbase, + &srcoffset)); + + if (out_words_to_go) + { + if (out_words_to_go > 4) + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst, + TRUE, dstbase, &dstoffset)); + else if (out_words_to_go != 1) + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, + out_words_to_go, dst, + (last_bytes == 0 + ? FALSE : TRUE), + dstbase, &dstoffset)); + else + { + mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); + emit_move_insn (mem, gen_rtx_REG (SImode, 0)); + if (last_bytes != 0) + { + emit_insn (gen_addsi3 (dst, dst, GEN_INT (4))); + dstoffset += 4; + } + } + } + + in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4; + out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4; + } + + /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */ + if (out_words_to_go) + { + rtx sreg; + + mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); + sreg = copy_to_reg (mem); + + mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); + emit_move_insn (mem, sreg); + in_words_to_go--; + + gcc_assert (!in_words_to_go); /* Sanity check */ + } + + if (in_words_to_go) + { + gcc_assert (in_words_to_go > 0); + + mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); + part_bytes_reg = copy_to_mode_reg (SImode, mem); + } + + gcc_assert (!last_bytes || part_bytes_reg); + + if (BYTES_BIG_ENDIAN && last_bytes) + { + rtx tmp = gen_reg_rtx (SImode); + + /* The bytes we want are in the top end of the word. 
*/ + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, + GEN_INT (8 * (4 - last_bytes)))); + part_bytes_reg = tmp; + + while (last_bytes) + { + mem = adjust_automodify_address (dstbase, QImode, + plus_constant (Pmode, dst, + last_bytes - 1), + dstoffset + last_bytes - 1); + emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); + + if (--last_bytes) + { + tmp = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8))); + part_bytes_reg = tmp; + } + } + + } + else + { + if (last_bytes > 1) + { + mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset); + emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg)); + last_bytes -= 2; + if (last_bytes) + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_addsi3 (dst, dst, const2_rtx)); + emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16))); + part_bytes_reg = tmp; + dstoffset += 2; + } + } + + if (last_bytes) + { + mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset); + emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); + } + } + + return 1; +} + +/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx +by mode size. */ +inline static rtx +next_consecutive_mem (rtx mem) +{ + enum machine_mode mode = GET_MODE (mem); + HOST_WIDE_INT offset = GET_MODE_SIZE (mode); + rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset); + + return adjust_automodify_address (mem, mode, addr, offset); +} + +/* Copy using LDRD/STRD instructions whenever possible. + Returns true upon success. */ +bool +gen_movmem_ldrd_strd (rtx *operands) +{ + unsigned HOST_WIDE_INT len; + HOST_WIDE_INT align; + rtx src, dst, base; + rtx reg0; + bool src_aligned, dst_aligned; + bool src_volatile, dst_volatile; + + gcc_assert (CONST_INT_P (operands[2])); + gcc_assert (CONST_INT_P (operands[3])); + + len = UINTVAL (operands[2]); + if (len > 64) + return false; + + /* Maximum alignment we can assume for both src and dst buffers. */ + align = INTVAL (operands[3]); + + if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0)) + return false; + + /* Place src and dst addresses in registers + and update the corresponding mem rtx. */ + dst = operands[0]; + dst_volatile = MEM_VOLATILE_P (dst); + dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD; + base = copy_to_mode_reg (SImode, XEXP (dst, 0)); + dst = adjust_automodify_address (dst, VOIDmode, base, 0); + + src = operands[1]; + src_volatile = MEM_VOLATILE_P (src); + src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD; + base = copy_to_mode_reg (SImode, XEXP (src, 0)); + src = adjust_automodify_address (src, VOIDmode, base, 0); + + if (!unaligned_access && !(src_aligned && dst_aligned)) + return false; + + if (src_volatile || dst_volatile) + return false; + + /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */ + if (!(dst_aligned || src_aligned)) + return arm_gen_movmemqi (operands); + + src = adjust_address (src, DImode, 0); + dst = adjust_address (dst, DImode, 0); + while (len >= 8) + { + len -= 8; + reg0 = gen_reg_rtx (DImode); + if (src_aligned) + emit_move_insn (reg0, src); + else + emit_insn (gen_unaligned_loaddi (reg0, src)); + + if (dst_aligned) + emit_move_insn (dst, reg0); + else + emit_insn (gen_unaligned_storedi (dst, reg0)); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + } + + gcc_assert (len < 8); + if (len >= 4) + { + /* More than a word but less than a double-word to copy. Copy a word. 
*/ + reg0 = gen_reg_rtx (SImode); + src = adjust_address (src, SImode, 0); + dst = adjust_address (dst, SImode, 0); + if (src_aligned) + emit_move_insn (reg0, src); + else + emit_insn (gen_unaligned_loadsi (reg0, src)); + + if (dst_aligned) + emit_move_insn (dst, reg0); + else + emit_insn (gen_unaligned_storesi (dst, reg0)); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + len -= 4; + } + + if (len == 0) + return true; + + /* Copy the remaining bytes. */ + if (len >= 2) + { + dst = adjust_address (dst, HImode, 0); + src = adjust_address (src, HImode, 0); + reg0 = gen_reg_rtx (SImode); + if (src_aligned) + emit_insn (gen_zero_extendhisi2 (reg0, src)); + else + emit_insn (gen_unaligned_loadhiu (reg0, src)); + + if (dst_aligned) + emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0))); + else + emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0))); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + if (len == 2) + return true; + } + + dst = adjust_address (dst, QImode, 0); + src = adjust_address (src, QImode, 0); + reg0 = gen_reg_rtx (QImode); + emit_move_insn (reg0, src); + emit_move_insn (dst, reg0); + return true; +} + +/* Select a dominance comparison mode if possible for a test of the general + form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. + COND_OR == DOM_CC_X_AND_Y => (X && Y) + COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y) + COND_OR == DOM_CC_X_OR_Y => (X || Y) + In all cases OP will be either EQ or NE, but we don't need to know which + here. If we are unable to support a dominance comparison we return + CC mode. This will then fail to match for the RTL expressions that + generate this call. */ +enum machine_mode +arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or) +{ + enum rtx_code cond1, cond2; + int swapped = 0; + + /* Currently we will probably get the wrong result if the individual + comparisons are not simple. This also ensures that it is safe to + reverse a comparison if necessary. */ + if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1)) + != CCmode) + || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1)) + != CCmode)) + return CCmode; + + /* The if_then_else variant of this tests the second condition if the + first passes, but is true if the first fails. Reverse the first + condition to get a true "inclusive-or" expression. */ + if (cond_or == DOM_CC_NX_OR_Y) + cond1 = reverse_condition (cond1); + + /* If the comparisons are not equal, and one doesn't dominate the other, + then we can't do this. 
*/ + if (cond1 != cond2 + && !comparison_dominates_p (cond1, cond2) + && (swapped = 1, !comparison_dominates_p (cond2, cond1))) + return CCmode; + + if (swapped) + { + enum rtx_code temp = cond1; + cond1 = cond2; + cond2 = temp; + } + + switch (cond1) + { + case EQ: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DEQmode; + + switch (cond2) + { + case EQ: return CC_DEQmode; + case LE: return CC_DLEmode; + case LEU: return CC_DLEUmode; + case GE: return CC_DGEmode; + case GEU: return CC_DGEUmode; + default: gcc_unreachable (); + } + + case LT: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DLTmode; + + switch (cond2) + { + case LT: + return CC_DLTmode; + case LE: + return CC_DLEmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case GT: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DGTmode; + + switch (cond2) + { + case GT: + return CC_DGTmode; + case GE: + return CC_DGEmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case LTU: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DLTUmode; + + switch (cond2) + { + case LTU: + return CC_DLTUmode; + case LEU: + return CC_DLEUmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + case GTU: + if (cond_or == DOM_CC_X_AND_Y) + return CC_DGTUmode; + + switch (cond2) + { + case GTU: + return CC_DGTUmode; + case GEU: + return CC_DGEUmode; + case NE: + return CC_DNEmode; + default: + gcc_unreachable (); + } + + /* The remaining cases only occur when both comparisons are the + same. */ + case NE: + gcc_assert (cond1 == cond2); + return CC_DNEmode; + + case LE: + gcc_assert (cond1 == cond2); + return CC_DLEmode; + + case GE: + gcc_assert (cond1 == cond2); + return CC_DGEmode; + + case LEU: + gcc_assert (cond1 == cond2); + return CC_DLEUmode; + + case GEU: + gcc_assert (cond1 == cond2); + return CC_DGEUmode; + + default: + gcc_unreachable (); + } +} + +enum machine_mode +arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) +{ + /* All floating point compares return CCFP if it is an equality + comparison, and CCFPE otherwise. */ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + switch (op) + { + case EQ: + case NE: + case UNORDERED: + case ORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case LTGT: + return CCFPmode; + + case LT: + case LE: + case GT: + case GE: + return CCFPEmode; + + default: + gcc_unreachable (); + } + } + + /* A compare with a shifted operand. Because of canonicalization, the + comparison will have to be swapped when we emit the assembler. */ + if (GET_MODE (y) == SImode + && (REG_P (y) || (GET_CODE (y) == SUBREG)) + && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT + || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE + || GET_CODE (x) == ROTATERT)) + return CC_SWPmode; + + /* This operation is performed swapped, but since we only rely on the Z + flag we don't need an additional mode. */ + if (GET_MODE (y) == SImode + && (REG_P (y) || (GET_CODE (y) == SUBREG)) + && GET_CODE (x) == NEG + && (op == EQ || op == NE)) + return CC_Zmode; + + /* This is a special case that is used by combine to allow a + comparison of a shifted byte load to be split into a zero-extend + followed by a comparison of the shifted integer (only valid for + equalities and unsigned inequalities). 
*/ + if (GET_MODE (x) == SImode + && GET_CODE (x) == ASHIFT + && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24 + && GET_CODE (XEXP (x, 0)) == SUBREG + && MEM_P (SUBREG_REG (XEXP (x, 0))) + && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode + && (op == EQ || op == NE + || op == GEU || op == GTU || op == LTU || op == LEU) + && CONST_INT_P (y)) + return CC_Zmode; + + /* A construct for a conditional compare, if the false arm contains + 0, then both conditions must be true, otherwise either condition + must be true. Not all conditions are possible, so CCmode is + returned if it can't be done. */ + if (GET_CODE (x) == IF_THEN_ELSE + && (XEXP (x, 2) == const0_rtx + || XEXP (x, 2) == const1_rtx) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + INTVAL (XEXP (x, 2))); + + /* Alternate canonicalizations of the above. These are somewhat cleaner. */ + if (GET_CODE (x) == AND + && (op == EQ || op == NE) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + DOM_CC_X_AND_Y); + + if (GET_CODE (x) == IOR + && (op == EQ || op == NE) + && COMPARISON_P (XEXP (x, 0)) + && COMPARISON_P (XEXP (x, 1))) + return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), + DOM_CC_X_OR_Y); + + /* An operation (on Thumb) where we want to test for a single bit. + This is done by shifting that bit up into the top bit of a + scratch register; we can then branch on the sign bit. */ + if (TARGET_THUMB1 + && GET_MODE (x) == SImode + && (op == EQ || op == NE) + && GET_CODE (x) == ZERO_EXTRACT + && XEXP (x, 1) == const1_rtx) + return CC_Nmode; + + /* An operation that sets the condition codes as a side-effect, the + V flag is not set correctly, so we can only use comparisons where + this doesn't matter. (For LT and GE we can use "mi" and "pl" + instead.) */ + /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */ + if (GET_MODE (x) == SImode + && y == const0_rtx + && (op == EQ || op == NE || op == LT || op == GE) + && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS + || GET_CODE (x) == AND || GET_CODE (x) == IOR + || GET_CODE (x) == XOR || GET_CODE (x) == MULT + || GET_CODE (x) == NOT || GET_CODE (x) == NEG + || GET_CODE (x) == LSHIFTRT + || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT + || GET_CODE (x) == ROTATERT + || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT))) + return CC_NOOVmode; + + if (GET_MODE (x) == QImode && (op == EQ || op == NE)) + return CC_Zmode; + + if (GET_MODE (x) == SImode && (op == LTU || op == GEU) + && GET_CODE (x) == PLUS + && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) + return CC_Cmode; + + if (GET_MODE (x) == DImode || GET_MODE (y) == DImode) + { + switch (op) + { + case EQ: + case NE: + /* A DImode comparison against zero can be implemented by + or'ing the two halves together. */ + if (y == const0_rtx) + return CC_Zmode; + + /* We can do an equality test in three Thumb instructions. */ + if (!TARGET_32BIT) + return CC_Zmode; + + /* FALLTHROUGH */ + + case LTU: + case LEU: + case GTU: + case GEU: + /* DImode unsigned comparisons can be implemented by cmp + + cmpeq without a scratch register. Not worth doing in + Thumb-2. */ + if (TARGET_32BIT) + return CC_CZmode; + + /* FALLTHROUGH */ + + case LT: + case LE: + case GT: + case GE: + /* DImode signed and unsigned comparisons can be implemented + by cmp + sbcs with a scratch register, but that does not + set the Z flag - we must reverse GT/LE/GTU/LEU. 
*/ + gcc_assert (op != EQ && op != NE); + return CC_NCVmode; + + default: + gcc_unreachable (); + } + } + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC) + return GET_MODE (x); + + return CCmode; +} + +/* X and Y are two things to compare using CODE. Emit the compare insn and + return the rtx for register 0 in the proper mode. FP means this is a + floating point compare: I don't think that it is needed on the arm. */ +rtx +arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch) +{ + enum machine_mode mode; + rtx cc_reg; + int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode; + + /* We might have X as a constant, Y as a register because of the predicates + used for cmpdi. If so, force X to a register here. */ + if (dimode_comparison && !REG_P (x)) + x = force_reg (DImode, x); + + mode = SELECT_CC_MODE (code, x, y); + cc_reg = gen_rtx_REG (mode, CC_REGNUM); + + if (dimode_comparison + && mode != CC_CZmode) + { + rtx clobber, set; + + /* To compare two non-zero values for equality, XOR them and + then compare against zero. Not used for ARM mode; there + CC_CZmode is cheaper. */ + if (mode == CC_Zmode && y != const0_rtx) + { + gcc_assert (!reload_completed); + x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN); + y = const0_rtx; + } + + /* A scratch register is required. */ + if (reload_completed) + gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode); + else + scratch = gen_rtx_SCRATCH (SImode); + + clobber = gen_rtx_CLOBBER (VOIDmode, scratch); + set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); + } + else + emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + + return cc_reg; +} + +/* Generate a sequence of insns that will generate the correct return + address mask depending on the physical architecture that the program + is running on. */ +rtx +arm_gen_return_addr_mask (void) +{ + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_return_addr_mask (reg)); + return reg; +} + +void +arm_reload_in_hi (rtx *operands) +{ + rtx ref = operands[1]; + rtx base, scratch; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (ref) == SUBREG) + { + offset = SUBREG_BYTE (ref); + ref = SUBREG_REG (ref); + } + + if (REG_P (ref)) + { + /* We have a pseudo which has been spilt onto the stack; there + are two cases here: the first where there is a simple + stack-slot replacement and a second where the stack-slot is + out of range, or is used as a subreg. */ + if (reg_equiv_mem (REGNO (ref))) + { + ref = reg_equiv_mem (REGNO (ref)); + base = find_replacement (&XEXP (ref, 0)); + } + else + /* The slot is out of range, or was dressed up in a SUBREG. */ + base = reg_equiv_address (REGNO (ref)); + } + else + base = find_replacement (&XEXP (ref, 0)); + + /* Handle the case where the address is too complex to be offset by 1. */ + if (GET_CODE (base) == MINUS + || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1)))) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + emit_set_insn (base_plus, base); + base = base_plus; + } + else if (GET_CODE (base) == PLUS) + { + /* The addend must be CONST_INT, or we would have dealt with it above. */ + HOST_WIDE_INT hi, lo; + + offset += INTVAL (XEXP (base, 1)); + base = XEXP (base, 0); + + /* Rework the address into a legal sequence of insns. */ + /* Valid range for lo is -4095 -> 4095 */ + lo = (offset >= 0 + ? 
(offset & 0xfff) + : -((-offset) & 0xfff)); + + /* Corner case, if lo is the max offset then we would be out of range + once we have added the additional 1 below, so bump the msb into the + pre-loading insn(s). */ + if (lo == 4095) + lo &= 0x7ff; + + hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff) + ^ (HOST_WIDE_INT) 0x80000000) + - (HOST_WIDE_INT) 0x80000000); + + gcc_assert (hi + lo == offset); + + if (hi != 0) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Get the base address; addsi3 knows how to handle constants + that require more than one insn. */ + emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi))); + base = base_plus; + offset = lo; + } + } + + /* Operands[2] may overlap operands[0] (though it won't overlap + operands[1]), that's why we asked for a DImode reg -- so we can + use the bit that does not overlap. */ + if (REGNO (operands[2]) == REGNO (operands[0])) + scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + else + scratch = gen_rtx_REG (SImode, REGNO (operands[2])); + + emit_insn (gen_zero_extendqisi2 (scratch, + gen_rtx_MEM (QImode, + plus_constant (Pmode, base, + offset)))); + emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_MEM (QImode, + plus_constant (Pmode, base, + offset + 1)))); + if (!BYTES_BIG_ENDIAN) + emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_IOR (SImode, + gen_rtx_ASHIFT + (SImode, + gen_rtx_SUBREG (SImode, operands[0], 0), + GEN_INT (8)), + scratch)); + else + emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0), + gen_rtx_IOR (SImode, + gen_rtx_ASHIFT (SImode, scratch, + GEN_INT (8)), + gen_rtx_SUBREG (SImode, operands[0], 0))); +} + +/* Handle storing a half-word to memory during reload by synthesizing as two + byte stores. Take care not to clobber the input values until after we + have moved them somewhere safe. This code assumes that if the DImode + scratch in operands[2] overlaps either the input value or output address + in some way, then that value must die in this insn (we absolutely need + two scratch registers for some corner cases). */ +void +arm_reload_out_hi (rtx *operands) +{ + rtx ref = operands[0]; + rtx outval = operands[1]; + rtx base, scratch; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (ref) == SUBREG) + { + offset = SUBREG_BYTE (ref); + ref = SUBREG_REG (ref); + } + + if (REG_P (ref)) + { + /* We have a pseudo which has been spilt onto the stack; there + are two cases here: the first where there is a simple + stack-slot replacement and a second where the stack-slot is + out of range, or is used as a subreg. */ + if (reg_equiv_mem (REGNO (ref))) + { + ref = reg_equiv_mem (REGNO (ref)); + base = find_replacement (&XEXP (ref, 0)); + } + else + /* The slot is out of range, or was dressed up in a SUBREG. */ + base = reg_equiv_address (REGNO (ref)); + } + else + base = find_replacement (&XEXP (ref, 0)); + + scratch = gen_rtx_REG (SImode, REGNO (operands[2])); + + /* Handle the case where the address is too complex to be offset by 1. */ + if (GET_CODE (base) == MINUS + || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1)))) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Be careful not to destroy OUTVAL. */ + if (reg_overlap_mentioned_p (base_plus, outval)) + { + /* Updating base_plus might destroy outval, see if we can + swap the scratch and base_plus. 
*/ + if (!reg_overlap_mentioned_p (scratch, outval)) + { + rtx tmp = scratch; + scratch = base_plus; + base_plus = tmp; + } + else + { + rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); + + /* Be conservative and copy OUTVAL into the scratch now, + this should only be necessary if outval is a subreg + of something larger than a word. */ + /* XXX Might this clobber base? I can't see how it can, + since scratch is known to overlap with OUTVAL, and + must be wider than a word. */ + emit_insn (gen_movhi (scratch_hi, outval)); + outval = scratch_hi; + } + } + + emit_set_insn (base_plus, base); + base = base_plus; + } + else if (GET_CODE (base) == PLUS) + { + /* The addend must be CONST_INT, or we would have dealt with it above. */ + HOST_WIDE_INT hi, lo; + + offset += INTVAL (XEXP (base, 1)); + base = XEXP (base, 0); + + /* Rework the address into a legal sequence of insns. */ + /* Valid range for lo is -4095 -> 4095 */ + lo = (offset >= 0 + ? (offset & 0xfff) + : -((-offset) & 0xfff)); + + /* Corner case, if lo is the max offset then we would be out of range + once we have added the additional 1 below, so bump the msb into the + pre-loading insn(s). */ + if (lo == 4095) + lo &= 0x7ff; + + hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff) + ^ (HOST_WIDE_INT) 0x80000000) + - (HOST_WIDE_INT) 0x80000000); + + gcc_assert (hi + lo == offset); + + if (hi != 0) + { + rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1); + + /* Be careful not to destroy OUTVAL. */ + if (reg_overlap_mentioned_p (base_plus, outval)) + { + /* Updating base_plus might destroy outval, see if we + can swap the scratch and base_plus. */ + if (!reg_overlap_mentioned_p (scratch, outval)) + { + rtx tmp = scratch; + scratch = base_plus; + base_plus = tmp; + } + else + { + rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2])); + + /* Be conservative and copy outval into scratch now, + this should only be necessary if outval is a + subreg of something larger than a word. */ + /* XXX Might this clobber base? I can't see how it + can, since scratch is known to overlap with + outval. */ + emit_insn (gen_movhi (scratch_hi, outval)); + outval = scratch_hi; + } + } + + /* Get the base address; addsi3 knows how to handle constants + that require more than one insn. */ + emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi))); + base = base_plus; + offset = lo; + } + } + + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_movqi (gen_rtx_MEM (QImode, + plus_constant (Pmode, base, + offset + 1)), + gen_lowpart (QImode, outval))); + emit_insn (gen_lshrsi3 (scratch, + gen_rtx_SUBREG (SImode, outval, 0), + GEN_INT (8))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base, + offset)), + gen_lowpart (QImode, scratch))); + } + else + { + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base, + offset)), + gen_lowpart (QImode, outval))); + emit_insn (gen_lshrsi3 (scratch, + gen_rtx_SUBREG (SImode, outval, 0), + GEN_INT (8))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, + plus_constant (Pmode, base, + offset + 1)), + gen_lowpart (QImode, scratch))); + } +} + +/* Return true if a type must be passed in memory. For AAPCS, small aggregates + (padded to the size of a word) should be passed in a register. */ + +static bool +arm_must_pass_in_stack (enum machine_mode mode, const_tree type) +{ + if (TARGET_AAPCS_BASED) + return must_pass_in_stack_var_size (mode, type); + else + return must_pass_in_stack_var_size_or_pad (mode, type); +} + + +/* For use by FUNCTION_ARG_PADDING (MODE, TYPE). 
+   Return true if an argument passed on the stack should be padded upwards,
+   i.e. if the least-significant byte has useful data.
+   For legacy APCS ABIs we use the default. For AAPCS based ABIs small
+   aggregate types are placed in the lowest memory address. */
+
+bool
+arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
+{
+  if (!TARGET_AAPCS_BASED)
+    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
+
+  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
+    return false;
+
+  return true;
+}
+
+
+/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
+   Return !BYTES_BIG_ENDIAN if the least significant byte of the
+   register has useful data, and return the opposite if the most
+   significant byte does. */
+
+bool
+arm_pad_reg_upward (enum machine_mode mode,
+                    tree type, int first ATTRIBUTE_UNUSED)
+{
+  if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
+    {
+      /* For AAPCS, small aggregates, small fixed-point types,
+         and small complex types are always padded upwards. */
+      if (type)
+        {
+          if ((AGGREGATE_TYPE_P (type)
+               || TREE_CODE (type) == COMPLEX_TYPE
+               || FIXED_POINT_TYPE_P (type))
+              && int_size_in_bytes (type) <= 4)
+            return true;
+        }
+      else
+        {
+          if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
+              && GET_MODE_SIZE (mode) <= 4)
+            return true;
+        }
+    }
+
+  /* Otherwise, use default padding. */
+  return !BYTES_BIG_ENDIAN;
+}
+
+/* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
+   assuming that the address in the base register is word aligned. */
+bool
+offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
+{
+  HOST_WIDE_INT max_offset;
+
+  /* Offset must be a multiple of 4 in Thumb mode. */
+  if (TARGET_THUMB2 && ((offset & 3) != 0))
+    return false;
+
+  if (TARGET_THUMB2)
+    max_offset = 1020;
+  else if (TARGET_ARM)
+    max_offset = 255;
+  else
+    return false;
+
+  return ((offset <= max_offset) && (offset >= -max_offset));
+}
+
+/* Checks whether the operands are valid for use in an LDRD/STRD instruction.
+   Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
+   Assumes that the address in the base register RN is word aligned. Pattern
+   guarantees that both memory accesses use the same base register, the
+   offsets are constants within the range, and the gap between the offsets
+   is 4. If reload is complete, check that the registers are legal. WBACK
+   indicates whether the address is updated. LOAD indicates whether the
+   memory access is a load or a store. */
+bool
+operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
+                       bool wback, bool load)
+{
+  unsigned int t, t2, n;
+
+  if (!reload_completed)
+    return true;
+
+  if (!offset_ok_for_ldrd_strd (offset))
+    return false;
+
+  t = REGNO (rt);
+  t2 = REGNO (rt2);
+  n = REGNO (rn);
+
+  if ((TARGET_THUMB2)
+      && ((wback && (n == t || n == t2))
+          || (t == SP_REGNUM)
+          || (t == PC_REGNUM)
+          || (t2 == SP_REGNUM)
+          || (t2 == PC_REGNUM)
+          || (!load && (n == PC_REGNUM))
+          || (load && (t == t2))
+          /* Triggers Cortex-M3 LDRD errata. */
+          || (!wback && load && fix_cm3_ldrd && (n == t))))
+    return false;
+
+  if ((TARGET_ARM)
+      && ((wback && (n == t || n == t2))
+          || (t2 == PC_REGNUM)
+          || (t % 2 != 0)  /* First destination register is not even. */
+          || (t2 != t + 1)
+          /* PC can be used as base register (for offset addressing only),
+             but it is deprecated. */
+          || (n == PC_REGNUM)))
+    return false;
+
+  return true;
+}
+
+/* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
+   operand MEM's address contains an immediate offset from the base
+   register and has no side effects, in which case it sets BASE and
+   OFFSET accordingly. */
+static bool
+mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
+{
+  rtx addr;
+
+  gcc_assert (base != NULL && offset != NULL);
+
+  /* TODO: Handle more general memory operand patterns, such as
+     PRE_DEC and PRE_INC. */
+
+  if (side_effects_p (mem))
+    return false;
+
+  /* Can't deal with subregs. */
+  if (GET_CODE (mem) == SUBREG)
+    return false;
+
+  gcc_assert (MEM_P (mem));
+
+  *offset = const0_rtx;
+
+  addr = XEXP (mem, 0);
+
+  /* If addr isn't valid for DImode, then we can't handle it. */
+  if (!arm_legitimate_address_p (DImode, addr,
+                                 reload_in_progress || reload_completed))
+    return false;
+
+  if (REG_P (addr))
+    {
+      *base = addr;
+      return true;
+    }
+  else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+    {
+      *base = XEXP (addr, 0);
+      *offset = XEXP (addr, 1);
+      return (REG_P (*base) && CONST_INT_P (*offset));
+    }
+
+  return false;
+}
+
+#define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
+
+/* Called from a peephole2 to replace two word-size accesses with a
+   single LDRD/STRD instruction. Returns true iff we can generate a
+   new instruction sequence. That is, both accesses use the same base
+   register and the gap between constant offsets is 4. This function
+   may reorder its operands to match ldrd/strd RTL templates.
+   OPERANDS are the operands found by the peephole matcher;
+   OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
+   corresponding memory operands. LOAD indicates whether the access
+   is load or store. CONST_STORE indicates a store of constant
+   integer values held in OPERANDS[4,5] and assumes that the pattern
+   is 4 insns long, for the purpose of checking dead registers.
+   COMMUTE indicates that register operands may be reordered. */
+bool
+gen_operands_ldrd_strd (rtx *operands, bool load,
+                        bool const_store, bool commute)
+{
+  int nops = 2;
+  HOST_WIDE_INT offsets[2], offset;
+  rtx base = NULL_RTX;
+  rtx cur_base, cur_offset, tmp;
+  int i, gap;
+  HARD_REG_SET regset;
+
+  gcc_assert (!const_store || !load);
+  /* Check that the memory references are immediate offsets from the
+     same base register. Extract the base register, the destination
+     registers, and the corresponding memory offsets. */
+  for (i = 0; i < nops; i++)
+    {
+      if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
+        return false;
+
+      if (i == 0)
+        base = cur_base;
+      else if (REGNO (base) != REGNO (cur_base))
+        return false;
+
+      offsets[i] = INTVAL (cur_offset);
+      if (GET_CODE (operands[i]) == SUBREG)
+        {
+          tmp = SUBREG_REG (operands[i]);
+          gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
+          operands[i] = tmp;
+        }
+    }
+
+  /* Make sure there is no dependency between the individual loads. */
+  if (load && REGNO (operands[0]) == REGNO (base))
+    return false; /* RAW */
+
+  if (load && REGNO (operands[0]) == REGNO (operands[1]))
+    return false; /* WAW */
+
+  /* If the same input register is used in both stores
+     when storing different constants, try to find a free register.
+     For example, the code
+        mov r0, 0
+        str r0, [r2]
+        mov r0, 1
+        str r0, [r2, #4]
+     can be transformed into
+        mov r1, 0
+        strd r1, r0, [r2]
+     in Thumb mode assuming that r1 is free.
*/ + if (const_store + && REGNO (operands[0]) == REGNO (operands[1]) + && INTVAL (operands[4]) != INTVAL (operands[5])) + { + if (TARGET_THUMB2) + { + CLEAR_HARD_REG_SET (regset); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + + /* Use the new register in the first load to ensure that + if the original input register is not dead after peephole, + then it will have the correct constant value. */ + operands[0] = tmp; + } + else if (TARGET_ARM) + { + return false; + int regno = REGNO (operands[0]); + if (!peep2_reg_dead_p (4, operands[0])) + { + /* When the input register is even and is not dead after the + pattern, it has to hold the second constant but we cannot + form a legal STRD in ARM mode with this register as the second + register. */ + if (regno % 2 == 0) + return false; + + /* Is regno-1 free? */ + SET_HARD_REG_SET (regset); + CLEAR_HARD_REG_BIT(regset, regno - 1); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + + operands[0] = tmp; + } + else + { + /* Find a DImode register. */ + CLEAR_HARD_REG_SET (regset); + tmp = peep2_find_free_register (0, 4, "r", DImode, ®set); + if (tmp != NULL_RTX) + { + operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0); + operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4); + } + else + { + /* Can we use the input register to form a DI register? */ + SET_HARD_REG_SET (regset); + CLEAR_HARD_REG_BIT(regset, + regno % 2 == 0 ? regno + 1 : regno - 1); + tmp = peep2_find_free_register (0, 4, "r", SImode, ®set); + if (tmp == NULL_RTX) + return false; + operands[regno % 2 == 1 ? 0 : 1] = tmp; + } + } + + gcc_assert (operands[0] != NULL_RTX); + gcc_assert (operands[1] != NULL_RTX); + gcc_assert (REGNO (operands[0]) % 2 == 0); + gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1); + } + } + + /* Make sure the instructions are ordered with lower memory access first. */ + if (offsets[0] > offsets[1]) + { + gap = offsets[0] - offsets[1]; + offset = offsets[1]; + + /* Swap the instructions such that lower memory is accessed first. */ + SWAP_RTX (operands[0], operands[1]); + SWAP_RTX (operands[2], operands[3]); + if (const_store) + SWAP_RTX (operands[4], operands[5]); + } + else + { + gap = offsets[1] - offsets[0]; + offset = offsets[0]; + } + + /* Make sure accesses are to consecutive memory locations. */ + if (gap != 4) + return false; + + /* Make sure we generate legal instructions. */ + if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, + false, load)) + return true; + + /* In Thumb state, where registers are almost unconstrained, there + is little hope to fix it. */ + if (TARGET_THUMB2) + return false; + + if (load && commute) + { + /* Try reordering registers. */ + SWAP_RTX (operands[0], operands[1]); + if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset, + false, load)) + return true; + } + + if (const_store) + { + /* If input registers are dead after this pattern, they can be + reordered or replaced by other registers that are free in the + current pattern. */ + if (!peep2_reg_dead_p (4, operands[0]) + || !peep2_reg_dead_p (4, operands[1])) + return false; + + /* Try to reorder the input registers. 
*/ + /* For example, the code + mov r0, 0 + mov r1, 1 + str r1, [r2] + str r0, [r2, #4] + can be transformed into + mov r1, 0 + mov r0, 1 + strd r0, [r2] + */ + if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset, + false, false)) + { + SWAP_RTX (operands[0], operands[1]); + return true; + } + + /* Try to find a free DI register. */ + CLEAR_HARD_REG_SET (regset); + add_to_hard_reg_set (®set, SImode, REGNO (operands[0])); + add_to_hard_reg_set (®set, SImode, REGNO (operands[1])); + while (true) + { + tmp = peep2_find_free_register (0, 4, "r", DImode, ®set); + if (tmp == NULL_RTX) + return false; + + /* DREG must be an even-numbered register in DImode. + Split it into SI registers. */ + operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0); + operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4); + gcc_assert (operands[0] != NULL_RTX); + gcc_assert (operands[1] != NULL_RTX); + gcc_assert (REGNO (operands[0]) % 2 == 0); + gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1])); + + return (operands_ok_ldrd_strd (operands[0], operands[1], + base, offset, + false, load)); + } + } + + return false; +} +#undef SWAP_RTX + + + + +/* Print a symbolic form of X to the debug file, F. */ +static void +arm_print_value (FILE *f, rtx x) +{ + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x)); + return; + + case CONST_DOUBLE: + fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3)); + return; + + case CONST_VECTOR: + { + int i; + + fprintf (f, "<"); + for (i = 0; i < CONST_VECTOR_NUNITS (x); i++) + { + fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i))); + if (i < (CONST_VECTOR_NUNITS (x) - 1)) + fputc (',', f); + } + fprintf (f, ">"); + } + return; + + case CONST_STRING: + fprintf (f, "\"%s\"", XSTR (x, 0)); + return; + + case SYMBOL_REF: + fprintf (f, "`%s'", XSTR (x, 0)); + return; + + case LABEL_REF: + fprintf (f, "L%d", INSN_UID (XEXP (x, 0))); + return; + + case CONST: + arm_print_value (f, XEXP (x, 0)); + return; + + case PLUS: + arm_print_value (f, XEXP (x, 0)); + fprintf (f, "+"); + arm_print_value (f, XEXP (x, 1)); + return; + + case PC: + fprintf (f, "pc"); + return; + + default: + fprintf (f, "????"); + return; + } +} + +/* Routines for manipulation of the constant pool. */ + +/* Arm instructions cannot load a large constant directly into a + register; they have to come from a pc relative load. The constant + must therefore be placed in the addressable range of the pc + relative load. Depending on the precise pc relative load + instruction the range is somewhere between 256 bytes and 4k. This + means that we often have to dump a constant inside a function, and + generate code to branch around it. + + It is important to minimize this, since the branches will slow + things down and make the code larger. + + Normally we can hide the table after an existing unconditional + branch so that there is no interruption of the flow, but in the + worst case the code looks like this: + + ldr rn, L1 + ... + b L2 + align + L1: .long value + L2: + ... + + ldr rn, L3 + ... + b L4 + align + L3: .long value + L4: + ... + + We fix this by performing a scan after scheduling, which notices + which instructions need to have their operands fetched from the + constant table and builds the table. + + The algorithm starts by building a table of all the constants that + need fixing up and all the natural barriers in the function (places + where a constant table can be dropped without breaking the flow). 
+ For each fixup we note how far the pc-relative replacement will be + able to reach and the offset of the instruction into the function. + + Having built the table we then group the fixes together to form + tables that are as large as possible (subject to addressing + constraints) and emit each table of constants after the last + barrier that is within range of all the instructions in the group. + If a group does not contain a barrier, then we forcibly create one + by inserting a jump instruction into the flow. Once the table has + been inserted, the insns are then modified to reference the + relevant entry in the pool. + + Possible enhancements to the algorithm (not implemented) are: + + 1) For some processors and object formats, there may be benefit in + aligning the pools to the start of cache lines; this alignment + would need to be taken into account when calculating addressability + of a pool. */ + +/* These typedefs are located at the start of this file, so that + they can be used in the prototypes there. This comment is to + remind readers of that fact so that the following structures + can be understood more easily. + + typedef struct minipool_node Mnode; + typedef struct minipool_fixup Mfix; */ + +struct minipool_node +{ + /* Doubly linked chain of entries. */ + Mnode * next; + Mnode * prev; + /* The maximum offset into the code that this entry can be placed. While + pushing fixes for forward references, all entries are sorted in order + of increasing max_address. */ + HOST_WIDE_INT max_address; + /* Similarly for an entry inserted for a backwards ref. */ + HOST_WIDE_INT min_address; + /* The number of fixes referencing this entry. This can become zero + if we "unpush" an entry. In this case we ignore the entry when we + come to emit the code. */ + int refcount; + /* The offset from the start of the minipool. */ + HOST_WIDE_INT offset; + /* The value in table. */ + rtx value; + /* The mode of value. */ + enum machine_mode mode; + /* The size of the value. With iWMMXt enabled + sizes > 4 also imply an alignment of 8-bytes. */ + int fix_size; +}; + +struct minipool_fixup +{ + Mfix * next; + rtx insn; + HOST_WIDE_INT address; + rtx * loc; + enum machine_mode mode; + int fix_size; + rtx value; + Mnode * minipool; + HOST_WIDE_INT forwards; + HOST_WIDE_INT backwards; +}; + +/* Fixes less than a word need padding out to a word boundary. */ +#define MINIPOOL_FIX_SIZE(mode) \ + (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4) + +static Mnode * minipool_vector_head; +static Mnode * minipool_vector_tail; +static rtx minipool_vector_label; +static int minipool_pad; + +/* The linked list of all minipool fixes required for this function. */ +Mfix * minipool_fix_head; +Mfix * minipool_fix_tail; +/* The fix entry for the current minipool, once it has been placed. */ +Mfix * minipool_barrier; + +#ifndef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION 0 +#endif + +static HOST_WIDE_INT +get_jump_table_size (rtx insn) +{ + /* ADDR_VECs only take room if read-only data does into the text + section. */ + if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section) + { + rtx body = PATTERN (insn); + int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0; + HOST_WIDE_INT size; + HOST_WIDE_INT modesize; + + modesize = GET_MODE_SIZE (GET_MODE (body)); + size = modesize * XVECLEN (body, elt); + switch (modesize) + { + case 1: + /* Round up size of TBB table to a halfword boundary. 
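Reachability throughout this code is plain interval arithmetic: a fix at address A whose instruction can reach FORWARDS bytes ahead and BACKWARDS bytes behind can use a pool entry only if the entry's final address lands in [A - BACKWARDS, A + FORWARDS]. A standalone sketch of that test, with hypothetical field names and illustrative range values:

    #include <stdbool.h>

    /* A pending constant-pool reference: where the load sits and how far its
       addressing mode can reach in each direction (illustrative values).  */
    struct pool_fix
    {
      long address;    /* byte offset of the load within the function */
      long forwards;   /* maximum positive displacement */
      long backwards;  /* maximum negative displacement */
    };

    /* True if a pool entry placed at POOL_ADDRESS is addressable from FIX.  */
    static bool
    pool_entry_in_range (const struct pool_fix *fix, long pool_address)
    {
      return pool_address >= fix->address - fix->backwards
             && pool_address <= fix->address + fix->forwards;
    }

    int
    main (void)
    {
      struct pool_fix fix = { 1000, 4092, 4084 };
      /* A pool 2000 bytes later is reachable; one 5000 bytes later is not.  */
      return (pool_entry_in_range (&fix, 3000)
              && !pool_entry_in_range (&fix, 6000)) ? 0 : 1;
    }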
*/ + size = (size + 1) & ~(HOST_WIDE_INT)1; + break; + case 2: + /* No padding necessary for TBH. */ + break; + case 4: + /* Add two bytes for alignment on Thumb. */ + if (TARGET_THUMB) + size += 2; + break; + default: + gcc_unreachable (); + } + return size; + } + + return 0; +} + +/* Return the maximum amount of padding that will be inserted before + label LABEL. */ + +static HOST_WIDE_INT +get_label_padding (rtx label) +{ + HOST_WIDE_INT align, min_insn_size; + + align = 1 << label_to_alignment (label); + min_insn_size = TARGET_THUMB ? 2 : 4; + return align > min_insn_size ? align - min_insn_size : 0; +} + +/* Move a minipool fix MP from its current location to before MAX_MP. + If MAX_MP is NULL, then MP doesn't need moving, but the addressing + constraints may need updating. */ +static Mnode * +move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp, + HOST_WIDE_INT max_address) +{ + /* The code below assumes these are different. */ + gcc_assert (mp != max_mp); + + if (max_mp == NULL) + { + if (max_address < mp->max_address) + mp->max_address = max_address; + } + else + { + if (max_address > max_mp->max_address - mp->fix_size) + mp->max_address = max_mp->max_address - mp->fix_size; + else + mp->max_address = max_address; + + /* Unlink MP from its current position. Since max_mp is non-null, + mp->prev must be non-null. */ + mp->prev->next = mp->next; + if (mp->next != NULL) + mp->next->prev = mp->prev; + else + minipool_vector_tail = mp->prev; + + /* Re-insert it before MAX_MP. */ + mp->next = max_mp; + mp->prev = max_mp->prev; + max_mp->prev = mp; + + if (mp->prev != NULL) + mp->prev->next = mp; + else + minipool_vector_head = mp; + } + + /* Save the new entry. */ + max_mp = mp; + + /* Scan over the preceding entries and adjust their addresses as + required. */ + while (mp->prev != NULL + && mp->prev->max_address > mp->max_address - mp->prev->fix_size) + { + mp->prev->max_address = mp->max_address - mp->prev->fix_size; + mp = mp->prev; + } + + return max_mp; +} + +/* Add a constant to the minipool for a forward reference. Returns the + node added or NULL if the constant will not fit in this pool. */ +static Mnode * +add_minipool_forward_ref (Mfix *fix) +{ + /* If set, max_mp is the first pool_entry that has a lower + constraint than the one we are trying to add. */ + Mnode * max_mp = NULL; + HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad; + Mnode * mp; + + /* If the minipool starts before the end of FIX->INSN then this FIX + can not be placed into the current pool. Furthermore, adding the + new constant pool entry may cause the pool to start FIX_SIZE bytes + earlier. */ + if (minipool_vector_head && + (fix->address + get_attr_length (fix->insn) + >= minipool_vector_head->max_address - fix->fix_size)) + return NULL; + + /* Scan the pool to see if a constant with the same value has + already been added. While we are doing this, also note the + location where we must insert the constant if it doesn't already + exist. */ + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + if (GET_CODE (fix->value) == GET_CODE (mp->value) + && fix->mode == mp->mode + && (!LABEL_P (fix->value) + || (CODE_LABEL_NUMBER (fix->value) + == CODE_LABEL_NUMBER (mp->value))) + && rtx_equal_p (fix->value, mp->value)) + { + /* More than one fix references this entry. */ + mp->refcount++; + return move_minipool_fix_forward_ref (mp, max_mp, max_address); + } + + /* Note the insertion point if necessary. 
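The table-size arithmetic just above is easy to check in isolation: entries are 1, 2 or 4 bytes (TBB, TBH, or a word table), TBB tables are rounded up to a halfword, and word tables on Thumb get two extra alignment bytes. A standalone sketch of the same computation:

    #include <assert.h>

    /* ELT_SIZE bytes per dispatch-table entry, N_ENTRIES entries, plus the
       padding rules applied above.  THUMB selects the Thumb-specific case.  */
    static long
    jump_table_bytes (int elt_size, long n_entries, int thumb)
    {
      long size = (long) elt_size * n_entries;

      switch (elt_size)
        {
        case 1:                       /* TBB: round up to a halfword.  */
          size = (size + 1) & ~1L;
          break;
        case 2:                       /* TBH: already halfword aligned.  */
          break;
        case 4:                       /* Word table: two alignment bytes on Thumb.  */
          if (thumb)
            size += 2;
          break;
        default:
          assert (0);
        }
      return size;
    }

    int
    main (void)
    {
      /* Five byte-sized entries occupy 6 bytes once rounded to a halfword.  */
      return jump_table_bytes (1, 5, 1) == 6 ? 0 : 1;
    }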
*/ + if (max_mp == NULL + && mp->max_address > max_address) + max_mp = mp; + + /* If we are inserting an 8-bytes aligned quantity and + we have not already found an insertion point, then + make sure that all such 8-byte aligned quantities are + placed at the start of the pool. */ + if (ARM_DOUBLEWORD_ALIGN + && max_mp == NULL + && fix->fix_size >= 8 + && mp->fix_size < 8) + { + max_mp = mp; + max_address = mp->max_address; + } + } + + /* The value is not currently in the minipool, so we need to create + a new entry for it. If MAX_MP is NULL, the entry will be put on + the end of the list since the placement is less constrained than + any existing entry. Otherwise, we insert the new fix before + MAX_MP and, if necessary, adjust the constraints on the other + entries. */ + mp = XNEW (Mnode); + mp->fix_size = fix->fix_size; + mp->mode = fix->mode; + mp->value = fix->value; + mp->refcount = 1; + /* Not yet required for a backwards ref. */ + mp->min_address = -65536; + + if (max_mp == NULL) + { + mp->max_address = max_address; + mp->next = NULL; + mp->prev = minipool_vector_tail; + + if (mp->prev == NULL) + { + minipool_vector_head = mp; + minipool_vector_label = gen_label_rtx (); + } + else + mp->prev->next = mp; + + minipool_vector_tail = mp; + } + else + { + if (max_address > max_mp->max_address - mp->fix_size) + mp->max_address = max_mp->max_address - mp->fix_size; + else + mp->max_address = max_address; + + mp->next = max_mp; + mp->prev = max_mp->prev; + max_mp->prev = mp; + if (mp->prev != NULL) + mp->prev->next = mp; + else + minipool_vector_head = mp; + } + + /* Save the new entry. */ + max_mp = mp; + + /* Scan over the preceding entries and adjust their addresses as + required. */ + while (mp->prev != NULL + && mp->prev->max_address > mp->max_address - mp->prev->fix_size) + { + mp->prev->max_address = mp->max_address - mp->prev->fix_size; + mp = mp->prev; + } + + return max_mp; +} + +static Mnode * +move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp, + HOST_WIDE_INT min_address) +{ + HOST_WIDE_INT offset; + + /* The code below assumes these are different. */ + gcc_assert (mp != min_mp); + + if (min_mp == NULL) + { + if (min_address > mp->min_address) + mp->min_address = min_address; + } + else + { + /* We will adjust this below if it is too loose. */ + mp->min_address = min_address; + + /* Unlink MP from its current position. Since min_mp is non-null, + mp->next must be non-null. */ + mp->next->prev = mp->prev; + if (mp->prev != NULL) + mp->prev->next = mp->next; + else + minipool_vector_head = mp->next; + + /* Reinsert it after MIN_MP. */ + mp->prev = min_mp; + mp->next = min_mp->next; + min_mp->next = mp; + if (mp->next != NULL) + mp->next->prev = mp; + else + minipool_vector_tail = mp; + } + + min_mp = mp; + + offset = 0; + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + mp->offset = offset; + if (mp->refcount > 0) + offset += mp->fix_size; + + if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size) + mp->next->min_address = mp->min_address + mp->fix_size; + } + + return min_mp; +} + +/* Add a constant to the minipool for a backward reference. Returns the + node added or NULL if the constant will not fit in this pool. + + Note that the code for insertion for a backwards reference can be + somewhat confusing because the calculated offsets for each fix do + not take into account the size of the pool (which is still under + construction. 
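When an entry is inserted or moved, the constraint has to ripple backwards: every earlier entry must still fit in front of its successor, so its max_address gets clamped to the successor's max_address minus its own size. A standalone sketch of that propagation over a plain array instead of the doubly linked Mnode list:

    #include <stdio.h>

    /* One pool entry: its size in bytes and the highest address at which it
       may still be placed (a simplified stand-in for the Mnode fields).  */
    struct pool_entry { long fix_size; long max_address; };

    /* After tightening the constraint on entry LAST, walk towards the start
       of the pool and shrink earlier entries' max_address so that each still
       fits in front of its successor, mirroring the loops above.  */
    static void
    propagate_max_address (struct pool_entry *pool, int last)
    {
      for (int i = last; i > 0; i--)
        if (pool[i - 1].max_address > pool[i].max_address - pool[i - 1].fix_size)
          pool[i - 1].max_address = pool[i].max_address - pool[i - 1].fix_size;
    }

    int
    main (void)
    {
      struct pool_entry pool[3] = { { 4, 5000 }, { 8, 5000 }, { 4, 4000 } };

      propagate_max_address (pool, 2);
      /* Entry 1 must now end by 4000 - 8, entry 0 by that minus 4 again.  */
      printf ("%ld %ld\n", pool[0].max_address, pool[1].max_address);
      return (pool[0].max_address == 3988 && pool[1].max_address == 3992) ? 0 : 1;
    }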
*/ +static Mnode * +add_minipool_backward_ref (Mfix *fix) +{ + /* If set, min_mp is the last pool_entry that has a lower constraint + than the one we are trying to add. */ + Mnode *min_mp = NULL; + /* This can be negative, since it is only a constraint. */ + HOST_WIDE_INT min_address = fix->address - fix->backwards; + Mnode *mp; + + /* If we can't reach the current pool from this insn, or if we can't + insert this entry at the end of the pool without pushing other + fixes out of range, then we don't try. This ensures that we + can't fail later on. */ + if (min_address >= minipool_barrier->address + || (minipool_vector_tail->min_address + fix->fix_size + >= minipool_barrier->address)) + return NULL; + + /* Scan the pool to see if a constant with the same value has + already been added. While we are doing this, also note the + location where we must insert the constant if it doesn't already + exist. */ + for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev) + { + if (GET_CODE (fix->value) == GET_CODE (mp->value) + && fix->mode == mp->mode + && (!LABEL_P (fix->value) + || (CODE_LABEL_NUMBER (fix->value) + == CODE_LABEL_NUMBER (mp->value))) + && rtx_equal_p (fix->value, mp->value) + /* Check that there is enough slack to move this entry to the + end of the table (this is conservative). */ + && (mp->max_address + > (minipool_barrier->address + + minipool_vector_tail->offset + + minipool_vector_tail->fix_size))) + { + mp->refcount++; + return move_minipool_fix_backward_ref (mp, min_mp, min_address); + } + + if (min_mp != NULL) + mp->min_address += fix->fix_size; + else + { + /* Note the insertion point if necessary. */ + if (mp->min_address < min_address) + { + /* For now, we do not allow the insertion of 8-byte alignment + requiring nodes anywhere but at the start of the pool. */ + if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size >= 8 && mp->fix_size < 8) + return NULL; + else + min_mp = mp; + } + else if (mp->max_address + < minipool_barrier->address + mp->offset + fix->fix_size) + { + /* Inserting before this entry would push the fix beyond + its maximum address (which can happen if we have + re-located a forwards fix); force the new fix to come + after it. */ + if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size >= 8 && mp->fix_size < 8) + return NULL; + else + { + min_mp = mp; + min_address = mp->min_address + fix->fix_size; + } + } + /* Do not insert a non-8-byte aligned quantity before 8-byte + aligned quantities. */ + else if (ARM_DOUBLEWORD_ALIGN + && fix->fix_size < 8 + && mp->fix_size >= 8) + { + min_mp = mp; + min_address = mp->min_address + fix->fix_size; + } + } + } + + /* We need to create a new entry. */ + mp = XNEW (Mnode); + mp->fix_size = fix->fix_size; + mp->mode = fix->mode; + mp->value = fix->value; + mp->refcount = 1; + mp->max_address = minipool_barrier->address + 65536; + + mp->min_address = min_address; + + if (min_mp == NULL) + { + mp->prev = NULL; + mp->next = minipool_vector_head; + + if (mp->next == NULL) + { + minipool_vector_tail = mp; + minipool_vector_label = gen_label_rtx (); + } + else + mp->next->prev = mp; + + minipool_vector_head = mp; + } + else + { + mp->next = min_mp->next; + mp->prev = min_mp; + min_mp->next = mp; + + if (mp->next != NULL) + mp->next->prev = mp; + else + minipool_vector_tail = mp; + } + + /* Save the new entry. */ + min_mp = mp; + + if (mp->prev) + mp = mp->prev; + else + mp->offset = 0; + + /* Scan over the following entries and adjust their offsets. 
*/ + while (mp->next != NULL) + { + if (mp->next->min_address < mp->min_address + mp->fix_size) + mp->next->min_address = mp->min_address + mp->fix_size; + + if (mp->refcount) + mp->next->offset = mp->offset + mp->fix_size; + else + mp->next->offset = mp->offset; + + mp = mp->next; + } + + return min_mp; +} + +static void +assign_minipool_offsets (Mfix *barrier) +{ + HOST_WIDE_INT offset = 0; + Mnode *mp; + + minipool_barrier = barrier; + + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + { + mp->offset = offset; + + if (mp->refcount > 0) + offset += mp->fix_size; + } +} + +/* Output the literal table */ +static void +dump_minipool (rtx scan) +{ + Mnode * mp; + Mnode * nmp; + int align64 = 0; + + if (ARM_DOUBLEWORD_ALIGN) + for (mp = minipool_vector_head; mp != NULL; mp = mp->next) + if (mp->refcount > 0 && mp->fix_size >= 8) + { + align64 = 1; + break; + } + + if (dump_file) + fprintf (dump_file, + ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n", + INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4); + + scan = emit_label_after (gen_label_rtx (), scan); + scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan); + scan = emit_label_after (minipool_vector_label, scan); + + for (mp = minipool_vector_head; mp != NULL; mp = nmp) + { + if (mp->refcount > 0) + { + if (dump_file) + { + fprintf (dump_file, + ";; Offset %u, min %ld, max %ld ", + (unsigned) mp->offset, (unsigned long) mp->min_address, + (unsigned long) mp->max_address); + arm_print_value (dump_file, mp->value); + fputc ('\n', dump_file); + } + + switch (mp->fix_size) + { +#ifdef HAVE_consttable_1 + case 1: + scan = emit_insn_after (gen_consttable_1 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_2 + case 2: + scan = emit_insn_after (gen_consttable_2 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_4 + case 4: + scan = emit_insn_after (gen_consttable_4 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_8 + case 8: + scan = emit_insn_after (gen_consttable_8 (mp->value), scan); + break; + +#endif +#ifdef HAVE_consttable_16 + case 16: + scan = emit_insn_after (gen_consttable_16 (mp->value), scan); + break; + +#endif + default: + gcc_unreachable (); + } + } + + nmp = mp->next; + free (mp); + } + + minipool_vector_head = minipool_vector_tail = NULL; + scan = emit_insn_after (gen_consttable_end (), scan); + scan = emit_barrier_after (scan); +} + +/* Return the cost of forcibly inserting a barrier after INSN. */ +static int +arm_barrier_cost (rtx insn) +{ + /* Basing the location of the pool on the loop depth is preferable, + but at the moment, the basic block information seems to be + corrupt by this stage of the compilation. */ + int base_cost = 50; + rtx next = next_nonnote_insn (insn); + + if (next != NULL && LABEL_P (next)) + base_cost -= 20; + + switch (GET_CODE (insn)) + { + case CODE_LABEL: + /* It will always be better to place the table before the label, rather + than after it. */ + return 50; + + case INSN: + case CALL_INSN: + return base_cost; + + case JUMP_INSN: + return base_cost - 10; + + default: + return base_cost + 10; + } +} + +/* Find the best place in the insn stream in the range + (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier. + Create the barrier by inserting a jump and add a new fix entry for + it. 
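Offset assignment works the same way in both places it appears: walk the pool in order, give every entry the running offset, and only let entries that are still referenced consume space. A standalone sketch:

    #include <stdio.h>

    /* A pool slot: reference count, size in bytes, assigned offset.  */
    struct pool_slot { int refcount; long fix_size; long offset; };

    /* Assign offsets in slot order; unreferenced ("unpushed") entries keep a
       placeholder offset but take no space, as in the code above.  */
    static long
    assign_pool_offsets (struct pool_slot *slot, int n)
    {
      long offset = 0;
      for (int i = 0; i < n; i++)
        {
          slot[i].offset = offset;
          if (slot[i].refcount > 0)
            offset += slot[i].fix_size;
        }
      return offset;                     /* total pool size in bytes */
    }

    int
    main (void)
    {
      struct pool_slot pool[3] = { { 1, 4, 0 }, { 0, 8, 0 }, { 2, 4, 0 } };
      long total = assign_pool_offsets (pool, 3);
      printf ("offsets %ld %ld %ld, total %ld\n",
              pool[0].offset, pool[1].offset, pool[2].offset, total);
      return total == 8 ? 0 : 1;
    }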
*/ +static Mfix * +create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address) +{ + HOST_WIDE_INT count = 0; + rtx barrier; + rtx from = fix->insn; + /* The instruction after which we will insert the jump. */ + rtx selected = NULL; + int selected_cost; + /* The address at which the jump instruction will be placed. */ + HOST_WIDE_INT selected_address; + Mfix * new_fix; + HOST_WIDE_INT max_count = max_address - fix->address; + rtx label = gen_label_rtx (); + + selected_cost = arm_barrier_cost (from); + selected_address = fix->address; + + while (from && count < max_count) + { + rtx tmp; + int new_cost; + + /* This code shouldn't have been called if there was a natural barrier + within range. */ + gcc_assert (!BARRIER_P (from)); + + /* Count the length of this insn. This must stay in sync with the + code that pushes minipool fixes. */ + if (LABEL_P (from)) + count += get_label_padding (from); + else + count += get_attr_length (from); + + /* If there is a jump table, add its length. */ + if (tablejump_p (from, NULL, &tmp)) + { + count += get_jump_table_size (tmp); + + /* Jump tables aren't in a basic block, so base the cost on + the dispatch insn. If we select this location, we will + still put the pool after the table. */ + new_cost = arm_barrier_cost (from); + + if (count < max_count + && (!selected || new_cost <= selected_cost)) + { + selected = tmp; + selected_cost = new_cost; + selected_address = fix->address + count; + } + + /* Continue after the dispatch table. */ + from = NEXT_INSN (tmp); + continue; + } + + new_cost = arm_barrier_cost (from); + + if (count < max_count + && (!selected || new_cost <= selected_cost)) + { + selected = from; + selected_cost = new_cost; + selected_address = fix->address + count; + } + + from = NEXT_INSN (from); + } + + /* Make sure that we found a place to insert the jump. */ + gcc_assert (selected); + + /* Make sure we do not split a call and its corresponding + CALL_ARG_LOCATION note. */ + if (CALL_P (selected)) + { + rtx next = NEXT_INSN (selected); + if (next && NOTE_P (next) + && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION) + selected = next; + } + + /* Create a new JUMP_INSN that branches around a barrier. */ + from = emit_jump_insn_after (gen_jump (label), selected); + JUMP_LABEL (from) = label; + barrier = emit_barrier_after (from); + emit_label_after (label, barrier); + + /* Create a minipool barrier entry for the new barrier. */ + new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix)); + new_fix->insn = barrier; + new_fix->address = selected_address; + new_fix->next = fix->next; + fix->next = new_fix; + + return new_fix; +} + +/* Record that there is a natural barrier in the insn stream at + ADDRESS. */ +static void +push_minipool_barrier (rtx insn, HOST_WIDE_INT address) +{ + Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); + + fix->insn = insn; + fix->address = address; + + fix->next = NULL; + if (minipool_fix_head != NULL) + minipool_fix_tail->next = fix; + else + minipool_fix_head = fix; + + minipool_fix_tail = fix; +} + +/* Record INSN, which will need fixing up to load a value from the + minipool. ADDRESS is the offset of the insn since the start of the + function; LOC is a pointer to the part of the insn which requires + fixing; VALUE is the constant that must be loaded, which is of type + MODE. 
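The barrier-placement scan is a bounded greedy search: walk forward from the fix, accumulate instruction lengths, and remember the cheapest position that is still within range, with later positions winning ties. A standalone sketch over arrays of lengths and costs; the initial-cost seeding and jump-table handling of the real loop are omitted:

    #include <stdio.h>

    /* Walk forward over insns with byte LENGTH[i] and heuristic COST[i],
       never past MAX_COUNT bytes, and remember the cheapest position seen.
       Later insns win ties, as in the loop above.  Returns the index of the
       insn to insert the jump after, or -1 if none qualified.  */
    static int
    pick_barrier_position (const long *length, const int *cost, int n_insns,
                           long max_count)
    {
      long count = 0;
      int selected = -1;
      int selected_cost = 0;

      for (int i = 0; i < n_insns && count < max_count; i++)
        {
          count += length[i];
          if (count < max_count
              && (selected < 0 || cost[i] <= selected_cost))
            {
              selected = i;
              selected_cost = cost[i];
            }
        }
      return selected;
    }

    int
    main (void)
    {
      long length[] = { 4, 4, 4, 4 };
      int cost[] = { 50, 40, 40, 30 };      /* the last insn is out of range */
      int pos = pick_barrier_position (length, cost, 4, 14);
      printf ("insert after insn %d\n", pos);
      return pos == 2 ? 0 : 1;
    }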
*/ +static void +push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc, + enum machine_mode mode, rtx value) +{ + Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix)); + + fix->insn = insn; + fix->address = address; + fix->loc = loc; + fix->mode = mode; + fix->fix_size = MINIPOOL_FIX_SIZE (mode); + fix->value = value; + fix->forwards = get_attr_pool_range (insn); + fix->backwards = get_attr_neg_pool_range (insn); + fix->minipool = NULL; + + /* If an insn doesn't have a range defined for it, then it isn't + expecting to be reworked by this code. Better to stop now than + to generate duff assembly code. */ + gcc_assert (fix->forwards || fix->backwards); + + /* If an entry requires 8-byte alignment then assume all constant pools + require 4 bytes of padding. Trying to do this later on a per-pool + basis is awkward because existing pool entries have to be modified. */ + if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8) + minipool_pad = 4; + + if (dump_file) + { + fprintf (dump_file, + ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ", + GET_MODE_NAME (mode), + INSN_UID (insn), (unsigned long) address, + -1 * (long)fix->backwards, (long)fix->forwards); + arm_print_value (dump_file, fix->value); + fprintf (dump_file, "\n"); + } + + /* Add it to the chain of fixes. */ + fix->next = NULL; + + if (minipool_fix_head != NULL) + minipool_fix_tail->next = fix; + else + minipool_fix_head = fix; + + minipool_fix_tail = fix; +} + +/* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline. + Returns the number of insns needed, or 99 if we always want to synthesize + the value. */ +int +arm_max_const_double_inline_cost () +{ + /* Let the value get synthesized to avoid the use of literal pools. */ + if (arm_disable_literal_pool) + return 99; + + return ((optimize_size || arm_ld_sched) ? 3 : 4); +} + +/* Return the cost of synthesizing a 64-bit constant VAL inline. + Returns the number of insns needed, or 99 if we don't know how to + do it. */ +int +arm_const_double_inline_cost (rtx val) +{ + rtx lowpart, highpart; + enum machine_mode mode; + + mode = GET_MODE (val); + + if (mode == VOIDmode) + mode = DImode; + + gcc_assert (GET_MODE_SIZE (mode) == 8); + + lowpart = gen_lowpart (SImode, val); + highpart = gen_highpart_mode (SImode, mode, val); + + gcc_assert (CONST_INT_P (lowpart)); + gcc_assert (CONST_INT_P (highpart)); + + return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart), + NULL_RTX, NULL_RTX, 0, 0) + + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart), + NULL_RTX, NULL_RTX, 0, 0)); +} + +/* Return true if it is worthwhile to split a 64-bit constant into two + 32-bit operations. This is the case if optimizing for size, or + if we have load delay slots, or if one 32-bit part can be done with + a single data operation. */ +bool +arm_const_double_by_parts (rtx val) +{ + enum machine_mode mode = GET_MODE (val); + rtx part; + + if (optimize_size || arm_ld_sched) + return true; + + if (mode == VOIDmode) + mode = DImode; + + part = gen_highpart_mode (SImode, mode, val); + + gcc_assert (CONST_INT_P (part)); + + if (const_ok_for_arm (INTVAL (part)) + || const_ok_for_arm (~INTVAL (part))) + return true; + + part = gen_lowpart (SImode, val); + + gcc_assert (CONST_INT_P (part)); + + if (const_ok_for_arm (INTVAL (part)) + || const_ok_for_arm (~INTVAL (part))) + return true; + + return false; +} + +/* Return true if it is possible to inline both the high and low parts + of a 64-bit constant into 32-bit data processing instructions. 
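The splitting logic above looks at the two 32-bit halves of the constant and asks whether each half, or its complement, is a valid ARM data-processing immediate, i.e. an 8-bit value rotated right by an even amount. A standalone sketch of that test and of a two-part cost estimate; the real arm_gen_constant cost model is considerably richer:

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified stand-in for const_ok_for_arm: V is encodable if some even
       left-rotation of it (undoing the architectural right-rotation) fits in
       eight bits.  */
    static bool
    arm_immediate_p (uint32_t v)
    {
      for (int rot = 0; rot < 32; rot += 2)
        {
          uint32_t r = (v << rot) | (rot ? v >> (32 - rot) : 0);
          if (r <= 0xffu)
            return true;
        }
      return false;
    }

    /* Split a 64-bit constant into halves as above and charge one insn for a
       half that is usable directly or complemented, two otherwise.  */
    static int
    split_cost_estimate (uint64_t val)
    {
      uint32_t lo = (uint32_t) val;
      uint32_t hi = (uint32_t) (val >> 32);
      int cost = 0;

      cost += (arm_immediate_p (lo) || arm_immediate_p (~lo)) ? 1 : 2;
      cost += (arm_immediate_p (hi) || arm_immediate_p (~hi)) ? 1 : 2;
      return cost;
    }

    int
    main (void)
    {
      /* 0x000000FF00000001: both halves are single immediates.  */
      return split_cost_estimate (0x000000FF00000001ull) == 2 ? 0 : 1;
    }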
*/ +bool +arm_const_double_by_immediates (rtx val) +{ + enum machine_mode mode = GET_MODE (val); + rtx part; + + if (mode == VOIDmode) + mode = DImode; + + part = gen_highpart_mode (SImode, mode, val); + + gcc_assert (CONST_INT_P (part)); + + if (!const_ok_for_arm (INTVAL (part))) + return false; + + part = gen_lowpart (SImode, val); + + gcc_assert (CONST_INT_P (part)); + + if (!const_ok_for_arm (INTVAL (part))) + return false; + + return true; +} + +/* Scan INSN and note any of its operands that need fixing. + If DO_PUSHES is false we do not actually push any of the fixups + needed. */ +static void +note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes) +{ + int opno; + + extract_insn (insn); + + if (!constrain_operands (1)) + fatal_insn_not_found (insn); + + if (recog_data.n_alternatives == 0) + return; + + /* Fill in recog_op_alt with information about the constraints of + this insn. */ + preprocess_constraints (); + + for (opno = 0; opno < recog_data.n_operands; opno++) + { + /* Things we need to fix can only occur in inputs. */ + if (recog_data.operand_type[opno] != OP_IN) + continue; + + /* If this alternative is a memory reference, then any mention + of constants in this alternative is really to fool reload + into allowing us to accept one there. We need to fix them up + now so that we output the right code. */ + if (recog_op_alt[opno][which_alternative].memory_ok) + { + rtx op = recog_data.operand[opno]; + + if (CONSTANT_P (op)) + { + if (do_pushes) + push_minipool_fix (insn, address, recog_data.operand_loc[opno], + recog_data.operand_mode[opno], op); + } + else if (MEM_P (op) + && GET_CODE (XEXP (op, 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0))) + { + if (do_pushes) + { + rtx cop = avoid_constant_pool_reference (op); + + /* Casting the address of something to a mode narrower + than a word can cause avoid_constant_pool_reference() + to return the pool reference itself. That's no good to + us here. Lets just hope that we can use the + constant pool value directly. */ + if (op == cop) + cop = get_pool_constant (XEXP (op, 0)); + + push_minipool_fix (insn, address, + recog_data.operand_loc[opno], + recog_data.operand_mode[opno], cop); + } + + } + } + } + + return; +} + +/* Rewrite move insn into subtract of 0 if the condition codes will + be useful in next conditional jump insn. */ + +static void +thumb1_reorg (void) +{ + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + rtx dest, src; + rtx pat, op0, set = NULL; + rtx prev, insn = BB_END (bb); + bool insn_clobbered = false; + + while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn)) + insn = PREV_INSN (insn); + + /* Find the last cbranchsi4_insn in basic block BB. */ + if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + continue; + + /* Get the register with which we are comparing. */ + pat = PATTERN (insn); + op0 = XEXP (XEXP (SET_SRC (pat), 0), 0); + + /* Find the first flag setting insn before INSN in basic block BB. */ + gcc_assert (insn != BB_HEAD (bb)); + for (prev = PREV_INSN (insn); + (!insn_clobbered + && prev != BB_HEAD (bb) + && (NOTE_P (prev) + || DEBUG_INSN_P (prev) + || ((set = single_set (prev)) != NULL + && get_attr_conds (prev) == CONDS_NOCOND))); + prev = PREV_INSN (prev)) + { + if (reg_set_p (op0, prev)) + insn_clobbered = true; + } + + /* Skip if op0 is clobbered by insn other than prev. 
*/ + if (insn_clobbered) + continue; + + if (!set) + continue; + + dest = SET_DEST (set); + src = SET_SRC (set); + if (!low_register_operand (dest, SImode) + || !low_register_operand (src, SImode)) + continue; + + /* Rewrite move into subtract of 0 if its operand is compared with ZERO + in INSN. Both src and dest of the move insn are checked. */ + if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest)) + { + dest = copy_rtx (dest); + src = copy_rtx (src); + src = gen_rtx_MINUS (SImode, src, const0_rtx); + PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src); + INSN_CODE (prev) = -1; + /* Set test register in INSN to dest. */ + XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest); + INSN_CODE (insn) = -1; + } + } +} + +/* Convert instructions to their cc-clobbering variant if possible, since + that allows us to use smaller encodings. */ + +static void +thumb2_reorg (void) +{ + basic_block bb; + regset_head live; + + INIT_REG_SET (&live); + + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + df_analyze (); + + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn; + + COPY_REG_SET (&live, DF_LR_OUT (bb)); + df_simulate_initialize_backwards (bb, &live); + FOR_BB_INSNS_REVERSE (bb, insn) + { + if (NONJUMP_INSN_P (insn) + && !REGNO_REG_SET_P (&live, CC_REGNUM) + && GET_CODE (PATTERN (insn)) == SET) + { + enum {SKIP, CONV, SWAP_CONV} action = SKIP; + rtx pat = PATTERN (insn); + rtx dst = XEXP (pat, 0); + rtx src = XEXP (pat, 1); + rtx op0 = NULL_RTX, op1 = NULL_RTX; + + if (!OBJECT_P (src)) + op0 = XEXP (src, 0); + + if (BINARY_P (src)) + op1 = XEXP (src, 1); + + if (low_register_operand (dst, SImode)) + { + switch (GET_CODE (src)) + { + case PLUS: + /* Adding two registers and storing the result + in the first source is already a 16-bit + operation. */ + if (rtx_equal_p (dst, op0) + && register_operand (op1, SImode)) + break; + + if (low_register_operand (op0, SImode)) + { + /* ADDS ,, */ + if (low_register_operand (op1, SImode)) + action = CONV; + /* ADDS ,# */ + /* SUBS ,# */ + else if (rtx_equal_p (dst, op0) + && CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), -255, 255)) + action = CONV; + /* ADDS ,,# */ + /* SUBS ,,# */ + else if (CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), -7, 7)) + action = CONV; + } + /* ADCS , */ + else if (GET_CODE (XEXP (src, 0)) == PLUS + && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst) + && low_register_operand (XEXP (XEXP (src, 0), 1), + SImode) + && COMPARISON_P (op1) + && cc_register (XEXP (op1, 0), VOIDmode) + && maybe_get_arm_condition_code (op1) == ARM_CS + && XEXP (op1, 1) == const0_rtx) + action = CONV; + break; + + case MINUS: + /* RSBS ,,#0 + Not handled here: see NEG below. */ + /* SUBS ,,# + SUBS ,# + Not handled here: see PLUS above. */ + /* SUBS ,, */ + if (low_register_operand (op0, SImode) + && low_register_operand (op1, SImode)) + action = CONV; + break; + + case MULT: + /* MULS ,, + As an exception to the rule, this is only used + when optimizing for size since MULS is slow on all + known implementations. We do not even want to use + MULS in cold code, if optimizing for speed, so we + test the global flag here. */ + if (!optimize_size) + break; + /* else fall through. 
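The immediate ranges in the cases above are what decide whether a 16-bit flag-setting encoding exists: ADDS/SUBS with the destination equal to the first source accept immediates in [-255, 255] (negative values become SUBS), the three-operand form only [-7, 7], and MOVS only 0..255. A standalone sketch of those checks, with hypothetical helper names:

    #include <stdbool.h>

    /* Can "dst = src_reg + imm" be narrowed to a flag-setting 16-bit
       ADDS/SUBS?  DST_IS_SRC says whether the destination equals the first
       source; LOW_REGS whether all registers involved are r0-r7.  */
    static bool
    narrow_adds_ok (bool low_regs, bool dst_is_src, long imm)
    {
      if (!low_regs)
        return false;
      if (dst_is_src && imm >= -255 && imm <= 255)
        return true;                    /* ADDS/SUBS Rdn, #imm8 */
      if (imm >= -7 && imm <= 7)
        return true;                    /* ADDS/SUBS Rd, Rn, #imm3 */
      return false;
    }

    /* Likewise for "dst = #imm": MOVS Rd, #imm8 only takes 0..255.  */
    static bool
    narrow_movs_ok (bool low_reg_dst, long imm)
    {
      return low_reg_dst && imm >= 0 && imm <= 255;
    }

    int
    main (void)
    {
      return (narrow_adds_ok (true, false, 7)
              && !narrow_adds_ok (true, false, 100)
              && narrow_movs_ok (true, 200)) ? 0 : 1;
    }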
*/ + case AND: + case IOR: + case XOR: + /* ANDS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + else if (rtx_equal_p (dst, op1) + && low_register_operand (op0, SImode)) + action = SWAP_CONV; + break; + + case ASHIFTRT: + case ASHIFT: + case LSHIFTRT: + /* ASRS , */ + /* LSRS , */ + /* LSLS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + /* ASRS ,,# */ + /* LSRS ,,# */ + /* LSLS ,,# */ + else if (low_register_operand (op0, SImode) + && CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), 0, 31)) + action = CONV; + break; + + case ROTATERT: + /* RORS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) + action = CONV; + break; + + case NOT: + case NEG: + /* MVNS , */ + /* NEGS , (a.k.a RSBS) */ + if (low_register_operand (op0, SImode)) + action = CONV; + break; + + case CONST_INT: + /* MOVS ,# */ + if (CONST_INT_P (src) + && IN_RANGE (INTVAL (src), 0, 255)) + action = CONV; + break; + + case REG: + /* MOVS and MOV with registers have different + encodings, so are not relevant here. */ + break; + + default: + break; + } + } + + if (action != SKIP) + { + rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); + rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); + rtvec vec; + + if (action == SWAP_CONV) + { + src = copy_rtx (src); + XEXP (src, 0) = op1; + XEXP (src, 1) = op0; + pat = gen_rtx_SET (VOIDmode, dst, src); + vec = gen_rtvec (2, pat, clobber); + } + else /* action == CONV */ + vec = gen_rtvec (2, pat, clobber); + + PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); + INSN_CODE (insn) = -1; + } + } + + if (NONDEBUG_INSN_P (insn)) + df_simulate_one_insn_backwards (bb, insn, &live); + } + } + + CLEAR_REG_SET (&live); +} + +/* Gcc puts the pool in the wrong place for ARM, since we can only + load addresses a limited distance around the pc. We do some + special munging to move the constant pool values to the correct + point in the code. */ +static void +arm_reorg (void) +{ + rtx insn; + HOST_WIDE_INT address = 0; + Mfix * fix; + + if (TARGET_THUMB1) + thumb1_reorg (); + else if (TARGET_THUMB2) + thumb2_reorg (); + + /* Ensure all insns that must be split have been split at this point. + Otherwise, the pool placement code below may compute incorrect + insn lengths. Note that when optimizing, all insns have already + been split at this point. */ + if (!optimize) + split_all_insns_noflow (); + + minipool_fix_head = minipool_fix_tail = NULL; + + /* The first insn must always be a note, or the code below won't + scan it properly. */ + insn = get_insns (); + gcc_assert (NOTE_P (insn)); + minipool_pad = 0; + + /* Scan all the insns and record the operands that will need fixing. */ + for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn)) + { + if (BARRIER_P (insn)) + push_minipool_barrier (insn, address); + else if (INSN_P (insn)) + { + rtx table; + + note_invalid_constants (insn, address, true); + address += get_attr_length (insn); + + /* If the insn is a vector jump, add the size of the table + and skip the table. */ + if (tablejump_p (insn, NULL, &table)) + { + address += get_jump_table_size (table); + insn = table; + } + } + else if (LABEL_P (insn)) + /* Add the worst-case padding due to alignment. We don't add + the _current_ padding because the minipool insertions + themselves might change it. */ + address += get_label_padding (insn); + } + + fix = minipool_fix_head; + + /* Now scan the fixups and perform the required changes. 
*/ + while (fix) + { + Mfix * ftmp; + Mfix * fdel; + Mfix * last_added_fix; + Mfix * last_barrier = NULL; + Mfix * this_fix; + + /* Skip any further barriers before the next fix. */ + while (fix && BARRIER_P (fix->insn)) + fix = fix->next; + + /* No more fixes. */ + if (fix == NULL) + break; + + last_added_fix = NULL; + + for (ftmp = fix; ftmp; ftmp = ftmp->next) + { + if (BARRIER_P (ftmp->insn)) + { + if (ftmp->address >= minipool_vector_head->max_address) + break; + + last_barrier = ftmp; + } + else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL) + break; + + last_added_fix = ftmp; /* Keep track of the last fix added. */ + } + + /* If we found a barrier, drop back to that; any fixes that we + could have reached but come after the barrier will now go in + the next mini-pool. */ + if (last_barrier != NULL) + { + /* Reduce the refcount for those fixes that won't go into this + pool after all. */ + for (fdel = last_barrier->next; + fdel && fdel != ftmp; + fdel = fdel->next) + { + fdel->minipool->refcount--; + fdel->minipool = NULL; + } + + ftmp = last_barrier; + } + else + { + /* ftmp is first fix that we can't fit into this pool and + there no natural barriers that we could use. Insert a + new barrier in the code somewhere between the previous + fix and this one, and arrange to jump around it. */ + HOST_WIDE_INT max_address; + + /* The last item on the list of fixes must be a barrier, so + we can never run off the end of the list of fixes without + last_barrier being set. */ + gcc_assert (ftmp); + + max_address = minipool_vector_head->max_address; + /* Check that there isn't another fix that is in range that + we couldn't fit into this pool because the pool was + already too large: we need to put the pool before such an + instruction. The pool itself may come just after the + fix because create_fix_barrier also allows space for a + jump instruction. */ + if (ftmp->address < max_address) + max_address = ftmp->address + 1; + + last_barrier = create_fix_barrier (last_added_fix, max_address); + } + + assign_minipool_offsets (last_barrier); + + while (ftmp) + { + if (!BARRIER_P (ftmp->insn) + && ((ftmp->minipool = add_minipool_backward_ref (ftmp)) + == NULL)) + break; + + ftmp = ftmp->next; + } + + /* Scan over the fixes we have identified for this pool, fixing them + up and adding the constants to the pool itself. */ + for (this_fix = fix; this_fix && ftmp != this_fix; + this_fix = this_fix->next) + if (!BARRIER_P (this_fix->insn)) + { + rtx addr + = plus_constant (Pmode, + gen_rtx_LABEL_REF (VOIDmode, + minipool_vector_label), + this_fix->minipool->offset); + *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr); + } + + dump_minipool (last_barrier->insn); + fix = ftmp; + } + + /* From now on we must synthesize any constants that we can't handle + directly. This can happen if the RTL gets split during final + instruction generation. */ + after_arm_reorg = 1; + + /* Free the minipool memory. */ + obstack_free (&minipool_obstack, minipool_startobj); +} + +/* Routines to output assembly language. */ + +/* If the rtx is the correct value then return the string of the number. + In this way we can ensure that valid double constants are generated even + when cross compiling. */ +const char * +fp_immediate_constant (rtx x) +{ + REAL_VALUE_TYPE r; + + if (!fp_consts_inited) + init_fp_table (); + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + + gcc_assert (REAL_VALUES_EQUAL (r, value_fp0)); + return "0"; +} + +/* As for fp_immediate_constant, but value is passed directly, not in rtx. 
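At its core the fixup loop is a greedy grouping pass: keep accumulating fixes into the current pool until the next fix could no longer reach it, then dump the pool (after a barrier, creating one if needed) and start over. A deliberately simplified standalone sketch of just the grouping decision; the real loop also tracks barriers, backward references and refcounts:

    #include <stdio.h>

    /* A fix: its own address and the highest address its pool may sit at.  */
    struct fix { long address; long max_address; };

    /* FIXES are in increasing address order.  A pool is emitted as soon as
       the next fix starts beyond the group's tightest max_address.  */
    static int
    count_pools (const struct fix *fixes, int n)
    {
      int pools = 0;
      long group_limit = 0;
      int have_group = 0;

      for (int i = 0; i < n; i++)
        {
          if (have_group && fixes[i].address >= group_limit)
            {
              pools++;                  /* dump the current pool first */
              have_group = 0;
            }
          if (!have_group || fixes[i].max_address < group_limit)
            group_limit = fixes[i].max_address;
          have_group = 1;
        }
      return pools + (have_group ? 1 : 0);
    }

    int
    main (void)
    {
      struct fix fixes[] = { { 0, 4000 }, { 100, 4100 }, { 4500, 8500 } };
      printf ("%d pools\n", count_pools (fixes, 3));
      return count_pools (fixes, 3) == 2 ? 0 : 1;
    }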
*/ +static const char * +fp_const_from_val (REAL_VALUE_TYPE *r) +{ + if (!fp_consts_inited) + init_fp_table (); + + gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0)); + return "0"; +} + +/* OPERANDS[0] is the entire list of insns that constitute pop, + OPERANDS[1] is the base register, RETURN_PC is true iff return insn + is in the list, UPDATE is true iff the list contains explicit + update of base register. */ +void +arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse, + bool update) +{ + int i; + char pattern[100]; + int offset; + const char *conditional; + int num_saves = XVECLEN (operands[0], 0); + unsigned int regno; + unsigned int regno_base = REGNO (operands[1]); + + offset = 0; + offset += update ? 1 : 0; + offset += return_pc ? 1 : 0; + + /* Is the base register in the list? */ + for (i = offset; i < num_saves; i++) + { + regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0)); + /* If SP is in the list, then the base register must be SP. */ + gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM)); + /* If base register is in the list, there must be no explicit update. */ + if (regno == regno_base) + gcc_assert (!update); + } + + conditional = reverse ? "%?%D0" : "%?%d0"; + if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM) + { + /* Output pop (not stmfd) because it has a shorter encoding. */ + gcc_assert (update); + sprintf (pattern, "pop%s\t{", conditional); + } + else + { + /* Output ldmfd when the base register is SP, otherwise output ldmia. + It's just a convention, their semantics are identical. */ + if (regno_base == SP_REGNUM) + sprintf (pattern, "ldm%sfd\t", conditional); + else if (TARGET_UNIFIED_ASM) + sprintf (pattern, "ldmia%s\t", conditional); + else + sprintf (pattern, "ldm%sia\t", conditional); + + strcat (pattern, reg_names[regno_base]); + if (update) + strcat (pattern, "!, {"); + else + strcat (pattern, ", {"); + } + + /* Output the first destination register. */ + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]); + + /* Output the rest of the destination registers. */ + for (i = offset + 1; i < num_saves; i++) + { + strcat (pattern, ", "); + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]); + } + + strcat (pattern, "}"); + + if (IS_INTERRUPT (arm_current_func_type ()) && return_pc) + strcat (pattern, "^"); + + output_asm_insn (pattern, &cond); +} + + +/* Output the assembly for a store multiple. */ + +const char * +vfp_output_fstmd (rtx * operands) +{ + char pattern[100]; + int p; + int base; + int i; + + strcpy (pattern, "fstmfdd%?\t%m0!, {%P1"); + p = strlen (pattern); + + gcc_assert (REG_P (operands[1])); + + base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2; + for (i = 1; i < XVECLEN (operands[2], 0); i++) + { + p += sprintf (&pattern[p], ", d%d", base + i); + } + strcpy (&pattern[p], "}"); + + output_asm_insn (pattern, operands); + return ""; +} + + +/* Emit RTL to save block of VFP register pairs to the stack. Returns the + number of bytes pushed. */ + +static int +vfp_emit_fstmd (int base_reg, int count) +{ + rtx par; + rtx dwarf; + rtx tmp, reg; + int i; + + /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two + register pairs are stored by a store multiple insn. We avoid this + by pushing an extra pair. */ + if (count == 2 && !arm_arch6) + { + if (base_reg == LAST_VFP_REGNUM - 3) + base_reg -= 2; + count++; + } + + /* FSTMD may not store more than 16 doubleword registers at once. 
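The assembly writers above all build their output the same way, with sprintf/strcat into a fixed buffer. A standalone sketch that builds a pop register list from a bit mask in the same style:

    #include <stdio.h>
    #include <string.h>

    /* Build a "pop {...}" register list from a bit mask.  Bit N stands for
       rN; bit 15 is printed as "pc".  BUF must be large enough for the
       longest possible list.  */
    static const char *
    build_pop_pattern (char *buf, unsigned mask)
    {
      int first = 1;

      strcpy (buf, "pop\t{");
      for (int regno = 0; regno <= 15; regno++)
        if (mask & (1u << regno))
          {
            if (!first)
              strcat (buf, ", ");
            if (regno == 15)
              strcat (buf, "pc");
            else
              sprintf (buf + strlen (buf), "r%d", regno);
            first = 0;
          }
      strcat (buf, "}");
      return buf;
    }

    int
    main (void)
    {
      char buf[128];
      puts (build_pop_pattern (buf, (1u << 4) | (1u << 5) | (1u << 15)));
      return 0;
    }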
Split + larger stores into multiple parts (up to a maximum of two, in + practice). */ + if (count > 16) + { + int saved; + /* NOTE: base_reg is an internal register number, so each D register + counts as 2. */ + saved = vfp_emit_fstmd (base_reg + 32, count - 16); + saved += vfp_emit_fstmd (base_reg, 16); + return saved; + } + + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count)); + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1)); + + reg = gen_rtx_REG (DFmode, base_reg); + base_reg += 2; + + XVECEXP (par, 0, 0) + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (BLKmode, + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + plus_constant + (Pmode, stack_pointer_rtx, + - (count * 8))) + ), + gen_rtx_UNSPEC (BLKmode, + gen_rtvec (1, reg), + UNSPEC_PUSH_MULT)); + + tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -(count * 8))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (DFmode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 1) = tmp; + + for (i = 1; i < count; i++) + { + reg = gen_rtx_REG (DFmode, base_reg); + base_reg += 2; + XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg); + + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (DFmode, + plus_constant (Pmode, + stack_pointer_rtx, + i * 8)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp; + } + + par = emit_insn (par); + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (par) = 1; + + return count * 8; +} + +/* Emit a call instruction with pattern PAT. ADDR is the address of + the call target. */ + +void +arm_emit_call_insn (rtx pat, rtx addr) +{ + rtx insn; + + insn = emit_call_insn (pat); + + /* The PIC register is live on entry to VxWorks PIC PLT entries. + If the call might use such an entry, add a use of the PIC register + to the instruction's CALL_INSN_FUNCTION_USAGE. */ + if (TARGET_VXWORKS_RTP + && flag_pic + && GET_CODE (addr) == SYMBOL_REF + && (SYMBOL_REF_DECL (addr) + ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) + : !SYMBOL_REF_LOCAL_P (addr))) + { + require_pic_register (); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg); + } +} + +/* Output a 'call' insn. */ +const char * +output_call (rtx *operands) +{ + gcc_assert (!arm_arch5); /* Patterns should call blx directly. */ + + /* Handle calls to lr using ip (which may be clobbered in subr anyway). */ + if (REGNO (operands[0]) == LR_REGNUM) + { + operands[0] = gen_rtx_REG (SImode, IP_REGNUM); + output_asm_insn ("mov%?\t%0, %|lr", operands); + } + + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + + if (TARGET_INTERWORK || arm_arch4t) + output_asm_insn ("bx%?\t%0", operands); + else + output_asm_insn ("mov%?\t%|pc, %0", operands); + + return ""; +} + +/* Output a 'call' insn that is a reference in memory. This is + disabled for ARMv5 and we prefer a blx instead because otherwise + there's a significant performance overhead. */ +const char * +output_call_mem (rtx *operands) +{ + gcc_assert (!arm_arch5); + if (TARGET_INTERWORK) + { + output_asm_insn ("ldr%?\t%|ip, %0", operands); + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + output_asm_insn ("bx%?\t%|ip", operands); + } + else if (regno_use_in (LR_REGNUM, operands[0])) + { + /* LR is used in the memory address. We load the address in the + first instruction. It's safe to use IP as the target of the + load since the call will kill it anyway. 
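The register-count adjustments in vfp_emit_fstmd are easy to state on plain integers: a push of exactly two D registers grows to three on pre-ARMv6 cores (the ARM10 VFPr1 erratum workaround), pushes of more than 16 D registers are split, and each D register pushed accounts for 8 bytes. A standalone sketch; the base-register bookkeeping is omitted:

    #include <stdio.h>

    /* COUNT is the number of D registers to push.  Returns bytes pushed.  */
    static int
    fstmd_bytes (int count, int have_arm_arch6)
    {
      if (count == 2 && !have_arm_arch6)
        count++;                        /* ARM10 VFPr1: push an extra pair */
      if (count > 16)                   /* FSTMD handles at most 16 D regs */
        return fstmd_bytes (count - 16, have_arm_arch6)
               + fstmd_bytes (16, have_arm_arch6);
      return count * 8;                 /* 8 bytes per D register */
    }

    int
    main (void)
    {
      printf ("%d %d\n", fstmd_bytes (2, 0), fstmd_bytes (20, 1));
      return (fstmd_bytes (2, 0) == 24 && fstmd_bytes (20, 1) == 160) ? 0 : 1;
    }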
*/ + output_asm_insn ("ldr%?\t%|ip, %0", operands); + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + if (arm_arch4t) + output_asm_insn ("bx%?\t%|ip", operands); + else + output_asm_insn ("mov%?\t%|pc, %|ip", operands); + } + else + { + output_asm_insn ("mov%?\t%|lr, %|pc", operands); + output_asm_insn ("ldr%?\t%|pc, %0", operands); + } + + return ""; +} + + +/* Output a move from arm registers to arm registers of a long double + OPERANDS[0] is the destination. + OPERANDS[1] is the source. */ +const char * +output_mov_long_double_arm_from_arm (rtx *operands) +{ + /* We have to be careful here because the two might overlap. */ + int dest_start = REGNO (operands[0]); + int src_start = REGNO (operands[1]); + rtx ops[2]; + int i; + + if (dest_start < src_start) + { + for (i = 0; i < 3; i++) + { + ops[0] = gen_rtx_REG (SImode, dest_start + i); + ops[1] = gen_rtx_REG (SImode, src_start + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + else + { + for (i = 2; i >= 0; i--) + { + ops[0] = gen_rtx_REG (SImode, dest_start + i); + ops[1] = gen_rtx_REG (SImode, src_start + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + + return ""; +} + +void +arm_emit_movpair (rtx dest, rtx src) + { + /* If the src is an immediate, simplify it. */ + if (CONST_INT_P (src)) + { + HOST_WIDE_INT val = INTVAL (src); + emit_set_insn (dest, GEN_INT (val & 0x0000ffff)); + if ((val >> 16) & 0x0000ffff) + emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), + GEN_INT (16)), + GEN_INT ((val >> 16) & 0x0000ffff)); + return; + } + emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); + } + +/* Output a move between double words. It must be REG<-MEM + or MEM<-REG. */ +const char * +output_move_double (rtx *operands, bool emit, int *count) +{ + enum rtx_code code0 = GET_CODE (operands[0]); + enum rtx_code code1 = GET_CODE (operands[1]); + rtx otherops[3]; + if (count) + *count = 1; + + /* The only case when this might happen is when + you are looking at the length of a DImode instruction + that has an invalid constant in it. */ + if (code0 == REG && code1 != MEM) + { + gcc_assert (!emit); + *count = 2; + return ""; + } + + if (code0 == REG) + { + unsigned int reg0 = REGNO (operands[0]); + + otherops[0] = gen_rtx_REG (SImode, 1 + reg0); + + gcc_assert (code1 == MEM); /* Constraints should ensure this. */ + + switch (GET_CODE (XEXP (operands[1], 0))) + { + case REG: + + if (emit) + { + if (TARGET_LDRD + && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0)))) + output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands); + else + output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); + } + break; + + case PRE_INC: + gcc_assert (TARGET_LDRD); + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands); + break; + + case PRE_DEC: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands); + else + output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands); + } + break; + + case POST_INC: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands); + else + output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands); + } + break; + + case POST_DEC: + gcc_assert (TARGET_LDRD); + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands); + break; + + case PRE_MODIFY: + case POST_MODIFY: + /* Autoicrement addressing modes should never have overlapping + base and destination registers, and overlapping index registers + are already prohibited, so this doesn't need to worry about + fix_cm3_ldrd. 
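arm_emit_movpair splits a 32-bit constant into a low half that is set first and a high half that is only written when it is nonzero. A standalone sketch of that split on a plain integer standing in for the register, movw/movt style:

    #include <stdint.h>
    #include <stdio.h>

    /* Materialise the low 16 bits with the top cleared, then, only if the
       high 16 bits are nonzero, deposit them into bits 16..31 while leaving
       the low half alone.  Returns how many instructions that takes.  */
    static int
    emit_movpair_const (uint32_t val, uint32_t *reg)
    {
      uint32_t hi = (val >> 16) & 0xffff;

      *reg = val & 0xffff;                 /* movw-style: low half only */
      if (hi == 0)
        return 1;
      *reg = (*reg & 0xffff) | (hi << 16); /* movt-style: patch the top half */
      return 2;
    }

    int
    main (void)
    {
      uint32_t r;
      int n = emit_movpair_const (0xdead0003u, &r);
      printf ("%d insns, r = 0x%08x\n", n, r);
      return (n == 2 && r == 0xdead0003u) ? 0 : 1;
    }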
*/ + otherops[0] = operands[0]; + otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0); + otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1); + + if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY) + { + if (reg_overlap_mentioned_p (otherops[0], otherops[2])) + { + /* Registers overlap so split out the increment. */ + if (emit) + { + output_asm_insn ("add%?\t%1, %1, %2", otherops); + output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops); + } + if (count) + *count = 2; + } + else + { + /* Use a single insn if we can. + FIXME: IWMMXT allows offsets larger than ldrd can + handle, fix these up with a pair of ldr. */ + if (TARGET_THUMB2 + || !CONST_INT_P (otherops[2]) + || (INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256)) + { + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops); + } + else + { + if (emit) + { + output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops); + output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); + } + if (count) + *count = 2; + + } + } + } + else + { + /* Use a single insn if we can. + FIXME: IWMMXT allows offsets larger than ldrd can handle, + fix these up with a pair of ldr. */ + if (TARGET_THUMB2 + || !CONST_INT_P (otherops[2]) + || (INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256)) + { + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops); + } + else + { + if (emit) + { + output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); + output_asm_insn ("ldr%?\t%0, [%1], %2", otherops); + } + if (count) + *count = 2; + } + } + break; + + case LABEL_REF: + case CONST: + /* We might be able to use ldrd %0, %1 here. However the range is + different to ldr/adr, and it is broken on some ARMv7-M + implementations. */ + /* Use the second register of the pair to avoid problematic + overlap. */ + otherops[1] = operands[1]; + if (emit) + output_asm_insn ("adr%?\t%0, %1", otherops); + operands[1] = otherops[0]; + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("ldr%(d%)\t%0, [%1]", operands); + else + output_asm_insn ("ldm%(ia%)\t%1, %M0", operands); + } + + if (count) + *count = 2; + break; + + /* ??? This needs checking for thumb2. */ + default: + if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1), + GET_MODE (XEXP (XEXP (operands[1], 0), 1)))) + { + otherops[0] = operands[0]; + otherops[1] = XEXP (XEXP (operands[1], 0), 0); + otherops[2] = XEXP (XEXP (operands[1], 0), 1); + + if (GET_CODE (XEXP (operands[1], 0)) == PLUS) + { + if (CONST_INT_P (otherops[2]) && !TARGET_LDRD) + { + switch ((int) INTVAL (otherops[2])) + { + case -8: + if (emit) + output_asm_insn ("ldm%(db%)\t%1, %M0", otherops); + return ""; + case -4: + if (TARGET_THUMB2) + break; + if (emit) + output_asm_insn ("ldm%(da%)\t%1, %M0", otherops); + return ""; + case 4: + if (TARGET_THUMB2) + break; + if (emit) + output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops); + return ""; + } + } + otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1); + operands[1] = otherops[0]; + if (TARGET_LDRD + && (REG_P (otherops[2]) + || TARGET_THUMB2 + || (CONST_INT_P (otherops[2]) + && INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256))) + { + if (reg_overlap_mentioned_p (operands[0], + otherops[2])) + { + rtx tmp; + /* Swap base and index registers over to + avoid a conflict. */ + tmp = otherops[1]; + otherops[1] = otherops[2]; + otherops[2] = tmp; + } + /* If both registers conflict, it will usually + have been fixed by a splitter. 
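The repeated range test above is the whole decision: a doubleword access with a constant index must have the index in (-256, 256) to stay a single LDRD/STRD, Thumb-2 and register indexes are accepted as written, and anything else is split into two single-word accesses. A standalone sketch mirroring that check; it follows the code above rather than the full architectural offset rules:

    #include <stdbool.h>

    /* How many instructions does a doubleword load or store need?  */
    static int
    doubleword_access_insns (bool thumb2, bool index_is_reg, long const_index)
    {
      if (thumb2 || index_is_reg)
        return 1;
      if (const_index > -256 && const_index < 256)
        return 1;                      /* single LDRD/STRD */
      return 2;                        /* e.g. large iWMMXt offsets: two LDRs */
    }

    int
    main (void)
    {
      return (doubleword_access_insns (false, false, 248) == 1
              && doubleword_access_insns (false, false, 1024) == 2) ? 0 : 1;
    }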
*/ + if (reg_overlap_mentioned_p (operands[0], otherops[2]) + || (fix_cm3_ldrd && reg0 == REGNO (otherops[1]))) + { + if (emit) + { + output_asm_insn ("add%?\t%0, %1, %2", otherops); + output_asm_insn ("ldr%(d%)\t%0, [%1]", operands); + } + if (count) + *count = 2; + } + else + { + otherops[0] = operands[0]; + if (emit) + output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops); + } + return ""; + } + + if (CONST_INT_P (otherops[2])) + { + if (emit) + { + if (!(const_ok_for_arm (INTVAL (otherops[2])))) + output_asm_insn ("sub%?\t%0, %1, #%n2", otherops); + else + output_asm_insn ("add%?\t%0, %1, %2", otherops); + } + } + else + { + if (emit) + output_asm_insn ("add%?\t%0, %1, %2", otherops); + } + } + else + { + if (emit) + output_asm_insn ("sub%?\t%0, %1, %2", otherops); + } + + if (count) + *count = 2; + + if (TARGET_LDRD) + return "ldr%(d%)\t%0, [%1]"; + + return "ldm%(ia%)\t%1, %M0"; + } + else + { + otherops[1] = adjust_address (operands[1], SImode, 4); + /* Take care of overlapping base/data reg. */ + if (reg_mentioned_p (operands[0], operands[1])) + { + if (emit) + { + output_asm_insn ("ldr%?\t%0, %1", otherops); + output_asm_insn ("ldr%?\t%0, %1", operands); + } + if (count) + *count = 2; + + } + else + { + if (emit) + { + output_asm_insn ("ldr%?\t%0, %1", operands); + output_asm_insn ("ldr%?\t%0, %1", otherops); + } + if (count) + *count = 2; + } + } + } + } + else + { + /* Constraints should ensure this. */ + gcc_assert (code0 == MEM && code1 == REG); + gcc_assert ((REGNO (operands[1]) != IP_REGNUM) + || (TARGET_ARM && TARGET_LDRD)); + + switch (GET_CODE (XEXP (operands[0], 0))) + { + case REG: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0]", operands); + else + output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); + } + break; + + case PRE_INC: + gcc_assert (TARGET_LDRD); + if (emit) + output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands); + break; + + case PRE_DEC: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands); + else + output_asm_insn ("stm%(db%)\t%m0!, %M1", operands); + } + break; + + case POST_INC: + if (emit) + { + if (TARGET_LDRD) + output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands); + else + output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands); + } + break; + + case POST_DEC: + gcc_assert (TARGET_LDRD); + if (emit) + output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands); + break; + + case PRE_MODIFY: + case POST_MODIFY: + otherops[0] = operands[1]; + otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0); + otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1); + + /* IWMMXT allows offsets larger than ldrd can handle, + fix these up with a pair of ldr. 
*/ + if (!TARGET_THUMB2 + && CONST_INT_P (otherops[2]) + && (INTVAL(otherops[2]) <= -256 + || INTVAL(otherops[2]) >= 256)) + { + if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) + { + if (emit) + { + output_asm_insn ("str%?\t%0, [%1, %2]!", otherops); + output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); + } + if (count) + *count = 2; + } + else + { + if (emit) + { + output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); + output_asm_insn ("str%?\t%0, [%1], %2", otherops); + } + if (count) + *count = 2; + } + } + else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) + { + if (emit) + output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops); + } + else + { + if (emit) + output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops); + } + break; + + case PLUS: + otherops[2] = XEXP (XEXP (operands[0], 0), 1); + if (CONST_INT_P (otherops[2]) && !TARGET_LDRD) + { + switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1))) + { + case -8: + if (emit) + output_asm_insn ("stm%(db%)\t%m0, %M1", operands); + return ""; + + case -4: + if (TARGET_THUMB2) + break; + if (emit) + output_asm_insn ("stm%(da%)\t%m0, %M1", operands); + return ""; + + case 4: + if (TARGET_THUMB2) + break; + if (emit) + output_asm_insn ("stm%(ib%)\t%m0, %M1", operands); + return ""; + } + } + if (TARGET_LDRD + && (REG_P (otherops[2]) + || TARGET_THUMB2 + || (CONST_INT_P (otherops[2]) + && INTVAL (otherops[2]) > -256 + && INTVAL (otherops[2]) < 256))) + { + otherops[0] = operands[1]; + otherops[1] = XEXP (XEXP (operands[0], 0), 0); + if (emit) + output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops); + return ""; + } + /* Fall through */ + + default: + otherops[0] = adjust_address (operands[0], SImode, 4); + otherops[1] = operands[1]; + if (emit) + { + output_asm_insn ("str%?\t%1, %0", operands); + output_asm_insn ("str%?\t%H1, %0", otherops); + } + if (count) + *count = 2; + } + } + + return ""; +} + +/* Output a move, load or store for quad-word vectors in ARM registers. Only + handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */ + +const char * +output_move_quad (rtx *operands) +{ + if (REG_P (operands[0])) + { + /* Load, or reg->reg move. */ + + if (MEM_P (operands[1])) + { + switch (GET_CODE (XEXP (operands[1], 0))) + { + case REG: + output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands); + break; + + case LABEL_REF: + case CONST: + output_asm_insn ("adr%?\t%0, %1", operands); + output_asm_insn ("ldm%(ia%)\t%0, %M0", operands); + break; + + default: + gcc_unreachable (); + } + } + else + { + rtx ops[2]; + int dest, src, i; + + gcc_assert (REG_P (operands[1])); + + dest = REGNO (operands[0]); + src = REGNO (operands[1]); + + /* This seems pretty dumb, but hopefully GCC won't try to do it + very often. */ + if (dest < src) + for (i = 0; i < 4; i++) + { + ops[0] = gen_rtx_REG (SImode, dest + i); + ops[1] = gen_rtx_REG (SImode, src + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + else + for (i = 3; i >= 0; i--) + { + ops[0] = gen_rtx_REG (SImode, dest + i); + ops[1] = gen_rtx_REG (SImode, src + i); + output_asm_insn ("mov%?\t%0, %1", ops); + } + } + } + else + { + gcc_assert (MEM_P (operands[0])); + gcc_assert (REG_P (operands[1])); + gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0])); + + switch (GET_CODE (XEXP (operands[0], 0))) + { + case REG: + output_asm_insn ("stm%(ia%)\t%m0, %M1", operands); + break; + + default: + gcc_unreachable (); + } + } + + return ""; +} + +/* Output a VFP load or store instruction. 
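As a rough illustration of how the sprintf below assembles the mnemonic (illustrative note, not part of the patch): for a DFmode load whose address is a POST_INC, the template expands to "fldmiad%?\t%0!, {%P1}"; the default case gives "fldd%?\t%P0, %1" for a DFmode load and "fsts%?\t%0, %1" for an SFmode store, with "\t%@ int" appended as an assembler comment when the value being moved has an integer mode.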
*/ + +const char * +output_move_vfp (rtx *operands) +{ + rtx reg, mem, addr, ops[2]; + int load = REG_P (operands[0]); + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT; + const char *templ; + char buff[50]; + enum machine_mode mode; + + reg = operands[!load]; + mem = operands[load]; + + mode = GET_MODE (reg); + + gcc_assert (REG_P (reg)); + gcc_assert (IS_VFP_REGNUM (REGNO (reg))); + gcc_assert (mode == SFmode + || mode == DFmode + || mode == SImode + || mode == DImode + || (TARGET_NEON && VALID_NEON_DREG_MODE (mode))); + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + switch (GET_CODE (addr)) + { + case PRE_DEC: + templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + case POST_INC: + templ = "f%smia%c%%?\t%%0!, {%%%s1}%s"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + default: + templ = "f%s%c%%?\t%%%s0, %%1%s"; + ops[0] = reg; + ops[1] = mem; + break; + } + + sprintf (buff, templ, + load ? "ld" : "st", + dp ? 'd' : 's', + dp ? "P" : "", + integer_p ? "\t%@ int" : ""); + output_asm_insn (buff, ops); + + return ""; +} + +/* Output a Neon double-word or quad-word load or store, or a load + or store for larger structure modes. + + WARNING: The ordering of elements is weird in big-endian mode, + because the EABI requires that vectors stored in memory appear + as though they were stored by a VSTM, as required by the EABI. + GCC RTL defines element ordering based on in-memory order. + This can be different from the architectural ordering of elements + within a NEON register. The intrinsics defined in arm_neon.h use the + NEON register element ordering, not the GCC RTL element ordering. + + For example, the in-memory ordering of a big-endian a quadword + vector with 16-bit elements when stored from register pair {d0,d1} + will be (lowest address first, d0[N] is NEON register element N): + + [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]] + + When necessary, quadword registers (dN, dN+1) are moved to ARM + registers from rN in the order: + + dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2) + + So that STM/LDM can be used on vectors in ARM registers, and the + same memory layout will result as if VSTM/VLDM were used. + + Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where + possible, which allows use of appropriate alignment tags. + Note that the choice of "64" is independent of the actual vector + element size; this size simply ensures that the behavior is + equivalent to VSTM/VLDM in both little-endian and big-endian mode. + + Due to limitations of those instructions, use of VST1.64/VLD1.64 + is not possible if: + - the address contains PRE_DEC, or + - the mode refers to more than 4 double-word registers + + In those cases, it would be possible to replace VSTM/VLDM by a + sequence of instructions; this is not currently implemented since + this is not certain to actually improve performance. 
*/ + +const char * +output_move_neon (rtx *operands) +{ + rtx reg, mem, addr, ops[2]; + int regno, nregs, load = REG_P (operands[0]); + const char *templ; + char buff[50]; + enum machine_mode mode; + + reg = operands[!load]; + mem = operands[load]; + + mode = GET_MODE (reg); + + gcc_assert (REG_P (reg)); + regno = REGNO (reg); + nregs = HARD_REGNO_NREGS (regno, mode) / 2; + gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno) + || NEON_REGNO_OK_FOR_QUAD (regno)); + gcc_assert (VALID_NEON_DREG_MODE (mode) + || VALID_NEON_QREG_MODE (mode) + || VALID_NEON_STRUCT_MODE (mode)); + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (plus (...))). */ + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) + addr = XEXP (addr, 0); + + switch (GET_CODE (addr)) + { + case POST_INC: + /* We have to use vldm / vstm for too-large modes. */ + if (nregs > 4) + { + templ = "v%smia%%?\t%%0!, %%h1"; + ops[0] = XEXP (addr, 0); + } + else + { + templ = "v%s1.64\t%%h1, %%A0"; + ops[0] = mem; + } + ops[1] = reg; + break; + + case PRE_DEC: + /* We have to use vldm / vstm in this case, since there is no + pre-decrement form of the vld1 / vst1 instructions. */ + templ = "v%smdb%%?\t%%0!, %%h1"; + ops[0] = XEXP (addr, 0); + ops[1] = reg; + break; + + case POST_MODIFY: + /* FIXME: Not currently enabled in neon_vector_mem_operand. */ + gcc_unreachable (); + + case LABEL_REF: + case PLUS: + { + int i; + int overlap = -1; + for (i = 0; i < nregs; i++) + { + /* We're only using DImode here because it's a convenient size. */ + ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i); + ops[1] = adjust_address (mem, DImode, 8 * i); + if (reg_overlap_mentioned_p (ops[0], mem)) + { + gcc_assert (overlap == -1); + overlap = i; + } + else + { + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, ops); + } + } + if (overlap != -1) + { + ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap); + ops[1] = adjust_address (mem, SImode, 8 * overlap); + sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st"); + output_asm_insn (buff, ops); + } + + return ""; + } + + default: + /* We have to use vldm / vstm for too-large modes. */ + if (nregs > 4) + templ = "v%smia%%?\t%%m0, %%h1"; + else + templ = "v%s1.64\t%%h1, %%A0"; + + ops[0] = mem; + ops[1] = reg; + } + + sprintf (buff, templ, load ? "ld" : "st"); + output_asm_insn (buff, ops); + + return ""; +} + +/* Compute and return the length of neon_mov, where is + one of VSTRUCT modes: EI, OI, CI or XI. */ +int +arm_attr_length_move_neon (rtx insn) +{ + rtx reg, mem, addr; + int load; + enum machine_mode mode; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) + { + mode = GET_MODE (recog_data.operand[0]); + switch (mode) + { + case EImode: + case OImode: + return 8; + case CImode: + return 12; + case XImode: + return 16; + default: + gcc_unreachable (); + } + } + + load = REG_P (recog_data.operand[0]); + reg = recog_data.operand[!load]; + mem = recog_data.operand[load]; + + gcc_assert (MEM_P (mem)); + + mode = GET_MODE (reg); + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (plus (...))). */ + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS) + { + int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; + return insns * 4; + } + else + return 4; +} + +/* Return nonzero if the offset in the address is an immediate. 
Otherwise, + return zero. */ + +int +arm_address_offset_is_imm (rtx insn) +{ + rtx mem, addr; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0])) + return 0; + + mem = recog_data.operand[0]; + + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + if (REG_P (addr) + || (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) + && CONST_INT_P (XEXP (addr, 1)))) + return 1; + else + return 0; +} + +/* Output an ADD r, s, #n where n may be too big for one instruction. + If adding zero to one register, output nothing. */ +const char * +output_add_immediate (rtx *operands) +{ + HOST_WIDE_INT n = INTVAL (operands[2]); + + if (n != 0 || REGNO (operands[0]) != REGNO (operands[1])) + { + if (n < 0) + output_multi_immediate (operands, + "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2, + -n); + else + output_multi_immediate (operands, + "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2, + n); + } + + return ""; +} + +/* Output a multiple immediate operation. + OPERANDS is the vector of operands referred to in the output patterns. + INSTR1 is the output pattern to use for the first constant. + INSTR2 is the output pattern to use for subsequent constants. + IMMED_OP is the index of the constant slot in OPERANDS. + N is the constant value. */ +static const char * +output_multi_immediate (rtx *operands, const char *instr1, const char *instr2, + int immed_op, HOST_WIDE_INT n) +{ +#if HOST_BITS_PER_WIDE_INT > 32 + n &= 0xffffffff; +#endif + + if (n == 0) + { + /* Quick and easy output. */ + operands[immed_op] = const0_rtx; + output_asm_insn (instr1, operands); + } + else + { + int i; + const char * instr = instr1; + + /* Note that n is never zero here (which would give no output). */ + for (i = 0; i < 32; i += 2) + { + if (n & (3 << i)) + { + operands[immed_op] = GEN_INT (n & (255 << i)); + output_asm_insn (instr, operands); + instr = instr2; + i += 6; + } + } + } + + return ""; +} + +/* Return the name of a shifter operation. */ +static const char * +arm_shift_nmem(enum rtx_code code) +{ + switch (code) + { + case ASHIFT: + return ARM_LSL_NAME; + + case ASHIFTRT: + return "asr"; + + case LSHIFTRT: + return "lsr"; + + case ROTATERT: + return "ror"; + + default: + abort(); + } +} + +/* Return the appropriate ARM instruction for the operation code. + The returned result should not be overwritten. OP is the rtx of the + operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator + was shifted. */ +const char * +arithmetic_instr (rtx op, int shift_first_arg) +{ + switch (GET_CODE (op)) + { + case PLUS: + return "add"; + + case MINUS: + return shift_first_arg ? "rsb" : "sub"; + + case IOR: + return "orr"; + + case XOR: + return "eor"; + + case AND: + return "and"; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + return arm_shift_nmem(GET_CODE(op)); + + default: + gcc_unreachable (); + } +} + +/* Ensure valid constant shifts and return the appropriate shift mnemonic + for the operation code. The returned result should not be overwritten. + OP is the rtx code of the shift. + On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant + shift. 
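For illustration (not part of the patch): (mult x 8) is returned as ARM_LSL_NAME with *AMOUNTP set to 3, (rotate x 8) becomes "ror" with *AMOUNTP set to 24, and a register shift count leaves *AMOUNTP at -1. A constant arithmetic left shift of 32 or more is printed as lsr #32, which gives the same result apart from the carry flag, and a shift amount of zero makes the function return NULL so that no shift is printed at all.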
*/ +static const char * +shift_op (rtx op, HOST_WIDE_INT *amountp) +{ + const char * mnem; + enum rtx_code code = GET_CODE (op); + + switch (code) + { + case ROTATE: + if (!CONST_INT_P (XEXP (op, 1))) + { + output_operand_lossage ("invalid shift operand"); + return NULL; + } + + code = ROTATERT; + *amountp = 32 - INTVAL (XEXP (op, 1)); + mnem = "ror"; + break; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + mnem = arm_shift_nmem(code); + if (CONST_INT_P (XEXP (op, 1))) + { + *amountp = INTVAL (XEXP (op, 1)); + } + else if (REG_P (XEXP (op, 1))) + { + *amountp = -1; + return mnem; + } + else + { + output_operand_lossage ("invalid shift operand"); + return NULL; + } + break; + + case MULT: + /* We never have to worry about the amount being other than a + power of 2, since this case can never be reloaded from a reg. */ + if (!CONST_INT_P (XEXP (op, 1))) + { + output_operand_lossage ("invalid shift operand"); + return NULL; + } + + *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF; + + /* Amount must be a power of two. */ + if (*amountp & (*amountp - 1)) + { + output_operand_lossage ("invalid shift operand"); + return NULL; + } + + *amountp = int_log2 (*amountp); + return ARM_LSL_NAME; + + default: + output_operand_lossage ("invalid shift operand"); + return NULL; + } + + /* This is not 100% correct, but follows from the desire to merge + multiplication by a power of 2 with the recognizer for a + shift. >=32 is not a valid shift for "lsl", so we must try and + output a shift that produces the correct arithmetical result. + Using lsr #32 is identical except for the fact that the carry bit + is not set correctly if we set the flags; but we never use the + carry bit from such an operation, so we can ignore that. */ + if (code == ROTATERT) + /* Rotate is just modulo 32. */ + *amountp &= 31; + else if (*amountp != (*amountp & 31)) + { + if (code == ASHIFT) + mnem = "lsr"; + *amountp = 32; + } + + /* Shifts of 0 are no-ops. */ + if (*amountp == 0) + return NULL; + + return mnem; +} + +/* Obtain the shift from the POWER of two. */ + +static HOST_WIDE_INT +int_log2 (HOST_WIDE_INT power) +{ + HOST_WIDE_INT shift = 0; + + while ((((HOST_WIDE_INT) 1 << shift) & power) == 0) + { + gcc_assert (shift <= 31); + shift++; + } + + return shift; +} + +/* Output a .ascii pseudo-op, keeping track of lengths. This is + because /bin/as is horribly restrictive. The judgement about + whether or not each character is 'printable' (and can be output as + is) or not (and must be printed with an octal escape) must be made + with reference to the *host* character set -- the situation is + similar to that discussed in the comments above pp_c_char in + c-pretty-print.c. */ + +#define MAX_ASCII_LEN 51 + +void +output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len) +{ + int i; + int len_so_far = 0; + + fputs ("\t.ascii\t\"", stream); + + for (i = 0; i < len; i++) + { + int c = p[i]; + + if (len_so_far >= MAX_ASCII_LEN) + { + fputs ("\"\n\t.ascii\t\"", stream); + len_so_far = 0; + } + + if (ISPRINT (c)) + { + if (c == '\\' || c == '\"') + { + putc ('\\', stream); + len_so_far++; + } + putc (c, stream); + len_so_far++; + } + else + { + fprintf (stream, "\\%03o", c); + len_so_far += 4; + } + } + + fputs ("\"\n", stream); +} + +/* Compute the register save mask for registers 0 through 12 + inclusive. This code is used by arm_compute_save_reg_mask. 
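A minimal standalone sketch of the shape of this mask (illustrative only, not part of the patch; example_core_save_mask, ever_live[] and call_used[] are hypothetical stand-ins for the dataflow queries used below):

    // Bit N of the result set means "rN must be saved".  This mirrors the
    // normal (non-interrupt) case below: save every call-saved register
    // that the function actually uses.
    unsigned long
    example_core_save_mask (const int ever_live[13], const int call_used[13])
    {
      unsigned long mask = 0;
      for (int reg = 0; reg <= 11; reg++)
        if (ever_live[reg] && !call_used[reg])
          mask |= 1UL << reg;
      return mask;
    }

In the interrupt case a non-leaf IRQ handler additionally saves the call-clobbered registers, so on a typical configuration the mask contains at least r0-r3 and r12, i.e. 0x100f, before any live call-saved registers are added.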
*/ + +static unsigned long +arm_compute_save_reg0_reg12_mask (void) +{ + unsigned long func_type = arm_current_func_type (); + unsigned long save_reg_mask = 0; + unsigned int reg; + + if (IS_INTERRUPT (func_type)) + { + unsigned int max_reg; + /* Interrupt functions must not corrupt any registers, + even call clobbered ones. If this is a leaf function + we can just examine the registers used by the RTL, but + otherwise we have to assume that whatever function is + called might clobber anything, and so we have to save + all the call-clobbered registers as well. */ + if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ) + /* FIQ handlers have registers r8 - r12 banked, so + we only need to check r0 - r7, Normal ISRs only + bank r14 and r15, so we must check up to r12. + r13 is the stack pointer which is always preserved, + so we do not need to consider it here. */ + max_reg = 7; + else + max_reg = 12; + + for (reg = 0; reg <= max_reg; reg++) + if (df_regs_ever_live_p (reg) + || (! crtl->is_leaf && call_used_regs[reg])) + save_reg_mask |= (1 << reg); + + /* Also save the pic base register if necessary. */ + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && crtl->uses_pic_offset_table) + save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + } + else if (IS_VOLATILE(func_type)) + { + /* For noreturn functions we historically omitted register saves + altogether. However this really messes up debugging. As a + compromise save just the frame pointers. Combined with the link + register saved elsewhere this should be sufficient to get + a backtrace. */ + if (frame_pointer_needed) + save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; + if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM)) + save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; + if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM)) + save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM; + } + else + { + /* In the normal case we only need to save those registers + which are call saved and which are used by this function. */ + for (reg = 0; reg <= 11; reg++) + if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) + save_reg_mask |= (1 << reg); + + /* Handle the frame pointer as a special case. */ + if (frame_pointer_needed) + save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; + + /* If we aren't loading the PIC register, + don't stack it even though it may be live. */ + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM) + || crtl->uses_pic_offset_table)) + save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + + /* The prologue will copy SP into R0, so save it. */ + if (IS_STACKALIGN (func_type)) + save_reg_mask |= 1; + } + + /* Save registers so the exception handler can modify them. */ + if (crtl->calls_eh_return) + { + unsigned int i; + + for (i = 0; ; i++) + { + reg = EH_RETURN_DATA_REGNO (i); + if (reg == INVALID_REGNUM) + break; + save_reg_mask |= 1 << reg; + } + } + + return save_reg_mask; +} + +/* Return true if r3 is live at the start of the function. */ + +static bool +arm_r3_live_at_start_p (void) +{ + /* Just look at cfg info, which is still close enough to correct at this + point. This gives false positives for broken functions that might use + uninitialized data that happens to be allocated in r3, but who cares? */ + return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3); +} + +/* Compute the number of bytes used to store the static chain register on the + stack, above the stack frame. 
We need to know this accurately to get the + alignment of the rest of the stack frame correct. */ + +static int +arm_compute_static_chain_stack_bytes (void) +{ + /* See the defining assertion in arm_expand_prologue. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM + && IS_NESTED (arm_current_func_type ()) + && arm_r3_live_at_start_p () + && crtl->args.pretend_args_size == 0) + return 4; + + return 0; +} + +/* Compute a bit mask of which registers need to be + saved on the stack for the current function. + This is used by arm_get_frame_offsets, which may add extra registers. */ + +static unsigned long +arm_compute_save_reg_mask (void) +{ + unsigned int save_reg_mask = 0; + unsigned long func_type = arm_current_func_type (); + unsigned int reg; + + if (IS_NAKED (func_type)) + /* This should never really happen. */ + return 0; + + /* If we are creating a stack frame, then we must save the frame pointer, + IP (which will hold the old stack pointer), LR and the PC. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + save_reg_mask |= + (1 << ARM_HARD_FRAME_POINTER_REGNUM) + | (1 << IP_REGNUM) + | (1 << LR_REGNUM) + | (1 << PC_REGNUM); + + save_reg_mask |= arm_compute_save_reg0_reg12_mask (); + + /* Decide if we need to save the link register. + Interrupt routines have their own banked link register, + so they never need to save it. + Otherwise if we do not use the link register we do not need to save + it. If we are pushing other registers onto the stack however, we + can save an instruction in the epilogue by pushing the link register + now and then popping it back into the PC. This incurs extra memory + accesses though, so we only do it when optimizing for size, and only + if we know that we will not need a fancy return sequence. */ + if (df_regs_ever_live_p (LR_REGNUM) + || (save_reg_mask + && optimize_size + && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->calls_eh_return)) + save_reg_mask |= 1 << LR_REGNUM; + + if (cfun->machine->lr_save_eliminated) + save_reg_mask &= ~ (1 << LR_REGNUM); + + if (TARGET_REALLY_IWMMXT + && ((bit_count (save_reg_mask) + + ARM_NUM_INTS (crtl->args.pretend_args_size + + arm_compute_static_chain_stack_bytes()) + ) % 2) != 0) + { + /* The total number of registers that are going to be pushed + onto the stack is odd. We need to ensure that the stack + is 64-bit aligned before we start to save iWMMXt registers, + and also before we start to create locals. (A local variable + might be a double or long long which we will load/store using + an iWMMXt instruction). Therefore we need to push another + ARM register, so that the stack will be 64-bit aligned. We + try to avoid using the arg registers (r0 -r3) as they might be + used to pass values in a tail call. */ + for (reg = 4; reg <= 12; reg++) + if ((save_reg_mask & (1 << reg)) == 0) + break; + + if (reg <= 12) + save_reg_mask |= (1 << reg); + else + { + cfun->machine->sibcall_blocked = 1; + save_reg_mask |= (1 << 3); + } + } + + /* We may need to push an additional register for use initializing the + PIC base register. */ + if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic + && (save_reg_mask & THUMB2_WORK_REGS) == 0) + { + reg = thumb_find_work_register (1 << 4); + if (!call_used_regs[reg]) + save_reg_mask |= (1 << reg); + } + + return save_reg_mask; +} + + +/* Compute a bit mask of which registers need to be + saved on the stack for the current function. 
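(Illustrative note for the iWMMXt alignment rule in arm_compute_save_reg_mask above, not part of the patch: if {r4, r5, lr} are to be pushed and there are no pretend arguments, three words is odd, so one spare register, r6 in this case, is added to the mask purely to keep the iWMMXt save area and the locals 64-bit aligned; pushing {r4, r5, r6, lr} is already a multiple of eight bytes and needs no padding.)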
*/ +static unsigned long +thumb1_compute_save_reg_mask (void) +{ + unsigned long mask; + unsigned reg; + + mask = 0; + for (reg = 0; reg < 12; reg ++) + if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) + mask |= 1 << reg; + + if (flag_pic + && !TARGET_SINGLE_PIC_BASE + && arm_pic_register != INVALID_REGNUM + && crtl->uses_pic_offset_table) + mask |= 1 << PIC_OFFSET_TABLE_REGNUM; + + /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */ + if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) + mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; + + /* LR will also be pushed if any lo regs are pushed. */ + if (mask & 0xff || thumb_force_lr_save ()) + mask |= (1 << LR_REGNUM); + + /* Make sure we have a low work register if we need one. + We will need one if we are going to push a high register, + but we are not currently intending to push a low register. */ + if ((mask & 0xff) == 0 + && ((mask & 0x0f00) || TARGET_BACKTRACE)) + { + /* Use thumb_find_work_register to choose which register + we will use. If the register is live then we will + have to push it. Use LAST_LO_REGNUM as our fallback + choice for the register to select. */ + reg = thumb_find_work_register (1 << LAST_LO_REGNUM); + /* Make sure the register returned by thumb_find_work_register is + not part of the return value. */ + if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ()) + reg = LAST_LO_REGNUM; + + if (! call_used_regs[reg]) + mask |= 1 << reg; + } + + /* The 504 below is 8 bytes less than 512 because there are two possible + alignment words. We can't tell here if they will be present or not so we + have to play it safe and assume that they are. */ + if ((CALLER_INTERWORKING_SLOT_SIZE + + ROUND_UP_WORD (get_frame_size ()) + + crtl->outgoing_args_size) >= 504) + { + /* This is the same as the code in thumb1_expand_prologue() which + determines which register to use for stack decrement. */ + for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++) + if (mask & (1 << reg)) + break; + + if (reg > LAST_LO_REGNUM) + { + /* Make sure we have a register available for stack decrement. */ + mask |= 1 << LAST_LO_REGNUM; + } + } + + return mask; +} + + +/* Return the number of bytes required to save VFP registers. */ +static int +arm_get_vfp_saved_size (void) +{ + unsigned int regno; + int count; + int saved; + + saved = 0; + /* Space for saved VFP registers. */ + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + count = 0; + for (regno = FIRST_VFP_REGNUM; + regno < LAST_VFP_REGNUM; + regno += 2) + { + if ((!df_regs_ever_live_p (regno) || call_used_regs[regno]) + && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1])) + { + if (count > 0) + { + /* Workaround ARM10 VFPr1 bug. */ + if (count == 2 && !arm_arch6) + count++; + saved += count * 8; + } + count = 0; + } + else + count++; + } + if (count > 0) + { + if (count == 2 && !arm_arch6) + count++; + saved += count * 8; + } + } + return saved; +} + + +/* Generate a function exit sequence. If REALLY_RETURN is false, then do + everything bar the final return instruction. If simple_return is true, + then do not output epilogue, because it has already been emitted in RTL. 
*/ +const char * +output_return_instruction (rtx operand, bool really_return, bool reverse, + bool simple_return) +{ + char conditional[10]; + char instr[100]; + unsigned reg; + unsigned long live_regs_mask; + unsigned long func_type; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + if (IS_NAKED (func_type)) + return ""; + + if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN) + { + /* If this function was declared non-returning, and we have + found a tail call, then we have to trust that the called + function won't return. */ + if (really_return) + { + rtx ops[2]; + + /* Otherwise, trap an attempted return by aborting. */ + ops[0] = operand; + ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" + : "abort"); + assemble_external_libcall (ops[1]); + output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops); + } + + return ""; + } + + gcc_assert (!cfun->calls_alloca || really_return); + + sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd'); + + cfun->machine->return_used_this_function = 1; + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + + if (!simple_return && live_regs_mask) + { + const char * return_reg; + + /* If we do not have any special requirements for function exit + (e.g. interworking) then we can load the return address + directly into the PC. Otherwise we must load it into LR. */ + if (really_return + && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK)) + return_reg = reg_names[PC_REGNUM]; + else + return_reg = reg_names[LR_REGNUM]; + + if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM)) + { + /* There are three possible reasons for the IP register + being saved. 1) a stack frame was created, in which case + IP contains the old stack pointer, or 2) an ISR routine + corrupted it, or 3) it was saved to align the stack on + iWMMXt. In case 1, restore IP into SP, otherwise just + restore IP. */ + if (frame_pointer_needed) + { + live_regs_mask &= ~ (1 << IP_REGNUM); + live_regs_mask |= (1 << SP_REGNUM); + } + else + gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT); + } + + /* On some ARM architectures it is faster to use LDR rather than + LDM to load a single register. On other architectures, the + cost is the same. In 26 bit mode, or for exception handlers, + we have to use LDM to load the PC so that the CPSR is also + restored. */ + for (reg = 0; reg <= LAST_ARM_REGNUM; reg++) + if (live_regs_mask == (1U << reg)) + break; + + if (reg <= LAST_ARM_REGNUM + && (reg != LR_REGNUM + || ! really_return + || ! IS_INTERRUPT (func_type))) + { + sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional, + (reg == LR_REGNUM) ? return_reg : reg_names[reg]); + } + else + { + char *p; + int first = 1; + + /* Generate the load multiple instruction to restore the + registers. Note we can get here, even if + frame_pointer_needed is true, but only if sp already + points to the base of the saved core registers. */ + if (live_regs_mask & (1 << SP_REGNUM)) + { + unsigned HOST_WIDE_INT stack_adjust; + + stack_adjust = offsets->outgoing_args - offsets->saved_regs; + gcc_assert (stack_adjust == 0 || stack_adjust == 4); + + if (stack_adjust && arm_arch5 && TARGET_ARM) + if (TARGET_UNIFIED_ASM) + sprintf (instr, "ldmib%s\t%%|sp, {", conditional); + else + sprintf (instr, "ldm%sib\t%%|sp, {", conditional); + else + { + /* If we can't use ldmib (SA110 bug), + then try to pop r3 instead. 
*/ + if (stack_adjust) + live_regs_mask |= 1 << 3; + + if (TARGET_UNIFIED_ASM) + sprintf (instr, "ldmfd%s\t%%|sp, {", conditional); + else + sprintf (instr, "ldm%sfd\t%%|sp, {", conditional); + } + } + else + if (TARGET_UNIFIED_ASM) + sprintf (instr, "pop%s\t{", conditional); + else + sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional); + + p = instr + strlen (instr); + + for (reg = 0; reg <= SP_REGNUM; reg++) + if (live_regs_mask & (1 << reg)) + { + int l = strlen (reg_names[reg]); + + if (first) + first = 0; + else + { + memcpy (p, ", ", 2); + p += 2; + } + + memcpy (p, "%|", 2); + memcpy (p + 2, reg_names[reg], l); + p += l + 2; + } + + if (live_regs_mask & (1 << LR_REGNUM)) + { + sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg); + /* If returning from an interrupt, restore the CPSR. */ + if (IS_INTERRUPT (func_type)) + strcat (p, "^"); + } + else + strcpy (p, "}"); + } + + output_asm_insn (instr, & operand); + + /* See if we need to generate an extra instruction to + perform the actual function return. */ + if (really_return + && func_type != ARM_FT_INTERWORKED + && (live_regs_mask & (1 << LR_REGNUM)) != 0) + { + /* The return has already been handled + by loading the LR into the PC. */ + return ""; + } + } + + if (really_return) + { + switch ((int) ARM_FUNC_TYPE (func_type)) + { + case ARM_FT_ISR: + case ARM_FT_FIQ: + /* ??? This is wrong for unified assembly syntax. */ + sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional); + break; + + case ARM_FT_INTERWORKED: + sprintf (instr, "bx%s\t%%|lr", conditional); + break; + + case ARM_FT_EXCEPTION: + /* ??? This is wrong for unified assembly syntax. */ + sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional); + break; + + default: + /* Use bx if it's available. */ + if (arm_arch5 || arm_arch4t) + sprintf (instr, "bx%s\t%%|lr", conditional); + else + sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional); + break; + } + + output_asm_insn (instr, & operand); + } + + return ""; +} + +/* Write the function name into the code section, directly preceding + the function prologue. + + Code will be output similar to this: + t0 + .ascii "arm_poke_function_name", 0 + .align + t1 + .word 0xff000000 + (t1 - t0) + arm_poke_function_name + mov ip, sp + stmfd sp!, {fp, ip, lr, pc} + sub fp, ip, #4 + + When performing a stack backtrace, code can inspect the value + of 'pc' stored at 'fp' + 0. If the trace function then looks + at location pc - 12 and the top 8 bits are set, then we know + that there is a function name embedded immediately preceding this + location and has length ((pc[-3]) & 0xff000000). + + We assume that pc is declared as a pointer to an unsigned long. + + It is of no benefit to output the function name if we are assembling + a leaf function. These function types will not contain a stack + backtrace structure, therefore it is not possible to determine the + function name. */ +void +arm_poke_function_name (FILE *stream, const char *name) +{ + unsigned long alignlength; + unsigned long length; + rtx x; + + length = strlen (name) + 1; + alignlength = ROUND_UP_WORD (length); + + ASM_OUTPUT_ASCII (stream, name, length); + ASM_OUTPUT_ALIGN (stream, 2); + x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength); + assemble_aligned_integer (UNITS_PER_WORD, x); +} + +/* Place some comments into the assembler stream + describing the current function. */ +static void +arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) +{ + unsigned long func_type; + + /* ??? Do we want to print some of the below anyway? 
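A minimal sketch of how a backtrace routine might consume the marker emitted by arm_poke_function_name above (illustrative only, not part of the patch; poked_name and entry are hypothetical names, and entry is assumed to already be the address of the function's first instruction, however the unwinder derived it):

    #include <stddef.h>

    // The word immediately before the first instruction is
    // 0xff000000 + padded_length; the name string (NUL terminated and
    // padded to a word boundary) sits immediately before that word.
    static const char *
    poked_name (const unsigned long *entry)
    {
      unsigned long marker = entry[-1];
      if ((marker & 0xff000000UL) != 0xff000000UL)
        return NULL;                      // no name was poked here
      return (const char *) entry - 4 - (marker & 0x00ffffffUL);
    }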
*/ + if (TARGET_THUMB1) + return; + + /* Sanity check. */ + gcc_assert (!arm_ccfsm_state && !arm_target_insn); + + func_type = arm_current_func_type (); + + switch ((int) ARM_FUNC_TYPE (func_type)) + { + default: + case ARM_FT_NORMAL: + break; + case ARM_FT_INTERWORKED: + asm_fprintf (f, "\t%@ Function supports interworking.\n"); + break; + case ARM_FT_ISR: + asm_fprintf (f, "\t%@ Interrupt Service Routine.\n"); + break; + case ARM_FT_FIQ: + asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n"); + break; + case ARM_FT_EXCEPTION: + asm_fprintf (f, "\t%@ ARM Exception Handler.\n"); + break; + } + + if (IS_NAKED (func_type)) + asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n"); + + if (IS_VOLATILE (func_type)) + asm_fprintf (f, "\t%@ Volatile: function does not return.\n"); + + if (IS_NESTED (func_type)) + asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n"); + if (IS_STACKALIGN (func_type)) + asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n"); + + asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n", + crtl->args.size, + crtl->args.pretend_args_size, frame_size); + + asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n", + frame_pointer_needed, + cfun->machine->uses_anonymous_args); + + if (cfun->machine->lr_save_eliminated) + asm_fprintf (f, "\t%@ link register save eliminated.\n"); + + if (crtl->calls_eh_return) + asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n"); + +} + +static void +arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, + HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED) +{ + arm_stack_offsets *offsets; + + if (TARGET_THUMB1) + { + int regno; + + /* Emit any call-via-reg trampolines that are needed for v4t support + of call_reg and call_value_reg type insns. */ + for (regno = 0; regno < LR_REGNUM; regno++) + { + rtx label = cfun->machine->call_via[regno]; + + if (label != NULL) + { + switch_to_section (function_section (current_function_decl)); + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (label)); + asm_fprintf (asm_out_file, "\tbx\t%r\n", regno); + } + } + + /* ??? Probably not safe to set this here, since it assumes that a + function will be emitted as assembly immediately after we generate + RTL for it. This does not happen for inline functions. */ + cfun->machine->return_used_this_function = 0; + } + else /* TARGET_32BIT */ + { + /* We need to take into account any stack-frame rounding. */ + offsets = arm_get_frame_offsets (); + + gcc_assert (!use_return_insn (FALSE, NULL) + || (cfun->machine->return_used_this_function != 0) + || offsets->saved_regs == offsets->outgoing_args + || frame_pointer_needed); + + /* Reset the ARM-specific per-function variables. */ + after_arm_reorg = 0; + } +} + +/* Generate and emit a sequence of insns equivalent to PUSH, but using + STR and STRD. If an even number of registers are being pushed, one + or more STRD patterns are created for each register pair. If an + odd number of registers are pushed, emit an initial STR followed by + as many STRD instructions as are needed. This works best when the + stack is initially 64-bit aligned (the normal case), since it + ensures that each STRD is also 64-bit aligned. 
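For example (illustrative, not part of the patch), pushing {r4, r5, r6} emits roughly

    str   r4, [sp, #-12]!
    strd  r5, r6, [sp, #4]

so the single STR both stores the odd register and allocates the whole 12-byte area, leaving the following STRD on a 64-bit boundary.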
*/ +static void +thumb2_emit_strd_push (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i; + int regno; + rtx par = NULL_RTX; + rtx dwarf = NULL_RTX; + rtx tmp; + bool first = true; + + num_regs = bit_count (saved_regs_mask); + + /* Must be at least one register to save, and can't save SP or PC. */ + gcc_assert (num_regs > 0 && num_regs <= 14); + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + + /* Create sequence for DWARF info. All the frame-related data for + debugging is held in this wrapper. */ + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* Describe the stack adjustment. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + /* Find the first register. */ + for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++) + ; + + i = 0; + + /* If there's an odd number of registers to push. Start off by + pushing a single register. This ensures that subsequent strd + operations are dword aligned (assuming that SP was originally + 64-bit aligned). */ + if ((num_regs & 1) != 0) + { + rtx reg, mem, insn; + + reg = gen_rtx_REG (SImode, regno); + if (num_regs == 1) + mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)); + else + mem = gen_frame_mem (Pmode, + gen_rtx_PRE_MODIFY + (Pmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * num_regs))); + + tmp = gen_rtx_SET (VOIDmode, mem, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + insn = emit_insn (tmp); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + i++; + regno++; + XVECEXP (dwarf, 0, i) = tmp; + first = false; + } + + while (i < num_regs) + if (saved_regs_mask & (1 << regno)) + { + rtx reg1, reg2, mem1, mem2; + rtx tmp0, tmp1, tmp2; + int regno2; + + /* Find the register to pair with this one. 
*/ + for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0; + regno2++) + ; + + reg1 = gen_rtx_REG (SImode, regno); + reg2 = gen_rtx_REG (SImode, regno2); + + if (first) + { + rtx insn; + + first = false; + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * num_regs)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + -4 * (num_regs - 1))); + tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 * (num_regs))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp0) = 1; + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3)); + XVECEXP (par, 0, 0) = tmp0; + XVECEXP (par, 0, 1) = tmp1; + XVECEXP (par, 0, 2) = tmp2; + insn = emit_insn (par); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + else + { + mem1 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)); + mem2 = gen_frame_mem (Pmode, plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))); + tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1); + tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2); + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = tmp1; + XVECEXP (par, 0, 1) = tmp2; + emit_insn (par); + } + + /* Create unwind information. This is an approximation. */ + tmp1 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * i)), + reg1); + tmp2 = gen_rtx_SET (VOIDmode, + gen_frame_mem (Pmode, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * (i + 1))), + reg2); + + RTX_FRAME_RELATED_P (tmp1) = 1; + RTX_FRAME_RELATED_P (tmp2) = 1; + XVECEXP (dwarf, 0, i + 1) = tmp1; + XVECEXP (dwarf, 0, i + 2) = tmp2; + i += 2; + regno = regno2 + 1; + } + else + regno++; + + return; +} + +/* STRD in ARM mode requires consecutive registers. This function emits STRD + whenever possible, otherwise it emits single-word stores. The first store + also allocates stack space for all saved registers, using writeback with + post-addressing mode. All other stores use offset addressing. If no STRD + can be emitted, this function emits a sequence of single-word stores, + and not an STM as before, because single-word stores provide more freedom + scheduling and can be turned into an STM by peephole optimizations. */ +static void +arm_emit_strd_push (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j, dwarf_index = 0; + int offset = 0; + rtx dwarf = NULL_RTX; + rtx insn = NULL_RTX; + rtx tmp, mem; + + /* TODO: A more efficient code can be emitted by changing the + layout, e.g., first push all pairs that can use STRD to keep the + stack aligned, and then push all other registers. */ + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + gcc_assert (num_regs > 0); + + /* Create sequence for DWARF info. */ + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* For dwarf info, we generate explicit stack update. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + /* Save registers. 
*/ + offset = - 4 * num_regs; + j = 0; + while (j <= LAST_ARM_REGNUM) + if (saved_regs_mask & (1 << j)) + { + if ((j % 2 == 0) + && (saved_regs_mask & (1 << (j + 1)))) + { + /* Current register and previous register form register pair for + which STRD can be generated. */ + if (offset < 0) + { + /* Allocate stack space for all saved registers. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, offset); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + mem = gen_frame_mem (DImode, tmp); + offset = 0; + } + else if (offset > 0) + mem = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (DImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + + /* Record the first store insn. */ + if (dwarf_index == 1) + insn = tmp; + + /* Generate dwarf info. */ + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset + 4)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + offset += 8; + j += 2; + } + else + { + /* Emit a single word store. */ + if (offset < 0) + { + /* Allocate stack space for all saved registers. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, offset); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + mem = gen_frame_mem (SImode, tmp); + offset = 0; + } + else if (offset > 0) + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (SImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + + /* Record the first store insn. */ + if (dwarf_index == 1) + insn = tmp; + + /* Generate dwarf info. */ + mem = gen_frame_mem (SImode, + plus_constant(Pmode, + stack_pointer_rtx, + offset)); + tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_index++) = tmp; + + offset += 4; + j += 1; + } + } + else + j++; + + /* Attach dwarf info to the first insn we generate. */ + gcc_assert (insn != NULL_RTX); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Generate and emit an insn that we will recognize as a push_multi. + Unfortunately, since this insn does not reflect very well the actual + semantics of the operation, we need to annotate the insn for the benefit + of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of + MASK for registers that should be annotated for DWARF2 frame unwind + information. */ +static rtx +emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask) +{ + int num_regs = 0; + int num_dwarf_regs = 0; + int i, j; + rtx par; + rtx dwarf; + int dwarf_par_index; + rtx tmp, reg; + + /* We don't record the PC in the dwarf frame information. 
*/ + dwarf_regs_mask &= ~(1 << PC_REGNUM); + + for (i = 0; i <= LAST_ARM_REGNUM; i++) + { + if (mask & (1 << i)) + num_regs++; + if (dwarf_regs_mask & (1 << i)) + num_dwarf_regs++; + } + + gcc_assert (num_regs && num_regs <= 16); + gcc_assert ((dwarf_regs_mask & ~mask) == 0); + + /* For the body of the insn we are going to generate an UNSPEC in + parallel with several USEs. This allows the insn to be recognized + by the push_multi pattern in the arm.md file. + + The body of the insn looks something like this: + + (parallel [ + (set (mem:BLK (pre_modify:SI (reg:SI sp) + (const_int:SI ))) + (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT)) + (use (reg:SI XX)) + (use (reg:SI YY)) + ... + ]) + + For the frame note however, we try to be more explicit and actually + show each register being stored into the stack frame, plus a (single) + decrement of the stack pointer. We do it this way in order to be + friendly to the stack unwinding code, which only wants to see a single + stack decrement per instruction. The RTL we generate for the note looks + something like this: + + (sequence [ + (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20))) + (set (mem:SI (reg:SI sp)) (reg:SI r4)) + (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX)) + (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY)) + ... + ]) + + FIXME:: In an ideal world the PRE_MODIFY would not exist and + instead we'd have a parallel expression detailing all + the stores to the various memory addresses so that debug + information is more up-to-date. Remember however while writing + this to take care of the constraints with the push instruction. + + Note also that this has to be taken care of for the VFP registers. + + For more see PR43399. */ + + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs)); + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1)); + dwarf_par_index = 1; + + for (i = 0; i <= LAST_ARM_REGNUM; i++) + { + if (mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + + XVECEXP (par, 0, 0) + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (BLKmode, + gen_rtx_PRE_MODIFY (Pmode, + stack_pointer_rtx, + plus_constant + (Pmode, stack_pointer_rtx, + -4 * num_regs)) + ), + gen_rtx_UNSPEC (BLKmode, + gen_rtvec (1, reg), + UNSPEC_PUSH_MULT)); + + if (dwarf_regs_mask & (1 << i)) + { + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (SImode, stack_pointer_rtx), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; + } + + break; + } + } + + for (j = 1, i++; j < num_regs; i++) + { + if (mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + + XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg); + + if (dwarf_regs_mask & (1 << i)) + { + tmp + = gen_rtx_SET (VOIDmode, + gen_frame_mem + (SImode, + plus_constant (Pmode, stack_pointer_rtx, + 4 * j)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; + } + + j++; + } + } + + par = emit_insn (par); + + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); + + return par; +} + +/* Add a REG_CFA_ADJUST_CFA REG note to INSN. + SIZE is the offset to be adjusted. + DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. 
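(Illustrative instance of the note described above, not part of the patch: for a mask of {r4, r5, lr} the insn itself is the PARALLEL with a pre_modify of sp by -12, the UNSPEC_PUSH_MULT of r4 and two USEs, which assembles to a single push {r4, r5, lr}, while the attached REG_FRAME_RELATED_EXPR sequence spells out sp = sp - 12, [sp] = r4, [sp + 4] = r5 and [sp + 8] = lr for the unwinder.)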
*/ +static void +arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src) +{ + rtx dwarf; + + RTX_FRAME_RELATED_P (insn) = 1; + dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size)); + add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf); +} + +/* Generate and emit an insn pattern that we will recognize as a pop_multi. + SAVED_REGS_MASK shows which registers need to be restored. + + Unfortunately, since this insn does not reflect very well the actual + semantics of the operation, we need to annotate the insn for the benefit + of DWARF2 frame unwind information. */ +static void +arm_emit_multi_reg_pop (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j; + rtx par; + rtx dwarf = NULL_RTX; + rtx tmp, reg; + bool return_in_pc; + int offset_adj; + int emit_update; + + return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false; + offset_adj = return_in_pc ? 1 : 0; + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + gcc_assert (num_regs && num_regs <= 16); + + /* If SP is in reglist, then we don't emit SP update insn. */ + emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1; + + /* The parallel needs to hold num_regs SETs + and one SET for the stack update. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj)); + + if (return_in_pc) + { + tmp = ret_rtx; + XVECEXP (par, 0, 0) = tmp; + } + + if (emit_update) + { + /* Increment the stack pointer, based on there being + num_regs 4-byte registers to restore. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + 4 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, offset_adj) = tmp; + } + + /* Now restore every reg, which may include PC. */ + for (j = 0, i = 0; j < num_regs; i++) + if (saved_regs_mask & (1 << i)) + { + reg = gen_rtx_REG (SImode, i); + if ((num_regs == 1) && emit_update && !return_in_pc) + { + /* Emit single load with writeback. */ + tmp = gen_frame_mem (SImode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx)); + tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp)); + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + return; + } + + tmp = gen_rtx_SET (VOIDmode, + reg, + gen_frame_mem + (SImode, + plus_constant (Pmode, stack_pointer_rtx, 4 * j))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, j + emit_update + offset_adj) = tmp; + + /* We need to maintain a sequence for DWARF info too. As dwarf info + should not have PC, skip PC. */ + if (i != PC_REGNUM) + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + j++; + } + + if (return_in_pc) + par = emit_jump_insn (par); + else + par = emit_insn (par); + + REG_NOTES (par) = dwarf; + if (!return_in_pc) + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs, + stack_pointer_rtx, stack_pointer_rtx); +} + +/* Generate and emit an insn pattern that we will recognize as a pop_multi + of NUM_REGS consecutive VFP regs, starting at FIRST_REG. + + Unfortunately, since this insn does not reflect very well the actual + semantics of the operation, we need to annotate the insn for the benefit + of DWARF2 frame unwind information. */ +static void +arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg) +{ + int i, j; + rtx par; + rtx dwarf = NULL_RTX; + rtx tmp, reg; + + gcc_assert (num_regs && num_regs <= 32); + + /* Workaround ARM10 VFPr1 bug. 
*/ + if (num_regs == 2 && !arm_arch6) + { + if (first_reg == 15) + first_reg--; + + num_regs++; + } + + /* We can emit at most 16 D-registers in a single pop_multi instruction, and + there could be up to 32 D-registers to restore. + If there are more than 16 D-registers, make two recursive calls, + each of which emits one pop_multi instruction. */ + if (num_regs > 16) + { + arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg); + arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg); + return; + } + + /* The parallel needs to hold num_regs SETs + and one SET for the stack update. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1)); + + /* Increment the stack pointer, based on there being + num_regs 8-byte registers to restore. */ + tmp = gen_rtx_SET (VOIDmode, + base_reg, + plus_constant (Pmode, base_reg, 8 * num_regs)); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, 0) = tmp; + + /* Now show every reg that will be restored, using a SET for each. */ + for (j = 0, i=first_reg; j < num_regs; i += 2) + { + reg = gen_rtx_REG (DFmode, i); + + tmp = gen_rtx_SET (VOIDmode, + reg, + gen_frame_mem + (DFmode, + plus_constant (Pmode, base_reg, 8 * j))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, j + 1) = tmp; + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + j++; + } + + par = emit_insn (par); + REG_NOTES (par) = dwarf; + + /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */ + if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM) + { + RTX_FRAME_RELATED_P (par) = 1; + add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx); + } + else + arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs, + base_reg, base_reg); +} + +/* Generate and emit a pattern that will be recognized as LDRD pattern. If even + number of registers are being popped, multiple LDRD patterns are created for + all register pairs. If odd number of registers are popped, last register is + loaded by using LDR pattern. */ +static void +thumb2_emit_ldrd_pop (unsigned long saved_regs_mask) +{ + int num_regs = 0; + int i, j; + rtx par = NULL_RTX; + rtx dwarf = NULL_RTX; + rtx tmp, reg, tmp1; + bool return_in_pc; + + return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false; + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + gcc_assert (num_regs && num_regs <= 16); + + /* We cannot generate ldrd for PC. Hence, reduce the count if PC is + to be popped. So, if num_regs is even, now it will become odd, + and we can generate pop with PC. If num_regs is odd, it will be + even now, and ldr with return can be generated for PC. */ + if (return_in_pc) + num_regs--; + + gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); + + /* Var j iterates over all the registers to gather all the registers in + saved_regs_mask. Var i gives index of saved registers in stack frame. + A PARALLEL RTX of register-pair is created here, so that pattern for + LDRD can be matched. As PC is always last register to be popped, and + we have already decremented num_regs if PC, we don't have to worry + about PC in this loop. */ + for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++) + if (saved_regs_mask & (1 << j)) + { + /* Create RTX for memory load. 
*/ + reg = gen_rtx_REG (SImode, j); + tmp = gen_rtx_SET (SImode, + reg, + gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, 4 * i))); + RTX_FRAME_RELATED_P (tmp) = 1; + + if (i % 2 == 0) + { + /* When saved-register index (i) is even, the RTX to be emitted is + yet to be created. Hence create it first. The LDRD pattern we + are generating is : + [ (SET (reg_t0) (MEM (PLUS (SP) (NUM)))) + (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ] + where target registers need not be consecutive. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + dwarf = NULL_RTX; + } + + /* ith register is added in PARALLEL RTX. If i is even, the reg_i is + added as 0th element and if i is odd, reg_i is added as 1st element + of LDRD pattern shown above. */ + XVECEXP (par, 0, (i % 2)) = tmp; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + if ((i % 2) == 1) + { + /* When saved-register index (i) is odd, RTXs for both the registers + to be loaded are generated in above given LDRD pattern, and the + pattern can be emitted now. */ + par = emit_insn (par); + REG_NOTES (par) = dwarf; + RTX_FRAME_RELATED_P (par) = 1; + } + + i++; + } + + /* If the number of registers pushed is odd AND return_in_pc is false OR + number of registers are even AND return_in_pc is true, last register is + popped using LDR. It can be PC as well. Hence, adjust the stack first and + then LDR with post increment. */ + + /* Increment the stack pointer, based on there being + num_regs 4-byte registers to restore. */ + tmp = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, 4 * i)); + RTX_FRAME_RELATED_P (tmp) = 1; + tmp = emit_insn (tmp); + if (!return_in_pc) + { + arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i, + stack_pointer_rtx, stack_pointer_rtx); + } + + dwarf = NULL_RTX; + + if (((num_regs % 2) == 1 && !return_in_pc) + || ((num_regs % 2) == 0 && return_in_pc)) + { + /* Scan for the single register to be popped. Skip until the saved + register is found. */ + for (; (saved_regs_mask & (1 << j)) == 0; j++); + + /* Gen LDR with post increment here. */ + tmp1 = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (tmp1, get_frame_alias_set ()); + + reg = gen_rtx_REG (SImode, j); + tmp = gen_rtx_SET (SImode, reg, tmp1); + RTX_FRAME_RELATED_P (tmp) = 1; + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + + if (return_in_pc) + { + /* If return_in_pc, j must be PC_REGNUM. */ + gcc_assert (j == PC_REGNUM); + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = ret_rtx; + XVECEXP (par, 0, 1) = tmp; + par = emit_jump_insn (par); + } + else + { + par = emit_insn (tmp); + REG_NOTES (par) = dwarf; + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); + } + + } + else if ((num_regs % 2) == 1 && return_in_pc) + { + /* There are 2 registers to be popped. So, generate the pattern + pop_multiple_with_stack_update_and_return to pop in PC. */ + arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1))); + } + + return; +} + +/* LDRD in ARM mode needs consecutive registers as operands. This function + emits LDRD whenever possible, otherwise it emits single-word loads. It uses + offset addressing and then generates one separate stack udpate. This provides + more scheduling freedom, compared to writeback on every load. However, + if the function returns using load into PC directly + (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated + before the last load. 
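As a rough illustration (not part of the patch), with SAVED_REGS_MASK covering {r4, r5, r6, r7, r8, pc} this emits approximately

    ldrd  r4, r5, [sp]
    ldrd  r6, r7, [sp, #8]
    ldr   r8, [sp, #16]
    add   sp, sp, #20
    ldr   pc, [sp], #4

with the final post-indexed load doubling as the return.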
TODO: Add a peephole optimization to recognize + the new epilogue sequence as an LDM instruction whenever possible. TODO: Add + peephole optimization to merge the load at stack-offset zero + with the stack update instruction using load with writeback + in post-index addressing mode. */ +static void +arm_emit_ldrd_pop (unsigned long saved_regs_mask) +{ + int j = 0; + int offset = 0; + rtx par = NULL_RTX; + rtx dwarf = NULL_RTX; + rtx tmp, mem; + + /* Restore saved registers. */ + gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM)))); + j = 0; + while (j <= LAST_ARM_REGNUM) + if (saved_regs_mask & (1 << j)) + { + if ((j % 2) == 0 + && (saved_regs_mask & (1 << (j + 1))) + && (j + 1) != PC_REGNUM) + { + /* Current register and next register form register pair for which + LDRD can be generated. PC is always the last register popped, and + we handle it separately. */ + if (offset > 0) + mem = gen_frame_mem (DImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (DImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem); + tmp = emit_insn (tmp); + RTX_FRAME_RELATED_P (tmp) = 1; + + /* Generate dwarf info. */ + + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j), + NULL_RTX); + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j + 1), + dwarf); + + REG_NOTES (tmp) = dwarf; + + offset += 8; + j += 2; + } + else if (j != PC_REGNUM) + { + /* Emit a single word load. */ + if (offset > 0) + mem = gen_frame_mem (SImode, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + else + mem = gen_frame_mem (SImode, stack_pointer_rtx); + + tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem); + tmp = emit_insn (tmp); + RTX_FRAME_RELATED_P (tmp) = 1; + + /* Generate dwarf info. */ + REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, j), + NULL_RTX); + + offset += 4; + j += 1; + } + else /* j == PC_REGNUM */ + j++; + } + else + j++; + + /* Update the stack. */ + if (offset > 0) + { + tmp = gen_rtx_SET (Pmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + offset)); + tmp = emit_insn (tmp); + arm_add_cfa_adjust_cfa_note (tmp, offset, + stack_pointer_rtx, stack_pointer_rtx); + offset = 0; + } + + if (saved_regs_mask & (1 << PC_REGNUM)) + { + /* Only PC is to be popped. */ + par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (par, 0, 0) = ret_rtx; + tmp = gen_rtx_SET (SImode, + gen_rtx_REG (SImode, PC_REGNUM), + gen_frame_mem (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (par, 0, 1) = tmp; + par = emit_jump_insn (par); + + /* Generate dwarf info. */ + dwarf = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, PC_REGNUM), + NULL_RTX); + REG_NOTES (par) = dwarf; + arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); + } +} + +/* Calculate the size of the return value that is passed in registers. */ +static unsigned +arm_size_return_regs (void) +{ + enum machine_mode mode; + + if (crtl->return_rtx != 0) + mode = GET_MODE (crtl->return_rtx); + else + mode = DECL_MODE (DECL_RESULT (current_function_decl)); + + return GET_MODE_SIZE (mode); +} + +/* Return true if the current function needs to save/restore LR. 
*/ +static bool +thumb_force_lr_save (void) +{ + return !cfun->machine->lr_save_eliminated + && (!leaf_function_p () + || thumb_far_jump_used_p () + || df_regs_ever_live_p (LR_REGNUM)); +} + +/* We do not know if r3 will be available because + there is an indirect tailcall happening in this + particular case. */ +static bool +is_indirect_tailcall_p (rtx call) +{ + rtx pat = PATTERN (call); + + /* Indirect tail call. */ + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) + pat = SET_SRC (pat); + + pat = XEXP (XEXP (pat, 0), 0); + return REG_P (pat); +} + +/* Return true if r3 is used by any of the tail call insns in the + current function. */ +static bool +any_sibcall_could_use_r3 (void) +{ + edge_iterator ei; + edge e; + + if (!crtl->tail_call_emit) + return false; + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + if (e->flags & EDGE_SIBCALL) + { + rtx call = BB_END (e->src); + if (!CALL_P (call)) + call = prev_nonnote_nondebug_insn (call); + gcc_assert (CALL_P (call) && SIBLING_CALL_P (call)); + if (find_regno_fusage (call, USE, 3) + || is_indirect_tailcall_p (call)) + return true; + } + return false; +} + + +/* Compute the distance from register FROM to register TO. + These can be the arg pointer (26), the soft frame pointer (25), + the stack pointer (13) or the hard frame pointer (11). + In thumb mode r7 is used as the soft frame pointer, if needed. + A typical stack layout looks like this: + + old stack pointer -> | | + ---- + | | \ + | | saved arguments for + | | vararg functions + | | / + -- + hard FP & arg pointer -> | | \ + | | stack + | | frame + | | / + -- + | | \ + | | call saved + | | registers + soft frame pointer -> | | / + -- + | | \ + | | local + | | variables + locals base pointer -> | | / + -- + | | \ + | | outgoing + | | arguments + current stack pointer -> | | / + -- + + For a given function some or all of these stack components + may not be needed, giving rise to the possibility of + eliminating some of the registers. + + The values returned by this function must reflect the behavior + of arm_expand_prologue() and arm_compute_save_reg_mask(). + + The sign of the number returned reflects the direction of stack + growth, so the values are positive for all eliminations except + from the soft frame pointer to the hard frame pointer. + + SFP may point just inside the local variables block to ensure correct + alignment. */ + + +/* Calculate stack offsets. These are used to calculate register elimination + offsets and in prologue/epilogue code. Also calculates which registers + should be saved. */ + +static arm_stack_offsets * +arm_get_frame_offsets (void) +{ + struct arm_stack_offsets *offsets; + unsigned long func_type; + int leaf; + int saved; + int core_saved; + HOST_WIDE_INT frame_size; + int i; + + offsets = &cfun->machine->stack_offsets; + + /* We need to know if we are a leaf function. Unfortunately, it + is possible to be called after start_sequence has been called, + which causes get_insns to return the insns for the sequence, + not the function, which will cause leaf_function_p to return + the incorrect result. + + To work around this, we cache the computed frame size. This + works because we will only be calling RTL expanders that need + to know about leaf functions once reload has completed, and the + frame size cannot be changed after that time, so we can safely + use the cached value. */ + + if (reload_completed) + return offsets; + + /* Initially this is the size of the local variables. It will be translated + into an offset once we have determined the size of preceding data. 
*/ + frame_size = ROUND_UP_WORD (get_frame_size ()); + + leaf = leaf_function_p (); + + /* Space for variadic functions. */ + offsets->saved_args = crtl->args.pretend_args_size; + + /* In Thumb mode this is incorrect, but never used. */ + offsets->frame + = (offsets->saved_args + + arm_compute_static_chain_stack_bytes () + + (frame_pointer_needed ? 4 : 0)); + + if (TARGET_32BIT) + { + unsigned int regno; + + offsets->saved_regs_mask = arm_compute_save_reg_mask (); + core_saved = bit_count (offsets->saved_regs_mask) * 4; + saved = core_saved; + + /* We know that SP will be doubleword aligned on entry, and we must + preserve that condition at any subroutine call. We also require the + soft frame pointer to be doubleword aligned. */ + + if (TARGET_REALLY_IWMMXT) + { + /* Check for the call-saved iWMMXt registers. */ + for (regno = FIRST_IWMMXT_REGNUM; + regno <= LAST_IWMMXT_REGNUM; + regno++) + if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) + saved += 8; + } + + func_type = arm_current_func_type (); + /* Space for saved VFP registers. */ + if (! IS_VOLATILE (func_type) + && TARGET_HARD_FLOAT && TARGET_VFP) + saved += arm_get_vfp_saved_size (); + } + else /* TARGET_THUMB1 */ + { + offsets->saved_regs_mask = thumb1_compute_save_reg_mask (); + core_saved = bit_count (offsets->saved_regs_mask) * 4; + saved = core_saved; + if (TARGET_BACKTRACE) + saved += 16; + } + + /* Saved registers include the stack frame. */ + offsets->saved_regs + = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved; + offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE; + + /* A leaf function does not need any stack alignment if it has nothing + on the stack. */ + if (leaf && frame_size == 0 + /* However if it calls alloca(), we have a dynamically allocated + block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */ + && ! cfun->calls_alloca) + { + offsets->outgoing_args = offsets->soft_frame; + offsets->locals_base = offsets->soft_frame; + return offsets; + } + + /* Ensure SFP has the correct alignment. */ + if (ARM_DOUBLEWORD_ALIGN + && (offsets->soft_frame & 7)) + { + offsets->soft_frame += 4; + /* Try to align stack by pushing an extra reg. Don't bother doing this + when there is a stack frame as the alignment will be rolled into + the normal stack adjustment. */ + if (frame_size + crtl->outgoing_args_size == 0) + { + int reg = -1; + + /* If it is safe to use r3, then do so. This sometimes + generates better code on Thumb-2 by avoiding the need to + use 32-bit push/pop instructions. */ + if (! any_sibcall_could_use_r3 () + && arm_size_return_regs () <= 12 + && (offsets->saved_regs_mask & (1 << 3)) == 0 + && (TARGET_THUMB2 + || !(TARGET_LDRD && current_tune->prefer_ldrd_strd))) + { + reg = 3; + } + else + for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) + { + /* Avoid fixed registers; they may be changed at + arbitrary times so it's unsafe to restore them + during the epilogue. */ + if (!fixed_regs[i] + && (offsets->saved_regs_mask & (1 << i)) == 0) + { + reg = i; + break; + } + } + + if (reg != -1) + { + offsets->saved_regs += 4; + offsets->saved_regs_mask |= (1 << reg); + } + } + } + + offsets->locals_base = offsets->soft_frame + frame_size; + offsets->outgoing_args = (offsets->locals_base + + crtl->outgoing_args_size); + + if (ARM_DOUBLEWORD_ALIGN) + { + /* Ensure SP remains doubleword aligned. 
*/ + if (offsets->outgoing_args & 7) + offsets->outgoing_args += 4; + gcc_assert (!(offsets->outgoing_args & 7)); + } + + return offsets; +} + + +/* Calculate the relative offsets for the different stack pointers. Positive + offsets are in the direction of stack growth. */ + +HOST_WIDE_INT +arm_compute_initial_elimination_offset (unsigned int from, unsigned int to) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + + /* OK, now we have enough information to compute the distances. + There must be an entry in these switch tables for each pair + of registers in ELIMINABLE_REGS, even if some of the entries + seem to be redundant or useless. */ + switch (from) + { + case ARG_POINTER_REGNUM: + switch (to) + { + case THUMB_HARD_FRAME_POINTER_REGNUM: + return 0; + + case FRAME_POINTER_REGNUM: + /* This is the reverse of the soft frame pointer + to hard frame pointer elimination below. */ + return offsets->soft_frame - offsets->saved_args; + + case ARM_HARD_FRAME_POINTER_REGNUM: + /* This is only non-zero in the case where the static chain register + is stored above the frame. */ + return offsets->frame - offsets->saved_args - 4; + + case STACK_POINTER_REGNUM: + /* If nothing has been pushed on the stack at all + then this will return -4. This *is* correct! */ + return offsets->outgoing_args - (offsets->saved_args + 4); + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + case FRAME_POINTER_REGNUM: + switch (to) + { + case THUMB_HARD_FRAME_POINTER_REGNUM: + return 0; + + case ARM_HARD_FRAME_POINTER_REGNUM: + /* The hard frame pointer points to the top entry in the + stack frame. The soft frame pointer to the bottom entry + in the stack frame. If there is no stack frame at all, + then they are identical. */ + + return offsets->frame - offsets->soft_frame; + + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->soft_frame; + + default: + gcc_unreachable (); + } + gcc_unreachable (); + + default: + /* You cannot eliminate from the stack pointer. + In theory you could eliminate from the hard frame + pointer to the stack pointer, but this will never + happen, since if a stack frame is not needed the + hard frame pointer will never be used. */ + gcc_unreachable (); + } +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. + + All eliminations are permissible. Note that ARG_POINTER_REGNUM and + HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame + pointer, we must eliminate FRAME_POINTER_REGNUM into + HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or + ARG_POINTER_REGNUM. */ + +bool +arm_can_eliminate (const int from, const int to) +{ + return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false : + (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false : + (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false : + (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false : + true); +} + +/* Emit RTL to save coprocessor registers on function entry. Returns the + number of bytes pushed. */ + +static int +arm_save_coproc_regs(void) +{ + int saved_size = 0; + unsigned reg; + unsigned start_reg; + rtx insn; + + for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) + if (df_regs_ever_live_p (reg) && ! 
call_used_regs[reg]) + { + insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); + insn = gen_rtx_MEM (V2SImode, insn); + insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg)); + RTX_FRAME_RELATED_P (insn) = 1; + saved_size += 8; + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + start_reg = FIRST_VFP_REGNUM; + + for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) + { + if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) + && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) + { + if (start_reg != reg) + saved_size += vfp_emit_fstmd (start_reg, + (reg - start_reg) / 2); + start_reg = reg + 2; + } + } + if (start_reg != reg) + saved_size += vfp_emit_fstmd (start_reg, + (reg - start_reg) / 2); + } + return saved_size; +} + + +/* Set the Thumb frame pointer from the stack pointer. */ + +static void +thumb_set_frame_pointer (arm_stack_offsets *offsets) +{ + HOST_WIDE_INT amount; + rtx insn, dwarf; + + amount = offsets->outgoing_args - offsets->locals_base; + if (amount < 1024) + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, GEN_INT (amount))); + else + { + emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount))); + /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1 + expects the first two operands to be the same. */ + if (TARGET_THUMB2) + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + else + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + stack_pointer_rtx)); + } + dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, amount)); + RTX_FRAME_RELATED_P (dwarf) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Generate the prologue instructions for entry into an ARM or Thumb-2 + function. */ +void +arm_expand_prologue (void) +{ + rtx amount; + rtx insn; + rtx ip_rtx; + unsigned long live_regs_mask; + unsigned long func_type; + int fp_offset = 0; + int saved_pretend_args = 0; + int saved_regs = 0; + unsigned HOST_WIDE_INT args_to_push; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + /* Naked functions don't have prologues. */ + if (IS_NAKED (func_type)) + return; + + /* Make a copy of c_f_p_a_s as we may need to modify it locally. */ + args_to_push = crtl->args.pretend_args_size; + + /* Compute which register we will have to save onto the stack. */ + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + + ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + + if (IS_STACKALIGN (func_type)) + { + rtx r0, r1; + + /* Handle a word-aligned stack pointer. We generate the following: + + mov r0, sp + bic r1, r0, #7 + mov sp, r1 + + mov sp, r0 + bx lr + + The unwinder doesn't need to know about the stack realignment. + Just tell it we saved SP in r0. */ + gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0); + + r0 = gen_rtx_REG (SImode, 0); + r1 = gen_rtx_REG (SImode, 1); + + insn = emit_insn (gen_movsi (r0, stack_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL); + + emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7))); + + /* ??? The CFA changes here, which may cause GDB to conclude that it + has entered a different function. 
That said, the unwind info is + correct, individually, before and after this instruction because + we've described the save of SP, which will override the default + handling of SP as restoring from the CFA. */ + emit_insn (gen_movsi (stack_pointer_rtx, r1)); + } + + /* For APCS frames, if IP register is clobbered + when creating frame, save that register in a special + way. */ + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + { + if (IS_INTERRUPT (func_type)) + { + /* Interrupt functions must not corrupt any registers. + Creating a frame pointer however, corrupts the IP + register, so we must push it first. */ + emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM); + + /* Do not set RTX_FRAME_RELATED_P on this insn. + The dwarf stack unwinding code only wants to see one + stack decrement per function, and this is not it. If + this instruction is labeled as being part of the frame + creation sequence then dwarf2out_frame_debug_expr will + die when it encounters the assignment of IP to FP + later on, since the use of SP here establishes SP as + the CFA register and not IP. + + Anyway this instruction is not really part of the stack + frame creation although it is part of the prologue. */ + } + else if (IS_NESTED (func_type)) + { + /* The static chain register is the same as the IP register + used as a scratch register during stack frame creation. + To get around this need to find somewhere to store IP + whilst the frame is being created. We try the following + places in order: + + 1. The last argument register r3 if it is available. + 2. A slot on the stack above the frame if there are no + arguments to push onto the stack. + 3. Register r3 again, after pushing the argument registers + onto the stack, if this is a varargs function. + 4. The last slot on the stack created for the arguments to + push, if this isn't a varargs function. + + Note - we only need to tell the dwarf2 backend about the SP + adjustment in the second variant; the static chain register + doesn't need to be unwound, as it doesn't contain a value + inherited from the caller. */ + + if (!arm_r3_live_at_start_p ()) + insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); + else if (args_to_push == 0) + { + rtx addr, dwarf; + + gcc_assert(arm_compute_static_chain_stack_bytes() == 4); + saved_regs += 4; + + addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); + insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx); + fp_offset = 4; + + /* Just tell the dwarf backend that we adjusted SP. */ + dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -fp_offset)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + else + { + /* Store the args on the stack. */ + if (cfun->machine->uses_anonymous_args) + { + insn + = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf, + (0xf0 >> (args_to_push / 4)) & 0xf); + emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx); + saved_pretend_args = 1; + } + else + { + rtx addr, dwarf; + + if (args_to_push == 4) + addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); + else + addr + = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -args_to_push)); + + insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx); + + /* Just tell the dwarf backend that we adjusted SP. 
*/ + dwarf + = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -args_to_push)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + + RTX_FRAME_RELATED_P (insn) = 1; + fp_offset = args_to_push; + args_to_push = 0; + } + } + + insn = emit_set_insn (ip_rtx, + plus_constant (Pmode, stack_pointer_rtx, + fp_offset)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (args_to_push) + { + /* Push the argument registers, or reserve space for them. */ + if (cfun->machine->uses_anonymous_args) + insn = emit_multi_reg_push + ((0xf0 >> (args_to_push / 4)) & 0xf, + (0xf0 >> (args_to_push / 4)) & 0xf); + else + insn = emit_insn + (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (- args_to_push))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If this is an interrupt service routine, and the link register + is going to be pushed, and we're not generating an extra + push of IP (needed when a frame is needed and the frame layout is APCS), + subtracting four from LR now will mean that the function return + can be done with a single instruction. */ + if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ) + && (live_regs_mask & (1 << LR_REGNUM)) != 0 + && !(frame_pointer_needed && TARGET_APCS_FRAME) + && TARGET_ARM) + { + rtx lr = gen_rtx_REG (SImode, LR_REGNUM); + + emit_set_insn (lr, plus_constant (SImode, lr, -4)); + } + + if (live_regs_mask) + { + unsigned long dwarf_regs_mask = live_regs_mask; + + saved_regs += bit_count (live_regs_mask) * 4; + if (optimize_size && !frame_pointer_needed + && saved_regs == offsets->saved_regs - offsets->saved_args) + { + /* If no coprocessor registers are being pushed and we don't have + to worry about a frame pointer then push extra registers to + create the stack frame. This is done in a way that does not + alter the frame layout, so it is independent of the epilogue. */ + int n; + int frame; + n = 0; + while (n < 8 && (live_regs_mask & (1 << n)) == 0) + n++; + frame = offsets->outgoing_args - (offsets->saved_args + saved_regs); + if (frame && n * 4 >= frame) + { + n = frame / 4; + live_regs_mask |= (1 << n) - 1; + saved_regs += frame; + } + } + + if (TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { + gcc_checking_assert (live_regs_mask == dwarf_regs_mask); + if (TARGET_THUMB2) + thumb2_emit_strd_push (live_regs_mask); + else if (TARGET_ARM + && !TARGET_APCS_FRAME + && !IS_INTERRUPT (func_type)) + arm_emit_strd_push (live_regs_mask); + else + { + insn = emit_multi_reg_push (live_regs_mask, live_regs_mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + else + { + insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (! IS_VOLATILE (func_type)) + saved_regs += arm_save_coproc_regs (); + + if (frame_pointer_needed && TARGET_ARM) + { + /* Create the new frame pointer. */ + if (TARGET_APCS_FRAME) + { + insn = GEN_INT (-(4 + args_to_push + fp_offset)); + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn)); + RTX_FRAME_RELATED_P (insn) = 1; + + if (IS_NESTED (func_type)) + { + /* Recover the static chain register. */ + if (!arm_r3_live_at_start_p () || saved_pretend_args) + insn = gen_rtx_REG (SImode, 3); + else + { + insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4); + insn = gen_frame_mem (SImode, insn); + } + emit_set_insn (ip_rtx, insn); + /* Add a USE to stop propagate_one_insn() from barfing. 
*/ + emit_insn (gen_force_register_use (ip_rtx)); + } + } + else + { + insn = GEN_INT (saved_regs - 4); + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + stack_pointer_rtx, insn)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (flag_stack_usage_info) + current_function_static_stack_size + = offsets->outgoing_args - offsets->saved_args; + + if (offsets->outgoing_args != offsets->saved_args + saved_regs) + { + /* This add can produce multiple insns for a large constant, so we + need to get tricky. */ + rtx last = get_last_insn (); + + amount = GEN_INT (offsets->saved_args + saved_regs + - offsets->outgoing_args); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + amount)); + do + { + last = last ? NEXT_INSN (last) : get_insns (); + RTX_FRAME_RELATED_P (last) = 1; + } + while (last != insn); + + /* If the frame pointer is needed, emit a special barrier that + will prevent the scheduler from moving stores to the frame + before the stack adjustment. */ + if (frame_pointer_needed) + insn = emit_insn (gen_stack_tie (stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + + + if (frame_pointer_needed && TARGET_THUMB2) + thumb_set_frame_pointer (offsets); + + if (flag_pic && arm_pic_register != INVALID_REGNUM) + { + unsigned long mask; + + mask = live_regs_mask; + mask &= THUMB2_WORK_REGS; + if (!IS_NESTED (func_type)) + mask |= (1 << IP_REGNUM); + arm_load_pic_register (mask); + } + + /* If we are profiling, make sure no instructions are scheduled before + the call to mcount. Similarly if the user has requested no + scheduling in the prolog. Similarly if we want non-call exceptions + using the EABI unwinder, to prevent faulting instructions from being + swapped with a stack adjustment. */ + if (crtl->profile || !TARGET_SCHED_PROLOG + || (arm_except_unwind_info (&global_options) == UI_TARGET + && cfun->can_throw_non_call_exceptions)) + emit_insn (gen_blockage ()); + + /* If the link register is being kept alive, with the return address in it, + then make sure that it does not get reused by the ce2 pass. */ + if ((live_regs_mask & (1 << LR_REGNUM)) == 0) + cfun->machine->lr_save_eliminated = 1; +} + +/* Print condition code to STREAM. Helper function for arm_print_operand. */ +static void +arm_print_condition (FILE *stream) +{ + if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4) + { + /* Branch conversion is not implemented for Thumb-2. */ + if (TARGET_THUMB) + { + output_operand_lossage ("predicated Thumb instruction"); + return; + } + if (current_insn_predicate != NULL) + { + output_operand_lossage + ("predicated instruction in conditional sequence"); + return; + } + + fputs (arm_condition_codes[arm_current_cc], stream); + } + else if (current_insn_predicate) + { + enum arm_cond_code code; + + if (TARGET_THUMB1) + { + output_operand_lossage ("predicated Thumb instruction"); + return; + } + + code = get_arm_condition_code (current_insn_predicate); + fputs (arm_condition_codes[code], stream); + } +} + + +/* If CODE is 'd', then the X is a condition operand and the instruction + should only be executed if the condition is true. 
+ if CODE is 'D', then the X is a condition operand and the instruction + should only be executed if the condition is false: however, if the mode + of the comparison is CCFPEmode, then always execute the instruction -- we + do this because in these circumstances !GE does not necessarily imply LT; + in these cases the instruction pattern will take care to make sure that + an instruction containing %d will follow, thereby undoing the effects of + doing this instruction unconditionally. + If CODE is 'N' then X is a floating point operand that must be negated + before output. + If CODE is 'B' then output a bitwise inverted value of X (a const int). + If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */ +static void +arm_print_operand (FILE *stream, rtx x, int code) +{ + switch (code) + { + case '@': + fputs (ASM_COMMENT_START, stream); + return; + + case '_': + fputs (user_label_prefix, stream); + return; + + case '|': + fputs (REGISTER_PREFIX, stream); + return; + + case '?': + arm_print_condition (stream); + return; + + case '(': + /* Nothing in unified syntax, otherwise the current condition code. */ + if (!TARGET_UNIFIED_ASM) + arm_print_condition (stream); + break; + + case ')': + /* The current condition code in unified syntax, otherwise nothing. */ + if (TARGET_UNIFIED_ASM) + arm_print_condition (stream); + break; + + case '.': + /* The current condition code for a condition code setting instruction. + Preceded by 's' in unified syntax, otherwise followed by 's'. */ + if (TARGET_UNIFIED_ASM) + { + fputc('s', stream); + arm_print_condition (stream); + } + else + { + arm_print_condition (stream); + fputc('s', stream); + } + return; + + case '!': + /* If the instruction is conditionally executed then print + the current condition code, otherwise print 's'. */ + gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM); + if (current_insn_predicate) + arm_print_condition (stream); + else + fputc('s', stream); + break; + + /* %# is a "break" sequence. It doesn't output anything, but is used to + separate e.g. operand numbers from following text, if that text consists + of further digits which we don't want to be part of the operand + number. */ + case '#': + return; + + case 'N': + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + r = real_value_negate (&r); + fprintf (stream, "%s", fp_const_from_val (&r)); + } + return; + + /* An integer or symbol address without a preceding # sign. */ + case 'c': + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + break; + + case SYMBOL_REF: + output_addr_const (stream, x); + break; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS + && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) + { + output_addr_const (stream, x); + break; + } + /* Fall through. */ + + default: + output_operand_lossage ("Unsupported operand for code '%c'", code); + } + return; + + /* An integer that we want to print in HEX. */ + case 'x': + switch (GET_CODE (x)) + { + case CONST_INT: + fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x)); + break; + + default: + output_operand_lossage ("Unsupported operand for code '%c'", code); + } + return; + + case 'B': + if (CONST_INT_P (x)) + { + HOST_WIDE_INT val; + val = ARM_SIGN_EXTEND (~INTVAL (x)); + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val); + } + else + { + putc ('~', stream); + output_addr_const (stream, x); + } + return; + + case 'L': + /* The low 16 bits of an immediate constant. 
*/ + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); + return; + + case 'i': + fprintf (stream, "%s", arithmetic_instr (x, 1)); + return; + + case 'I': + fprintf (stream, "%s", arithmetic_instr (x, 0)); + return; + + case 'S': + { + HOST_WIDE_INT val; + const char *shift; + + shift = shift_op (x, &val); + + if (shift) + { + fprintf (stream, ", %s ", shift); + if (val == -1) + arm_print_operand (stream, XEXP (x, 1), 0); + else + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); + } + } + return; + + /* An explanation of the 'Q', 'R' and 'H' register operands: + + In a pair of registers containing a DI or DF value the 'Q' + operand returns the register number of the register containing + the least significant part of the value. The 'R' operand returns + the register number of the register containing the most + significant part of the value. + + The 'H' operand returns the higher of the two register numbers. + On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the + same as the 'Q' operand, since the most significant part of the + value is held in the lower number register. The reverse is true + on systems where WORDS_BIG_ENDIAN is false. + + The purpose of these operands is to distinguish between cases + where the endian-ness of the values is important (for example + when they are added together), and cases where the endian-ness + is irrelevant, but the order of register operations is important. + For example when loading a value from memory into a register + pair, the endian-ness does not matter. Provided that the value + from the lower memory address is put into the lower numbered + register, and the value from the higher address is put into the + higher numbered register, the load will work regardless of whether + the value being loaded is big-wordian or little-wordian. The + order of the two register loads can matter however, if the address + of the memory location is actually held in one of the registers + being overwritten by the load. + + The 'Q' and 'R' constraints are also available for 64-bit + constants. */ + case 'Q': + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + { + rtx part = gen_lowpart (SImode, x); + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); + return; + } + + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)); + return; + + case 'R': + if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) + { + enum machine_mode mode = GET_MODE (x); + rtx part; + + if (mode == VOIDmode) + mode = DImode; + part = gen_highpart_mode (SImode, mode, x); + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); + return; + } + + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)); + return; + + case 'H': + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + 1); + return; + + case 'J': + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 
3 : 2)); + return; + + case 'K': + if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3)); + return; + + case 'm': + asm_fprintf (stream, "%r", + REG_P (XEXP (x, 0)) + ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0))); + return; + + case 'M': + asm_fprintf (stream, "{%r-%r}", + REGNO (x), + REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1); + return; + + /* Like 'M', but writing doubleword vector registers, for use by Neon + insns. */ + case 'h': + { + int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2; + int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2; + if (numregs == 1) + asm_fprintf (stream, "{d%d}", regno); + else + asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1); + } + return; + + case 'd': + /* CONST_TRUE_RTX means always -- that's the default. */ + if (x == const_true_rtx) + return; + + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fputs (arm_condition_codes[get_arm_condition_code (x)], + stream); + return; + + case 'D': + /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever + want to do that. */ + if (x == const_true_rtx) + { + output_operand_lossage ("instruction never executed"); + return; + } + if (!COMPARISON_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE + (get_arm_condition_code (x))], + stream); + return; + + case 's': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + /* Former Maverick support, removed after GCC-4.7. */ + output_operand_lossage ("obsolete Maverick format code '%c'", code); + return; + + case 'U': + if (!REG_P (x) + || REGNO (x) < FIRST_IWMMXT_GR_REGNUM + || REGNO (x) > LAST_IWMMXT_GR_REGNUM) + /* Bad value for wCG register number. */ + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + else + fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM); + return; + + /* Print an iWMMXt control register name. */ + case 'w': + if (!CONST_INT_P (x) + || INTVAL (x) < 0 + || INTVAL (x) >= 16) + /* Bad value for wC register number. */ + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + else + { + static const char * wc_reg_names [16] = + { + "wCID", "wCon", "wCSSF", "wCASF", + "wC4", "wC5", "wC6", "wC7", + "wCGR0", "wCGR1", "wCGR2", "wCGR3", + "wC12", "wC13", "wC14", "wC15" + }; + + fputs (wc_reg_names [INTVAL (x)], stream); + } + return; + + /* Print the high single-precision register of a VFP double-precision + register. */ + case 'p': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 8 || !REG_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_DOUBLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1); + } + return; + + /* Print a VFP/Neon double precision or quad precision register name. */ + case 'P': + case 'q': + { + int mode = GET_MODE (x); + int is_quad = (code == 'q'); + int regno; + + if (GET_MODE_SIZE (mode) != (is_quad ? 
16 : 8)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + if (!REG_P (x) + || !IS_VFP_REGNUM (REGNO (x))) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno)) + || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno))) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + fprintf (stream, "%c%d", is_quad ? 'q' : 'd', + (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1)); + } + return; + + /* These two codes print the low/high doubleword register of a Neon quad + register, respectively. For pair-structure types, can also print + low/high quadword registers. */ + case 'e': + case 'f': + { + int mode = GET_MODE (x); + int regno; + + if ((GET_MODE_SIZE (mode) != 16 + && GET_MODE_SIZE (mode) != 32) || !REG_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!NEON_REGNO_OK_FOR_QUAD (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + if (GET_MODE_SIZE (mode) == 16) + fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1) + + (code == 'f' ? 1 : 0)); + else + fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2) + + (code == 'f' ? 1 : 0)); + } + return; + + /* Print a VFPv3 floating-point constant, represented as an integer + index. */ + case 'G': + { + int index = vfp3_const_double_index (x); + gcc_assert (index != -1); + fprintf (stream, "%d", index); + } + return; + + /* Print bits representing opcode features for Neon. + + Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed + and polynomials as unsigned. + + Bit 1 is 1 for floats and polynomials, 0 for ordinary integers. + + Bit 2 is 1 for rounding functions, 0 otherwise. */ + + /* Identify the type as 's', 'u', 'p' or 'f'. */ + case 'T': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("uspf"[bits & 3], stream); + } + return; + + /* Likewise, but signed and unsigned integers are both 'i'. */ + case 'F': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("iipf"[bits & 3], stream); + } + return; + + /* As for 'T', but emit 'u' instead of 'p'. */ + case 't': + { + HOST_WIDE_INT bits = INTVAL (x); + fputc ("usuf"[bits & 3], stream); + } + return; + + /* Bit 2: rounding (vs none). */ + case 'O': + { + HOST_WIDE_INT bits = INTVAL (x); + fputs ((bits & 4) != 0 ? "r" : "", stream); + } + return; + + /* Memory operand for vld1/vst1 instruction. */ + case 'A': + { + rtx addr; + bool postinc = FALSE; + unsigned align, memsize, align_bits; + + gcc_assert (MEM_P (x)); + addr = XEXP (x, 0); + if (GET_CODE (addr) == POST_INC) + { + postinc = 1; + addr = XEXP (addr, 0); + } + asm_fprintf (stream, "[%r", REGNO (addr)); + + /* We know the alignment of this access, so we can emit a hint in the + instruction (for some alignments) as an aid to the memory subsystem + of the target. */ + align = MEM_ALIGN (x) >> 3; + memsize = MEM_SIZE (x); + + /* Only certain alignment specifiers are supported by the hardware. 
*/ + if (memsize == 32 && (align % 32) == 0) + align_bits = 256; + else if ((memsize == 16 || memsize == 32) && (align % 16) == 0) + align_bits = 128; + else if (memsize >= 8 && (align % 8) == 0) + align_bits = 64; + else + align_bits = 0; + + if (align_bits != 0) + asm_fprintf (stream, ":%d", align_bits); + + asm_fprintf (stream, "]"); + + if (postinc) + fputs("!", stream); + } + return; + + case 'C': + { + rtx addr; + + gcc_assert (MEM_P (x)); + addr = XEXP (x, 0); + gcc_assert (REG_P (addr)); + asm_fprintf (stream, "[%r]", REGNO (addr)); + } + return; + + /* Translate an S register number into a D register number and element index. */ + case 'y': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 4 || !REG_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno / 2, regno % 2); + } + return; + + case 'v': + gcc_assert (CONST_DOUBLE_P (x)); + int result; + result = vfp3_const_double_for_fract_bits (x); + if (result == 0) + result = vfp3_const_double_for_bits (x); + fprintf (stream, "#%d", result); + return; + + /* Register specifier for vld1.16/vst1.16. Translate the S register + number into a D register number and element index. */ + case 'z': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 2 || !REG_P (x)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); + } + return; + + default: + if (x == 0) + { + output_operand_lossage ("missing operand"); + return; + } + + switch (GET_CODE (x)) + { + case REG: + asm_fprintf (stream, "%r", REGNO (x)); + break; + + case MEM: + output_memory_reference_mode = GET_MODE (x); + output_address (XEXP (x, 0)); + break; + + case CONST_DOUBLE: + if (TARGET_NEON) + { + char fpstr[20]; + real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x), + sizeof (fpstr), 0, 1); + fprintf (stream, "#%s", fpstr); + } + else + fprintf (stream, "#%s", fp_immediate_constant (x)); + break; + + default: + gcc_assert (GET_CODE (x) != NEG); + fputc ('#', stream); + if (GET_CODE (x) == HIGH) + { + fputs (":lower16:", stream); + x = XEXP (x, 0); + } + + output_addr_const (stream, x); + break; + } + } +} + +/* Target hook for printing a memory address. */ +static void +arm_print_operand_address (FILE *stream, rtx x) +{ + if (TARGET_32BIT) + { + int is_minus = GET_CODE (x) == MINUS; + + if (REG_P (x)) + asm_fprintf (stream, "[%r]", REGNO (x)); + else if (GET_CODE (x) == PLUS || is_minus) + { + rtx base = XEXP (x, 0); + rtx index = XEXP (x, 1); + HOST_WIDE_INT offset = 0; + if (!REG_P (base) + || (REG_P (index) && REGNO (index) == SP_REGNUM)) + { + /* Ensure that BASE is a register. */ + /* (one of them must be). */ + /* Also ensure the SP is not used as an index register. */ + rtx temp = base; + base = index; + index = temp; + } + switch (GET_CODE (index)) + { + case CONST_INT: + offset = INTVAL (index); + if (is_minus) + offset = -offset; + asm_fprintf (stream, "[%r, #%wd]", + REGNO (base), offset); + break; + + case REG: + asm_fprintf (stream, "[%r, %s%r]", + REGNO (base), is_minus ? 
"-" : "", + REGNO (index)); + break; + + case MULT: + case ASHIFTRT: + case LSHIFTRT: + case ASHIFT: + case ROTATERT: + { + asm_fprintf (stream, "[%r, %s%r", + REGNO (base), is_minus ? "-" : "", + REGNO (XEXP (index, 0))); + arm_print_operand (stream, index, 'S'); + fputs ("]", stream); + break; + } + + default: + gcc_unreachable (); + } + } + else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC + || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC) + { + extern enum machine_mode output_memory_reference_mode; + + gcc_assert (REG_P (XEXP (x, 0))); + + if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC) + asm_fprintf (stream, "[%r, #%s%d]!", + REGNO (XEXP (x, 0)), + GET_CODE (x) == PRE_DEC ? "-" : "", + GET_MODE_SIZE (output_memory_reference_mode)); + else + asm_fprintf (stream, "[%r], #%s%d", + REGNO (XEXP (x, 0)), + GET_CODE (x) == POST_DEC ? "-" : "", + GET_MODE_SIZE (output_memory_reference_mode)); + } + else if (GET_CODE (x) == PRE_MODIFY) + { + asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0))); + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + asm_fprintf (stream, "#%wd]!", + INTVAL (XEXP (XEXP (x, 1), 1))); + else + asm_fprintf (stream, "%r]!", + REGNO (XEXP (XEXP (x, 1), 1))); + } + else if (GET_CODE (x) == POST_MODIFY) + { + asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0))); + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + asm_fprintf (stream, "#%wd", + INTVAL (XEXP (XEXP (x, 1), 1))); + else + asm_fprintf (stream, "%r", + REGNO (XEXP (XEXP (x, 1), 1))); + } + else output_addr_const (stream, x); + } + else + { + if (REG_P (x)) + asm_fprintf (stream, "[%r]", REGNO (x)); + else if (GET_CODE (x) == POST_INC) + asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0))); + else if (GET_CODE (x) == PLUS) + { + gcc_assert (REG_P (XEXP (x, 0))); + if (CONST_INT_P (XEXP (x, 1))) + asm_fprintf (stream, "[%r, #%wd]", + REGNO (XEXP (x, 0)), + INTVAL (XEXP (x, 1))); + else + asm_fprintf (stream, "[%r, %r]", + REGNO (XEXP (x, 0)), + REGNO (XEXP (x, 1))); + } + else + output_addr_const (stream, x); + } +} + +/* Target hook for indicating whether a punctuation character for + TARGET_PRINT_OPERAND is valid. */ +static bool +arm_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '@' || code == '|' || code == '.' + || code == '(' || code == ')' || code == '#' + || (TARGET_32BIT && (code == '?')) + || (TARGET_THUMB2 && (code == '!')) + || (TARGET_THUMB && (code == '_'))); +} + +/* Target hook for assembling integer objects. The ARM version needs to + handle word-sized values specially. */ +static bool +arm_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + enum machine_mode mode; + + if (size == UNITS_PER_WORD && aligned_p) + { + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, x); + + /* Mark symbols as position independent. We only do this in the + .text segment, not in the .data segment. */ + if (NEED_GOT_RELOC && flag_pic && making_const_table && + (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)) + { + /* See legitimize_pic_address for an explanation of the + TARGET_VXWORKS_RTP check. 
*/ + if (!arm_pic_data_is_text_relative + || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x))) + fputs ("(GOT)", asm_out_file); + else + fputs ("(GOTOFF)", asm_out_file); + } + fputc ('\n', asm_out_file); + return true; + } + + mode = GET_MODE (x); + + if (arm_vector_mode_supported_p (mode)) + { + int i, units; + + gcc_assert (GET_CODE (x) == CONST_VECTOR); + + units = CONST_VECTOR_NUNITS (x); + size = GET_MODE_SIZE (GET_MODE_INNER (mode)); + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + for (i = 0; i < units; i++) + { + rtx elt = CONST_VECTOR_ELT (x, i); + assemble_integer + (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1); + } + else + for (i = 0; i < units; i++) + { + rtx elt = CONST_VECTOR_ELT (x, i); + REAL_VALUE_TYPE rval; + + REAL_VALUE_FROM_CONST_DOUBLE (rval, elt); + + assemble_real + (rval, GET_MODE_INNER (mode), + i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT); + } + + return true; + } + + return default_assemble_integer (x, size, aligned_p); +} + +static void +arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor) +{ + section *s; + + if (!TARGET_AAPCS_BASED) + { + (is_ctor ? + default_named_section_asm_out_constructor + : default_named_section_asm_out_destructor) (symbol, priority); + return; + } + + /* Put these in the .init_array section, using a special relocation. */ + if (priority != DEFAULT_INIT_PRIORITY) + { + char buf[18]; + sprintf (buf, "%s.%.5u", + is_ctor ? ".init_array" : ".fini_array", + priority); + s = get_section (buf, SECTION_WRITE, NULL_TREE); + } + else if (is_ctor) + s = ctors_section; + else + s = dtors_section; + + switch_to_section (s); + assemble_align (POINTER_SIZE); + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, symbol); + fputs ("(target1)\n", asm_out_file); +} + +/* Add a function to the list of static constructors. */ + +static void +arm_elf_asm_constructor (rtx symbol, int priority) +{ + arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true); +} + +/* Add a function to the list of static destructors. */ + +static void +arm_elf_asm_destructor (rtx symbol, int priority) +{ + arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false); +} + +/* A finite state machine takes care of noticing whether or not instructions + can be conditionally executed, and thus decrease execution time and code + size by deleting branch instructions. The fsm is controlled by + final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */ + +/* The state of the fsm controlling condition codes are: + 0: normal, do nothing special + 1: make ASM_OUTPUT_OPCODE not output this instruction + 2: make ASM_OUTPUT_OPCODE not output this instruction + 3: make instructions conditional + 4: make instructions conditional + + State transitions (state->state by whom under condition): + 0 -> 1 final_prescan_insn if the `target' is a label + 0 -> 2 final_prescan_insn if the `target' is an unconditional branch + 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch + 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch + 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached + (the target label has CODE_LABEL_NUMBER equal to arm_target_label). + 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached + (the target insn is arm_target_insn). + + If the jump clobbers the conditions then we use states 2 and 4. + + A similar thing can be done with conditional return insns. 
+ + XXX In case the `target' is an unconditional branch, this conditionalising + of the instructions always reduces code size, but not always execution + time. But then, I want to reduce the code size to somewhere near what + /bin/cc produces. */ + +/* In addition to this, state is maintained for Thumb-2 COND_EXEC + instructions. When a COND_EXEC instruction is seen the subsequent + instructions are scanned so that multiple conditional instructions can be + combined into a single IT block. arm_condexec_count and arm_condexec_mask + specify the length and true/false mask for the IT block. These will be + decremented/zeroed by arm_asm_output_opcode as the insns are output. */ + +/* Returns the index of the ARM condition code string in + `arm_condition_codes', or ARM_NV if the comparison is invalid. + COMPARISON should be an rtx like `(eq (...) (...))'. */ + +enum arm_cond_code +maybe_get_arm_condition_code (rtx comparison) +{ + enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); + enum arm_cond_code code; + enum rtx_code comp_code = GET_CODE (comparison); + + if (GET_MODE_CLASS (mode) != MODE_CC) + mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0), + XEXP (comparison, 1)); + + switch (mode) + { + case CC_DNEmode: code = ARM_NE; goto dominance; + case CC_DEQmode: code = ARM_EQ; goto dominance; + case CC_DGEmode: code = ARM_GE; goto dominance; + case CC_DGTmode: code = ARM_GT; goto dominance; + case CC_DLEmode: code = ARM_LE; goto dominance; + case CC_DLTmode: code = ARM_LT; goto dominance; + case CC_DGEUmode: code = ARM_CS; goto dominance; + case CC_DGTUmode: code = ARM_HI; goto dominance; + case CC_DLEUmode: code = ARM_LS; goto dominance; + case CC_DLTUmode: code = ARM_CC; + + dominance: + if (comp_code == EQ) + return ARM_INVERSE_CONDITION_CODE (code); + if (comp_code == NE) + return code; + return ARM_NV; + + case CC_NOOVmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_PL; + case LT: return ARM_MI; + default: return ARM_NV; + } + + case CC_Zmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + default: return ARM_NV; + } + + case CC_Nmode: + switch (comp_code) + { + case NE: return ARM_MI; + case EQ: return ARM_PL; + default: return ARM_NV; + } + + case CCFPEmode: + case CCFPmode: + /* We can handle all cases except UNEQ and LTGT. */ + switch (comp_code) + { + case GE: return ARM_GE; + case GT: return ARM_GT; + case LE: return ARM_LS; + case LT: return ARM_MI; + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case ORDERED: return ARM_VC; + case UNORDERED: return ARM_VS; + case UNLT: return ARM_LT; + case UNLE: return ARM_LE; + case UNGT: return ARM_HI; + case UNGE: return ARM_PL; + /* UNEQ and LTGT do not have a representation. */ + case UNEQ: /* Fall through. */ + case LTGT: /* Fall through. 
*/ + default: return ARM_NV; + } + + case CC_SWPmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_LE; + case GT: return ARM_LT; + case LE: return ARM_GE; + case LT: return ARM_GT; + case GEU: return ARM_LS; + case GTU: return ARM_CC; + case LEU: return ARM_CS; + case LTU: return ARM_HI; + default: return ARM_NV; + } + + case CC_Cmode: + switch (comp_code) + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; + default: return ARM_NV; + } + + case CC_CZmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GEU: return ARM_CS; + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; + default: return ARM_NV; + } + + case CC_NCVmode: + switch (comp_code) + { + case GE: return ARM_GE; + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; + default: return ARM_NV; + } + + case CCmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GE: return ARM_GE; + case GT: return ARM_GT; + case LE: return ARM_LE; + case LT: return ARM_LT; + case GEU: return ARM_CS; + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; + default: return ARM_NV; + } + + default: gcc_unreachable (); + } +} + +/* Like maybe_get_arm_condition_code, but never return ARM_NV. */ +static enum arm_cond_code +get_arm_condition_code (rtx comparison) +{ + enum arm_cond_code code = maybe_get_arm_condition_code (comparison); + gcc_assert (code != ARM_NV); + return code; +} + +/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed + instructions. */ +void +thumb2_final_prescan_insn (rtx insn) +{ + rtx first_insn = insn; + rtx body = PATTERN (insn); + rtx predicate; + enum arm_cond_code code; + int n; + int mask; + int max; + + /* max_insns_skipped in the tune was already taken into account in the + cost model of ifcvt pass when generating COND_EXEC insns. At this stage + just emit the IT blocks as we can. It does not make sense to split + the IT blocks. */ + max = MAX_INSN_PER_IT_BLOCK; + + /* Remove the previous insn from the count of insns to be output. */ + if (arm_condexec_count) + arm_condexec_count--; + + /* Nothing to do if we are already inside a conditional block. */ + if (arm_condexec_count) + return; + + if (GET_CODE (body) != COND_EXEC) + return; + + /* Conditional jumps are implemented directly. */ + if (JUMP_P (insn)) + return; + + predicate = COND_EXEC_TEST (body); + arm_current_cc = get_arm_condition_code (predicate); + + n = get_attr_ce_count (insn); + arm_condexec_count = 1; + arm_condexec_mask = (1 << n) - 1; + arm_condexec_masklen = n; + /* See if subsequent instructions can be combined into the same block. */ + for (;;) + { + insn = next_nonnote_insn (insn); + + /* Jumping into the middle of an IT block is illegal, so a label or + barrier terminates the block. */ + if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn)) + break; + + body = PATTERN (insn); + /* USE and CLOBBER aren't really insns, so just skip them. */ + if (GET_CODE (body) == USE + || GET_CODE (body) == CLOBBER) + continue; + + /* ??? Recognize conditional jumps, and combine them with IT blocks. */ + if (GET_CODE (body) != COND_EXEC) + break; + /* Maximum number of conditionally executed instructions in a block. 
*/ + n = get_attr_ce_count (insn); + if (arm_condexec_masklen + n > max) + break; + + predicate = COND_EXEC_TEST (body); + code = get_arm_condition_code (predicate); + mask = (1 << n) - 1; + if (arm_current_cc == code) + arm_condexec_mask |= (mask << arm_condexec_masklen); + else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code)) + break; + + arm_condexec_count++; + arm_condexec_masklen += n; + + /* A jump must be the last instruction in a conditional block. */ + if (JUMP_P (insn)) + break; + } + /* Restore recog_data (getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call). */ + extract_constrain_insn_cached (first_insn); +} + +void +arm_final_prescan_insn (rtx insn) +{ + /* BODY will hold the body of INSN. */ + rtx body = PATTERN (insn); + + /* This will be 1 if trying to repeat the trick, and things need to be + reversed if it appears to fail. */ + int reverse = 0; + + /* If we start with a return insn, we only succeed if we find another one. */ + int seeking_return = 0; + enum rtx_code return_code = UNKNOWN; + + /* START_INSN will hold the insn from where we start looking. This is the + first insn after the following code_label if REVERSE is true. */ + rtx start_insn = insn; + + /* If in state 4, check if the target branch is reached, in order to + change back to state 0. */ + if (arm_ccfsm_state == 4) + { + if (insn == arm_target_insn) + { + arm_target_insn = NULL; + arm_ccfsm_state = 0; + } + return; + } + + /* If in state 3, it is possible to repeat the trick, if this insn is an + unconditional branch to a label, and immediately following this branch + is the previous target label which is only used once, and the label this + branch jumps to is not too far off. */ + if (arm_ccfsm_state == 3) + { + if (simplejump_p (insn)) + { + start_insn = next_nonnote_insn (start_insn); + if (BARRIER_P (start_insn)) + { + /* XXX Isn't this always a barrier? */ + start_insn = next_nonnote_insn (start_insn); + } + if (LABEL_P (start_insn) + && CODE_LABEL_NUMBER (start_insn) == arm_target_label + && LABEL_NUSES (start_insn) == 1) + reverse = TRUE; + else + return; + } + else if (ANY_RETURN_P (body)) + { + start_insn = next_nonnote_insn (start_insn); + if (BARRIER_P (start_insn)) + start_insn = next_nonnote_insn (start_insn); + if (LABEL_P (start_insn) + && CODE_LABEL_NUMBER (start_insn) == arm_target_label + && LABEL_NUSES (start_insn) == 1) + { + reverse = TRUE; + seeking_return = 1; + return_code = GET_CODE (body); + } + else + return; + } + else + return; + } + + gcc_assert (!arm_ccfsm_state || reverse); + if (!JUMP_P (insn)) + return; + + /* This jump might be paralleled with a clobber of the condition codes + the jump should always come first */ + if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0) + body = XVECEXP (body, 0, 0); + + if (reverse + || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE)) + { + int insns_skipped; + int fail = FALSE, succeed = FALSE; + /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */ + int then_not_else = TRUE; + rtx this_insn = start_insn, label = 0; + + /* Register the insn jumped to. 
*/ + if (reverse) + { + if (!seeking_return) + label = XEXP (SET_SRC (body), 0); + } + else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF) + label = XEXP (XEXP (SET_SRC (body), 1), 0); + else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF) + { + label = XEXP (XEXP (SET_SRC (body), 2), 0); + then_not_else = FALSE; + } + else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1))) + { + seeking_return = 1; + return_code = GET_CODE (XEXP (SET_SRC (body), 1)); + } + else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2))) + { + seeking_return = 1; + return_code = GET_CODE (XEXP (SET_SRC (body), 2)); + then_not_else = FALSE; + } + else + gcc_unreachable (); + + /* See how many insns this branch skips, and what kind of insns. If all + insns are okay, and the label or unconditional branch to the same + label is not too far away, succeed. */ + for (insns_skipped = 0; + !fail && !succeed && insns_skipped++ < max_insns_skipped;) + { + rtx scanbody; + + this_insn = next_nonnote_insn (this_insn); + if (!this_insn) + break; + + switch (GET_CODE (this_insn)) + { + case CODE_LABEL: + /* Succeed if it is the target label, otherwise fail since + control falls in from somewhere else. */ + if (this_insn == label) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case BARRIER: + /* Succeed if the following insn is the target label. + Otherwise fail. + If return insns are used then the last insn in a function + will be a barrier. */ + this_insn = next_nonnote_insn (this_insn); + if (this_insn && this_insn == label) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case CALL_INSN: + /* The AAPCS says that conditional calls should not be + used since they make interworking inefficient (the + linker can't transform BL into BLX). That's + only a problem if the machine has BLX. */ + if (arm_arch5) + { + fail = TRUE; + break; + } + + /* Succeed if the following insn is the target label, or + if the following two insns are a barrier and the + target label. */ + this_insn = next_nonnote_insn (this_insn); + if (this_insn && BARRIER_P (this_insn)) + this_insn = next_nonnote_insn (this_insn); + + if (this_insn && this_insn == label + && insns_skipped < max_insns_skipped) + { + arm_ccfsm_state = 1; + succeed = TRUE; + } + else + fail = TRUE; + break; + + case JUMP_INSN: + /* If this is an unconditional branch to the same label, succeed. + If it is to another label, do nothing. If it is conditional, + fail. */ + /* XXX Probably, the tests for SET and the PC are + unnecessary. */ + + scanbody = PATTERN (this_insn); + if (GET_CODE (scanbody) == SET + && GET_CODE (SET_DEST (scanbody)) == PC) + { + if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF + && XEXP (SET_SRC (scanbody), 0) == label && !reverse) + { + arm_ccfsm_state = 2; + succeed = TRUE; + } + else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE) + fail = TRUE; + } + /* Fail if a conditional return is undesirable (e.g. on a + StrongARM), but still allow this if optimizing for size. */ + else if (GET_CODE (scanbody) == return_code + && !use_return_insn (TRUE, NULL) + && !optimize_size) + fail = TRUE; + else if (GET_CODE (scanbody) == return_code) + { + arm_ccfsm_state = 2; + succeed = TRUE; + } + else if (GET_CODE (scanbody) == PARALLEL) + { + switch (get_attr_conds (this_insn)) + { + case CONDS_NOCOND: + break; + default: + fail = TRUE; + break; + } + } + else + fail = TRUE; /* Unrecognized jump (e.g. epilogue). 
*/ + + break; + + case INSN: + /* Instructions using or affecting the condition codes make it + fail. */ + scanbody = PATTERN (this_insn); + if (!(GET_CODE (scanbody) == SET + || GET_CODE (scanbody) == PARALLEL) + || get_attr_conds (this_insn) != CONDS_NOCOND) + fail = TRUE; + break; + + default: + break; + } + } + if (succeed) + { + if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse)) + arm_target_label = CODE_LABEL_NUMBER (label); + else + { + gcc_assert (seeking_return || arm_ccfsm_state == 2); + + while (this_insn && GET_CODE (PATTERN (this_insn)) == USE) + { + this_insn = next_nonnote_insn (this_insn); + gcc_assert (!this_insn + || (!BARRIER_P (this_insn) + && !LABEL_P (this_insn))); + } + if (!this_insn) + { + /* Oh, dear! We ran off the end; give up. */ + extract_constrain_insn_cached (insn); + arm_ccfsm_state = 0; + arm_target_insn = NULL; + return; + } + arm_target_insn = this_insn; + } + + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from + what it was. */ + if (!reverse) + arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0)); + + if (reverse || then_not_else) + arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc); + } + + /* Restore recog_data (getting the attributes of other insns can + destroy this array, but final.c assumes that it remains intact + across this call). */ + extract_constrain_insn_cached (insn); + } +} + +/* Output IT instructions. */ +void +thumb2_asm_output_opcode (FILE * stream) +{ + char buff[5]; + int n; + + if (arm_condexec_mask) + { + for (n = 0; n < arm_condexec_masklen; n++) + buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e'; + buff[n] = 0; + asm_fprintf(stream, "i%s\t%s\n\t", buff, + arm_condition_codes[arm_current_cc]); + arm_condexec_mask = 0; + } +} + +/* Returns true if REGNO is a valid register + for holding a quantity of type MODE. */ +int +arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_CC) + return (regno == CC_REGNUM + || (TARGET_HARD_FLOAT && TARGET_VFP + && regno == VFPCC_REGNUM)); + + if (TARGET_THUMB1) + /* For the Thumb we only allow values bigger than SImode in + registers 0 - 6, so that there is always a second low + register available to hold the upper part of the value. + We probably ought to ensure that the register is the + start of an even numbered register pair. */ + return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM); + + if (TARGET_HARD_FLOAT && TARGET_VFP + && IS_VFP_REGNUM (regno)) + { + if (mode == SFmode || mode == SImode) + return VFP_REGNO_OK_FOR_SINGLE (regno); + + if (mode == DFmode) + return VFP_REGNO_OK_FOR_DOUBLE (regno); + + /* VFP registers can hold HFmode values, but there is no point in + putting them there unless we have hardware conversion insns.
*/ + if (mode == HFmode) + return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno); + + if (TARGET_NEON) + return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) + || (VALID_NEON_QREG_MODE (mode) + && NEON_REGNO_OK_FOR_QUAD (regno)) + || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2)) + || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3)) + || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4)) + || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6)) + || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)); + + return FALSE; + } + + if (TARGET_REALLY_IWMMXT) + { + if (IS_IWMMXT_GR_REGNUM (regno)) + return mode == SImode; + + if (IS_IWMMXT_REGNUM (regno)) + return VALID_IWMMXT_REG_MODE (mode); + } + + /* We allow almost any value to be stored in the general registers. + Restrict doubleword quantities to even register pairs so that we can + use ldrd. Do not allow very large Neon structure opaque modes in + general registers; they would use too many. */ + if (regno <= LAST_ARM_REGNUM) + return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) + && ARM_NUM_REGS (mode) <= 4; + + if (regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + /* We only allow integers in the fake hard registers. */ + return GET_MODE_CLASS (mode) == MODE_INT; + + return FALSE; +} + +/* Implement MODES_TIEABLE_P. */ + +bool +arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) +{ + if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)) + return true; + + /* We specifically want to allow elements of "structure" modes to + be tieable to the structure. This more general condition allows + other rarer situations too. */ + if (TARGET_NEON + && (VALID_NEON_DREG_MODE (mode1) + || VALID_NEON_QREG_MODE (mode1) + || VALID_NEON_STRUCT_MODE (mode1)) + && (VALID_NEON_DREG_MODE (mode2) + || VALID_NEON_QREG_MODE (mode2) + || VALID_NEON_STRUCT_MODE (mode2))) + return true; + + return false; +} + +/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are + not used in arm mode. */ + +enum reg_class +arm_regno_class (int regno) +{ + if (TARGET_THUMB1) + { + if (regno == STACK_POINTER_REGNUM) + return STACK_REG; + if (regno == CC_REGNUM) + return CC_REG; + if (regno < 8) + return LO_REGS; + return HI_REGS; + } + + if (TARGET_THUMB2 && regno < 8) + return LO_REGS; + + if ( regno <= LAST_ARM_REGNUM + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS; + + if (regno == CC_REGNUM || regno == VFPCC_REGNUM) + return TARGET_THUMB2 ? CC_REG : NO_REGS; + + if (IS_VFP_REGNUM (regno)) + { + if (regno <= D7_VFP_REGNUM) + return VFP_D0_D7_REGS; + else if (regno <= LAST_LO_VFP_REGNUM) + return VFP_LO_REGS; + else + return VFP_HI_REGS; + } + + if (IS_IWMMXT_REGNUM (regno)) + return IWMMXT_REGS; + + if (IS_IWMMXT_GR_REGNUM (regno)) + return IWMMXT_GR_REGS; + + return NO_REGS; +} + +/* Handle a special case when computing the offset + of an argument from the frame pointer. */ +int +arm_debugger_arg_offset (int value, rtx addr) +{ + rtx insn; + + /* We are only interested if dbxout_parms() failed to compute the offset. */ + if (value != 0) + return 0; + + /* We can only cope with the case where the address is held in a register. */ + if (!REG_P (addr)) + return 0; + + /* If we are using the frame pointer to point at the argument, then + an offset of 0 is correct. 
*/ + if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM) + return 0; + + /* If we are using the stack pointer to point at the + argument, then an offset of 0 is correct. */ + /* ??? Check this is consistent with thumb2 frame layout. */ + if ((TARGET_THUMB || !frame_pointer_needed) + && REGNO (addr) == SP_REGNUM) + return 0; + + /* Oh dear. The argument is pointed to by a register rather + than being held in a register, or being stored at a known + offset from the frame pointer. Since GDB only understands + those two kinds of argument we must translate the address + held in the register into an offset from the frame pointer. + We do this by searching through the insns for the function + looking to see where this register gets its value. If the + register is initialized from the frame pointer plus an offset + then we are in luck and we can continue, otherwise we give up. + + This code is exercised by producing debugging information + for a function with arguments like this: + + double func (double a, double b, int c, double d) {return d;} + + Without this code the stab for parameter 'd' will be set to + an offset of 0 from the frame pointer, rather than 8. */ + + /* The if() statement says: + + If the insn is a normal instruction + and if the insn is setting the value in a register + and if the register being set is the register holding the address of the argument + and if the address is computed by an addition + that involves adding to a register + which is the frame pointer + a constant integer + + then... */ + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if ( NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == SET + && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr) + && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS + && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0)) + && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM + && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1)) + ) + { + value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1)); + + break; + } + } + + if (value == 0) + { + debug_rtx (addr); + warning (0, "unable to compute real location of stacked parameter"); + value = 8; /* XXX magic hack */ + } + + return value; +} + +typedef enum { + T_V8QI, + T_V4HI, + T_V4HF, + T_V2SI, + T_V2SF, + T_DI, + T_V16QI, + T_V8HI, + T_V4SI, + T_V4SF, + T_V2DI, + T_TI, + T_EI, + T_OI, + T_MAX /* Size of enum. Keep last.
*/ +} neon_builtin_type_mode; + +#define TYPE_MODE_BIT(X) (1 << (X)) + +#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \ + | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \ + | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI)) +#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \ + | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \ + | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI)) + +#define v8qi_UP T_V8QI +#define v4hi_UP T_V4HI +#define v4hf_UP T_V4HF +#define v2si_UP T_V2SI +#define v2sf_UP T_V2SF +#define di_UP T_DI +#define v16qi_UP T_V16QI +#define v8hi_UP T_V8HI +#define v4si_UP T_V4SI +#define v4sf_UP T_V4SF +#define v2di_UP T_V2DI +#define ti_UP T_TI +#define ei_UP T_EI +#define oi_UP T_OI + +#define UP(X) X##_UP + +typedef enum { + NEON_BINOP, + NEON_TERNOP, + NEON_UNOP, + NEON_GETLANE, + NEON_SETLANE, + NEON_CREATE, + NEON_RINT, + NEON_DUP, + NEON_DUPLANE, + NEON_COMBINE, + NEON_SPLIT, + NEON_LANEMUL, + NEON_LANEMULL, + NEON_LANEMULH, + NEON_LANEMAC, + NEON_SCALARMUL, + NEON_SCALARMULL, + NEON_SCALARMULH, + NEON_SCALARMAC, + NEON_CONVERT, + NEON_FLOAT_WIDEN, + NEON_FLOAT_NARROW, + NEON_FIXCONV, + NEON_SELECT, + NEON_RESULTPAIR, + NEON_REINTERP, + NEON_VTBL, + NEON_VTBX, + NEON_LOAD1, + NEON_LOAD1LANE, + NEON_STORE1, + NEON_STORE1LANE, + NEON_LOADSTRUCT, + NEON_LOADSTRUCTLANE, + NEON_STORESTRUCT, + NEON_STORESTRUCTLANE, + NEON_LOGICBINOP, + NEON_SHIFTINSERT, + NEON_SHIFTIMM, + NEON_SHIFTACC +} neon_itype; + +typedef struct { + const char *name; + const neon_itype itype; + const neon_builtin_type_mode mode; + const enum insn_code code; + unsigned int fcode; +} neon_builtin_datum; + +#define CF(N,X) CODE_FOR_neon_##N##X + +#define VAR1(T, N, A) \ + {#N, NEON_##T, UP (A), CF (N, A), 0} +#define VAR2(T, N, A, B) \ + VAR1 (T, N, A), \ + {#N, NEON_##T, UP (B), CF (N, B), 0} +#define VAR3(T, N, A, B, C) \ + VAR2 (T, N, A, B), \ + {#N, NEON_##T, UP (C), CF (N, C), 0} +#define VAR4(T, N, A, B, C, D) \ + VAR3 (T, N, A, B, C), \ + {#N, NEON_##T, UP (D), CF (N, D), 0} +#define VAR5(T, N, A, B, C, D, E) \ + VAR4 (T, N, A, B, C, D), \ + {#N, NEON_##T, UP (E), CF (N, E), 0} +#define VAR6(T, N, A, B, C, D, E, F) \ + VAR5 (T, N, A, B, C, D, E), \ + {#N, NEON_##T, UP (F), CF (N, F), 0} +#define VAR7(T, N, A, B, C, D, E, F, G) \ + VAR6 (T, N, A, B, C, D, E, F), \ + {#N, NEON_##T, UP (G), CF (N, G), 0} +#define VAR8(T, N, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, A, B, C, D, E, F, G), \ + {#N, NEON_##T, UP (H), CF (N, H), 0} +#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, A, B, C, D, E, F, G, H), \ + {#N, NEON_##T, UP (I), CF (N, I), 0} +#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ + {#N, NEON_##T, UP (J), CF (N, J), 0} + +/* The NEON builtin data can be found in arm_neon_builtins.def. + The mode entries in the following table correspond to the "key" type of the + instruction variant, i.e. equivalent to that which would be specified after + the assembler mnemonic, which usually refers to the last vector operand. + (Signed/unsigned/polynomial types are not differentiated between though, and + are all mapped onto the same mode for a given element size.) The modes + listed per instruction should be the same as those defined for that + instruction's pattern in neon.md. 
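+   For example (the entry here is only an illustration and need not appear
+   verbatim in arm_neon_builtins.def), a line such as
+   VAR2 (BINOP, vadd, v2sf, v4sf) expands through the VAR2, UP and CF macros
+   above into the two rows {"vadd", NEON_BINOP, T_V2SF, CODE_FOR_neon_vaddv2sf, 0}
+   and {"vadd", NEON_BINOP, T_V4SF, CODE_FOR_neon_vaddv4sf, 0}, which
+   arm_init_neon_builtins below registers as __builtin_neon_vaddv2sf and
+   __builtin_neon_vaddv4sf.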
*/ + +static neon_builtin_datum neon_builtin_data[] = +{ +#include "arm_neon_builtins.def" +}; + +#undef CF +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 + +#define CF(N,X) ARM_BUILTIN_NEON_##N##X +#define VAR1(T, N, A) \ + CF (N, A) +#define VAR2(T, N, A, B) \ + VAR1 (T, N, A), \ + CF (N, B) +#define VAR3(T, N, A, B, C) \ + VAR2 (T, N, A, B), \ + CF (N, C) +#define VAR4(T, N, A, B, C, D) \ + VAR3 (T, N, A, B, C), \ + CF (N, D) +#define VAR5(T, N, A, B, C, D, E) \ + VAR4 (T, N, A, B, C, D), \ + CF (N, E) +#define VAR6(T, N, A, B, C, D, E, F) \ + VAR5 (T, N, A, B, C, D, E), \ + CF (N, F) +#define VAR7(T, N, A, B, C, D, E, F, G) \ + VAR6 (T, N, A, B, C, D, E, F), \ + CF (N, G) +#define VAR8(T, N, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, A, B, C, D, E, F, G), \ + CF (N, H) +#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, A, B, C, D, E, F, G, H), \ + CF (N, I) +#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, A, B, C, D, E, F, G, H, I), \ + CF (N, J) +enum arm_builtins +{ + ARM_BUILTIN_GETWCGR0, + ARM_BUILTIN_GETWCGR1, + ARM_BUILTIN_GETWCGR2, + ARM_BUILTIN_GETWCGR3, + + ARM_BUILTIN_SETWCGR0, + ARM_BUILTIN_SETWCGR1, + ARM_BUILTIN_SETWCGR2, + ARM_BUILTIN_SETWCGR3, + + ARM_BUILTIN_WZERO, + + ARM_BUILTIN_WAVG2BR, + ARM_BUILTIN_WAVG2HR, + ARM_BUILTIN_WAVG2B, + ARM_BUILTIN_WAVG2H, + + ARM_BUILTIN_WACCB, + ARM_BUILTIN_WACCH, + ARM_BUILTIN_WACCW, + + ARM_BUILTIN_WMACS, + ARM_BUILTIN_WMACSZ, + ARM_BUILTIN_WMACU, + ARM_BUILTIN_WMACUZ, + + ARM_BUILTIN_WSADB, + ARM_BUILTIN_WSADBZ, + ARM_BUILTIN_WSADH, + ARM_BUILTIN_WSADHZ, + + ARM_BUILTIN_WALIGNI, + ARM_BUILTIN_WALIGNR0, + ARM_BUILTIN_WALIGNR1, + ARM_BUILTIN_WALIGNR2, + ARM_BUILTIN_WALIGNR3, + + ARM_BUILTIN_TMIA, + ARM_BUILTIN_TMIAPH, + ARM_BUILTIN_TMIABB, + ARM_BUILTIN_TMIABT, + ARM_BUILTIN_TMIATB, + ARM_BUILTIN_TMIATT, + + ARM_BUILTIN_TMOVMSKB, + ARM_BUILTIN_TMOVMSKH, + ARM_BUILTIN_TMOVMSKW, + + ARM_BUILTIN_TBCSTB, + ARM_BUILTIN_TBCSTH, + ARM_BUILTIN_TBCSTW, + + ARM_BUILTIN_WMADDS, + ARM_BUILTIN_WMADDU, + + ARM_BUILTIN_WPACKHSS, + ARM_BUILTIN_WPACKWSS, + ARM_BUILTIN_WPACKDSS, + ARM_BUILTIN_WPACKHUS, + ARM_BUILTIN_WPACKWUS, + ARM_BUILTIN_WPACKDUS, + + ARM_BUILTIN_WADDB, + ARM_BUILTIN_WADDH, + ARM_BUILTIN_WADDW, + ARM_BUILTIN_WADDSSB, + ARM_BUILTIN_WADDSSH, + ARM_BUILTIN_WADDSSW, + ARM_BUILTIN_WADDUSB, + ARM_BUILTIN_WADDUSH, + ARM_BUILTIN_WADDUSW, + ARM_BUILTIN_WSUBB, + ARM_BUILTIN_WSUBH, + ARM_BUILTIN_WSUBW, + ARM_BUILTIN_WSUBSSB, + ARM_BUILTIN_WSUBSSH, + ARM_BUILTIN_WSUBSSW, + ARM_BUILTIN_WSUBUSB, + ARM_BUILTIN_WSUBUSH, + ARM_BUILTIN_WSUBUSW, + + ARM_BUILTIN_WAND, + ARM_BUILTIN_WANDN, + ARM_BUILTIN_WOR, + ARM_BUILTIN_WXOR, + + ARM_BUILTIN_WCMPEQB, + ARM_BUILTIN_WCMPEQH, + ARM_BUILTIN_WCMPEQW, + ARM_BUILTIN_WCMPGTUB, + ARM_BUILTIN_WCMPGTUH, + ARM_BUILTIN_WCMPGTUW, + ARM_BUILTIN_WCMPGTSB, + ARM_BUILTIN_WCMPGTSH, + ARM_BUILTIN_WCMPGTSW, + + ARM_BUILTIN_TEXTRMSB, + ARM_BUILTIN_TEXTRMSH, + ARM_BUILTIN_TEXTRMSW, + ARM_BUILTIN_TEXTRMUB, + ARM_BUILTIN_TEXTRMUH, + ARM_BUILTIN_TEXTRMUW, + ARM_BUILTIN_TINSRB, + ARM_BUILTIN_TINSRH, + ARM_BUILTIN_TINSRW, + + ARM_BUILTIN_WMAXSW, + ARM_BUILTIN_WMAXSH, + ARM_BUILTIN_WMAXSB, + ARM_BUILTIN_WMAXUW, + ARM_BUILTIN_WMAXUH, + ARM_BUILTIN_WMAXUB, + ARM_BUILTIN_WMINSW, + ARM_BUILTIN_WMINSH, + ARM_BUILTIN_WMINSB, + ARM_BUILTIN_WMINUW, + ARM_BUILTIN_WMINUH, + ARM_BUILTIN_WMINUB, + + ARM_BUILTIN_WMULUM, + ARM_BUILTIN_WMULSM, + ARM_BUILTIN_WMULUL, + + ARM_BUILTIN_PSADBH, + ARM_BUILTIN_WSHUFH, + + ARM_BUILTIN_WSLLH, + 
ARM_BUILTIN_WSLLW, + ARM_BUILTIN_WSLLD, + ARM_BUILTIN_WSRAH, + ARM_BUILTIN_WSRAW, + ARM_BUILTIN_WSRAD, + ARM_BUILTIN_WSRLH, + ARM_BUILTIN_WSRLW, + ARM_BUILTIN_WSRLD, + ARM_BUILTIN_WRORH, + ARM_BUILTIN_WRORW, + ARM_BUILTIN_WRORD, + ARM_BUILTIN_WSLLHI, + ARM_BUILTIN_WSLLWI, + ARM_BUILTIN_WSLLDI, + ARM_BUILTIN_WSRAHI, + ARM_BUILTIN_WSRAWI, + ARM_BUILTIN_WSRADI, + ARM_BUILTIN_WSRLHI, + ARM_BUILTIN_WSRLWI, + ARM_BUILTIN_WSRLDI, + ARM_BUILTIN_WRORHI, + ARM_BUILTIN_WRORWI, + ARM_BUILTIN_WRORDI, + + ARM_BUILTIN_WUNPCKIHB, + ARM_BUILTIN_WUNPCKIHH, + ARM_BUILTIN_WUNPCKIHW, + ARM_BUILTIN_WUNPCKILB, + ARM_BUILTIN_WUNPCKILH, + ARM_BUILTIN_WUNPCKILW, + + ARM_BUILTIN_WUNPCKEHSB, + ARM_BUILTIN_WUNPCKEHSH, + ARM_BUILTIN_WUNPCKEHSW, + ARM_BUILTIN_WUNPCKEHUB, + ARM_BUILTIN_WUNPCKEHUH, + ARM_BUILTIN_WUNPCKEHUW, + ARM_BUILTIN_WUNPCKELSB, + ARM_BUILTIN_WUNPCKELSH, + ARM_BUILTIN_WUNPCKELSW, + ARM_BUILTIN_WUNPCKELUB, + ARM_BUILTIN_WUNPCKELUH, + ARM_BUILTIN_WUNPCKELUW, + + ARM_BUILTIN_WABSB, + ARM_BUILTIN_WABSH, + ARM_BUILTIN_WABSW, + + ARM_BUILTIN_WADDSUBHX, + ARM_BUILTIN_WSUBADDHX, + + ARM_BUILTIN_WABSDIFFB, + ARM_BUILTIN_WABSDIFFH, + ARM_BUILTIN_WABSDIFFW, + + ARM_BUILTIN_WADDCH, + ARM_BUILTIN_WADDCW, + + ARM_BUILTIN_WAVG4, + ARM_BUILTIN_WAVG4R, + + ARM_BUILTIN_WMADDSX, + ARM_BUILTIN_WMADDUX, + + ARM_BUILTIN_WMADDSN, + ARM_BUILTIN_WMADDUN, + + ARM_BUILTIN_WMULWSM, + ARM_BUILTIN_WMULWUM, + + ARM_BUILTIN_WMULWSMR, + ARM_BUILTIN_WMULWUMR, + + ARM_BUILTIN_WMULWL, + + ARM_BUILTIN_WMULSMR, + ARM_BUILTIN_WMULUMR, + + ARM_BUILTIN_WQMULM, + ARM_BUILTIN_WQMULMR, + + ARM_BUILTIN_WQMULWM, + ARM_BUILTIN_WQMULWMR, + + ARM_BUILTIN_WADDBHUSM, + ARM_BUILTIN_WADDBHUSL, + + ARM_BUILTIN_WQMIABB, + ARM_BUILTIN_WQMIABT, + ARM_BUILTIN_WQMIATB, + ARM_BUILTIN_WQMIATT, + + ARM_BUILTIN_WQMIABBN, + ARM_BUILTIN_WQMIABTN, + ARM_BUILTIN_WQMIATBN, + ARM_BUILTIN_WQMIATTN, + + ARM_BUILTIN_WMIABB, + ARM_BUILTIN_WMIABT, + ARM_BUILTIN_WMIATB, + ARM_BUILTIN_WMIATT, + + ARM_BUILTIN_WMIABBN, + ARM_BUILTIN_WMIABTN, + ARM_BUILTIN_WMIATBN, + ARM_BUILTIN_WMIATTN, + + ARM_BUILTIN_WMIAWBB, + ARM_BUILTIN_WMIAWBT, + ARM_BUILTIN_WMIAWTB, + ARM_BUILTIN_WMIAWTT, + + ARM_BUILTIN_WMIAWBBN, + ARM_BUILTIN_WMIAWBTN, + ARM_BUILTIN_WMIAWTBN, + ARM_BUILTIN_WMIAWTTN, + + ARM_BUILTIN_WMERGE, + + ARM_BUILTIN_CRC32B, + ARM_BUILTIN_CRC32H, + ARM_BUILTIN_CRC32W, + ARM_BUILTIN_CRC32CB, + ARM_BUILTIN_CRC32CH, + ARM_BUILTIN_CRC32CW, + +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + +#define CRYPTO1(L, U, M1, M2) \ + ARM_BUILTIN_CRYPTO_##U, +#define CRYPTO2(L, U, M1, M2, M3) \ + ARM_BUILTIN_CRYPTO_##U, +#define CRYPTO3(L, U, M1, M2, M3, M4) \ + ARM_BUILTIN_CRYPTO_##U, + +#include "crypto.def" + +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + +#include "arm_neon_builtins.def" + + ,ARM_BUILTIN_MAX +}; + +#define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) + +#undef CF +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 + +static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX]; + +#define NUM_DREG_TYPES 5 +#define NUM_QREG_TYPES 6 + +static void +arm_init_neon_builtins (void) +{ + unsigned int i, fcode; + tree decl; + + tree neon_intQI_type_node; + tree neon_intHI_type_node; + tree neon_floatHF_type_node; + tree neon_polyQI_type_node; + tree neon_polyHI_type_node; + tree neon_intSI_type_node; + tree neon_intDI_type_node; + tree neon_intUTI_type_node; + tree neon_float_type_node; + + tree intQI_pointer_node; + tree intHI_pointer_node; + tree intSI_pointer_node; + tree 
intDI_pointer_node; + tree float_pointer_node; + + tree const_intQI_node; + tree const_intHI_node; + tree const_intSI_node; + tree const_intDI_node; + tree const_float_node; + + tree const_intQI_pointer_node; + tree const_intHI_pointer_node; + tree const_intSI_pointer_node; + tree const_intDI_pointer_node; + tree const_float_pointer_node; + + tree V8QI_type_node; + tree V4HI_type_node; + tree V4HF_type_node; + tree V2SI_type_node; + tree V2SF_type_node; + tree V16QI_type_node; + tree V8HI_type_node; + tree V4SI_type_node; + tree V4SF_type_node; + tree V2DI_type_node; + + tree intUQI_type_node; + tree intUHI_type_node; + tree intUSI_type_node; + tree intUDI_type_node; + + tree intEI_type_node; + tree intOI_type_node; + tree intCI_type_node; + tree intXI_type_node; + + tree V8QI_pointer_node; + tree V4HI_pointer_node; + tree V2SI_pointer_node; + tree V2SF_pointer_node; + tree V16QI_pointer_node; + tree V8HI_pointer_node; + tree V4SI_pointer_node; + tree V4SF_pointer_node; + tree V2DI_pointer_node; + + tree void_ftype_pv8qi_v8qi_v8qi; + tree void_ftype_pv4hi_v4hi_v4hi; + tree void_ftype_pv2si_v2si_v2si; + tree void_ftype_pv2sf_v2sf_v2sf; + tree void_ftype_pdi_di_di; + tree void_ftype_pv16qi_v16qi_v16qi; + tree void_ftype_pv8hi_v8hi_v8hi; + tree void_ftype_pv4si_v4si_v4si; + tree void_ftype_pv4sf_v4sf_v4sf; + tree void_ftype_pv2di_v2di_v2di; + + tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; + tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; + tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; + + /* Create distinguished type nodes for NEON vector element types, + and pointers to values of such types, so we can detect them later. */ + neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); + neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); + neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode)); + neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode)); + neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode)); + neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode)); + neon_float_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; + layout_type (neon_float_type_node); + neon_floatHF_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode); + layout_type (neon_floatHF_type_node); + + /* Define typedefs which exactly correspond to the modes we are basing vector + types on. If you change these names you'll need to change + the table used by arm_mangle_type too. 
*/ + (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node, + "__builtin_neon_qi"); + (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, + "__builtin_neon_hi"); + (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node, + "__builtin_neon_hf"); + (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, + "__builtin_neon_si"); + (*lang_hooks.types.register_builtin_type) (neon_float_type_node, + "__builtin_neon_sf"); + (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node, + "__builtin_neon_di"); + (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node, + "__builtin_neon_poly8"); + (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node, + "__builtin_neon_poly16"); + + intQI_pointer_node = build_pointer_type (neon_intQI_type_node); + intHI_pointer_node = build_pointer_type (neon_intHI_type_node); + intSI_pointer_node = build_pointer_type (neon_intSI_type_node); + intDI_pointer_node = build_pointer_type (neon_intDI_type_node); + float_pointer_node = build_pointer_type (neon_float_type_node); + + /* Next create constant-qualified versions of the above types. */ + const_intQI_node = build_qualified_type (neon_intQI_type_node, + TYPE_QUAL_CONST); + const_intHI_node = build_qualified_type (neon_intHI_type_node, + TYPE_QUAL_CONST); + const_intSI_node = build_qualified_type (neon_intSI_type_node, + TYPE_QUAL_CONST); + const_intDI_node = build_qualified_type (neon_intDI_type_node, + TYPE_QUAL_CONST); + const_float_node = build_qualified_type (neon_float_type_node, + TYPE_QUAL_CONST); + + const_intQI_pointer_node = build_pointer_type (const_intQI_node); + const_intHI_pointer_node = build_pointer_type (const_intHI_node); + const_intSI_pointer_node = build_pointer_type (const_intSI_node); + const_intDI_pointer_node = build_pointer_type (const_intDI_node); + const_float_pointer_node = build_pointer_type (const_float_node); + + /* Now create vector types based on our NEON element types. */ + /* 64-bit vectors. */ + V8QI_type_node = + build_vector_type_for_mode (neon_intQI_type_node, V8QImode); + V4HI_type_node = + build_vector_type_for_mode (neon_intHI_type_node, V4HImode); + V4HF_type_node = + build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); + V2SI_type_node = + build_vector_type_for_mode (neon_intSI_type_node, V2SImode); + V2SF_type_node = + build_vector_type_for_mode (neon_float_type_node, V2SFmode); + /* 128-bit vectors. */ + V16QI_type_node = + build_vector_type_for_mode (neon_intQI_type_node, V16QImode); + V8HI_type_node = + build_vector_type_for_mode (neon_intHI_type_node, V8HImode); + V4SI_type_node = + build_vector_type_for_mode (neon_intSI_type_node, V4SImode); + V4SF_type_node = + build_vector_type_for_mode (neon_float_type_node, V4SFmode); + V2DI_type_node = + build_vector_type_for_mode (neon_intDI_type_node, V2DImode); + + /* Unsigned integer types for various mode sizes. 
*/ + intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); + intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); + intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); + intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); + neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); + + + (*lang_hooks.types.register_builtin_type) (intUQI_type_node, + "__builtin_neon_uqi"); + (*lang_hooks.types.register_builtin_type) (intUHI_type_node, + "__builtin_neon_uhi"); + (*lang_hooks.types.register_builtin_type) (intUSI_type_node, + "__builtin_neon_usi"); + (*lang_hooks.types.register_builtin_type) (intUDI_type_node, + "__builtin_neon_udi"); + (*lang_hooks.types.register_builtin_type) (intUDI_type_node, + "__builtin_neon_poly64"); + (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node, + "__builtin_neon_poly128"); + + /* Opaque integer types for structures of vectors. */ + intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode)); + intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode)); + intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode)); + intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode)); + + (*lang_hooks.types.register_builtin_type) (intTI_type_node, + "__builtin_neon_ti"); + (*lang_hooks.types.register_builtin_type) (intEI_type_node, + "__builtin_neon_ei"); + (*lang_hooks.types.register_builtin_type) (intOI_type_node, + "__builtin_neon_oi"); + (*lang_hooks.types.register_builtin_type) (intCI_type_node, + "__builtin_neon_ci"); + (*lang_hooks.types.register_builtin_type) (intXI_type_node, + "__builtin_neon_xi"); + + /* Pointers to vector types. */ + V8QI_pointer_node = build_pointer_type (V8QI_type_node); + V4HI_pointer_node = build_pointer_type (V4HI_type_node); + V2SI_pointer_node = build_pointer_type (V2SI_type_node); + V2SF_pointer_node = build_pointer_type (V2SF_type_node); + V16QI_pointer_node = build_pointer_type (V16QI_type_node); + V8HI_pointer_node = build_pointer_type (V8HI_type_node); + V4SI_pointer_node = build_pointer_type (V4SI_type_node); + V4SF_pointer_node = build_pointer_type (V4SF_type_node); + V2DI_pointer_node = build_pointer_type (V2DI_type_node); + + /* Operations which return results as pairs. 
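+   These void (T *, T, T) signatures are used below for the NEON_RESULTPAIR
+   builtins, which return two vectors by writing them through the pointer
+   operand (for instance the vtrn/vzip/vuzp family).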
*/ + void_ftype_pv8qi_v8qi_v8qi = + build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, + V8QI_type_node, NULL); + void_ftype_pv4hi_v4hi_v4hi = + build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, + V4HI_type_node, NULL); + void_ftype_pv2si_v2si_v2si = + build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, + V2SI_type_node, NULL); + void_ftype_pv2sf_v2sf_v2sf = + build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, + V2SF_type_node, NULL); + void_ftype_pdi_di_di = + build_function_type_list (void_type_node, intDI_pointer_node, + neon_intDI_type_node, neon_intDI_type_node, NULL); + void_ftype_pv16qi_v16qi_v16qi = + build_function_type_list (void_type_node, V16QI_pointer_node, + V16QI_type_node, V16QI_type_node, NULL); + void_ftype_pv8hi_v8hi_v8hi = + build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, + V8HI_type_node, NULL); + void_ftype_pv4si_v4si_v4si = + build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, + V4SI_type_node, NULL); + void_ftype_pv4sf_v4sf_v4sf = + build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, + V4SF_type_node, NULL); + void_ftype_pv2di_v2di_v2di = + build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, + V2DI_type_node, NULL); + + if (TARGET_CRYPTO && TARGET_HARD_FLOAT) + { + tree V4USI_type_node = + build_vector_type_for_mode (intUSI_type_node, V4SImode); + + tree V16UQI_type_node = + build_vector_type_for_mode (intUQI_type_node, V16QImode); + + tree v16uqi_ftype_v16uqi + = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE); + + tree v16uqi_ftype_v16uqi_v16uqi + = build_function_type_list (V16UQI_type_node, V16UQI_type_node, + V16UQI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, + V4USI_type_node, NULL_TREE); + + tree v4usi_ftype_v4usi_v4usi_v4usi + = build_function_type_list (V4USI_type_node, V4USI_type_node, + V4USI_type_node, V4USI_type_node, NULL_TREE); + + tree uti_ftype_udi_udi + = build_function_type_list (neon_intUTI_type_node, intUDI_type_node, + intUDI_type_node, NULL_TREE); + + #undef CRYPTO1 + #undef CRYPTO2 + #undef CRYPTO3 + #undef C + #undef N + #undef CF + #undef FT1 + #undef FT2 + #undef FT3 + + #define C(U) \ + ARM_BUILTIN_CRYPTO_##U + #define N(L) \ + "__builtin_arm_crypto_"#L + #define FT1(R, A) \ + R##_ftype_##A + #define FT2(R, A1, A2) \ + R##_ftype_##A1##_##A2 + #define FT3(R, A1, A2, A3) \ + R##_ftype_##A1##_##A2##_##A3 + #define CRYPTO1(L, U, R, A) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \ + C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + #define CRYPTO2(L, U, R, A1, A2) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \ + C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + + #define CRYPTO3(L, U, R, A1, A2, A3) \ + arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \ + C (U), BUILT_IN_MD, \ + NULL, NULL_TREE); + #include "crypto.def" + + #undef CRYPTO1 + #undef CRYPTO2 + #undef CRYPTO3 + #undef C + #undef N + #undef FT1 + #undef FT2 + #undef FT3 + } + dreg_types[0] = V8QI_type_node; + dreg_types[1] = V4HI_type_node; + dreg_types[2] = V2SI_type_node; + dreg_types[3] = V2SF_type_node; + dreg_types[4] = neon_intDI_type_node; + + qreg_types[0] = V16QI_type_node; + qreg_types[1] = 
V8HI_type_node; + qreg_types[2] = V4SI_type_node; + qreg_types[3] = V4SF_type_node; + qreg_types[4] = V2DI_type_node; + qreg_types[5] = neon_intUTI_type_node; + + for (i = 0; i < NUM_QREG_TYPES; i++) + { + int j; + for (j = 0; j < NUM_QREG_TYPES; j++) + { + if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES) + reinterp_ftype_dreg[i][j] + = build_function_type_list (dreg_types[i], dreg_types[j], NULL); + + reinterp_ftype_qreg[i][j] + = build_function_type_list (qreg_types[i], qreg_types[j], NULL); + } + } + + for (i = 0, fcode = ARM_BUILTIN_NEON_BASE; + i < ARRAY_SIZE (neon_builtin_data); + i++, fcode++) + { + neon_builtin_datum *d = &neon_builtin_data[i]; + + const char* const modenames[] = { + "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di", + "v16qi", "v8hi", "v4si", "v4sf", "v2di", + "ti", "ei", "oi" + }; + char namebuf[60]; + tree ftype = NULL; + int is_load = 0, is_store = 0; + + gcc_assert (ARRAY_SIZE (modenames) == T_MAX); + + d->fcode = fcode; + + switch (d->itype) + { + case NEON_LOAD1: + case NEON_LOAD1LANE: + case NEON_LOADSTRUCT: + case NEON_LOADSTRUCTLANE: + is_load = 1; + /* Fall through. */ + case NEON_STORE1: + case NEON_STORE1LANE: + case NEON_STORESTRUCT: + case NEON_STORESTRUCTLANE: + if (!is_load) + is_store = 1; + /* Fall through. */ + case NEON_UNOP: + case NEON_RINT: + case NEON_BINOP: + case NEON_LOGICBINOP: + case NEON_SHIFTINSERT: + case NEON_TERNOP: + case NEON_GETLANE: + case NEON_SETLANE: + case NEON_CREATE: + case NEON_DUP: + case NEON_DUPLANE: + case NEON_SHIFTIMM: + case NEON_SHIFTACC: + case NEON_COMBINE: + case NEON_SPLIT: + case NEON_CONVERT: + case NEON_FIXCONV: + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: + case NEON_LANEMAC: + case NEON_SCALARMUL: + case NEON_SCALARMULL: + case NEON_SCALARMULH: + case NEON_SCALARMAC: + case NEON_SELECT: + case NEON_VTBL: + case NEON_VTBX: + { + int k; + tree return_type = void_type_node, args = void_list_node; + + /* Build a function type directly from the insn_data for + this builtin. The build_function_type() function takes + care of removing duplicates for us. */ + for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--) + { + tree eltype; + + if (is_load && k == 1) + { + /* Neon load patterns always have the memory + operand in the operand 1 position. */ + gcc_assert (insn_data[d->code].operand[k].predicate + == neon_struct_operand); + + switch (d->mode) + { + case T_V8QI: + case T_V16QI: + eltype = const_intQI_pointer_node; + break; + + case T_V4HI: + case T_V8HI: + eltype = const_intHI_pointer_node; + break; + + case T_V2SI: + case T_V4SI: + eltype = const_intSI_pointer_node; + break; + + case T_V2SF: + case T_V4SF: + eltype = const_float_pointer_node; + break; + + case T_DI: + case T_V2DI: + eltype = const_intDI_pointer_node; + break; + + default: gcc_unreachable (); + } + } + else if (is_store && k == 0) + { + /* Similarly, Neon store patterns use operand 0 as + the memory location to store to. 
*/ + gcc_assert (insn_data[d->code].operand[k].predicate + == neon_struct_operand); + + switch (d->mode) + { + case T_V8QI: + case T_V16QI: + eltype = intQI_pointer_node; + break; + + case T_V4HI: + case T_V8HI: + eltype = intHI_pointer_node; + break; + + case T_V2SI: + case T_V4SI: + eltype = intSI_pointer_node; + break; + + case T_V2SF: + case T_V4SF: + eltype = float_pointer_node; + break; + + case T_DI: + case T_V2DI: + eltype = intDI_pointer_node; + break; + + default: gcc_unreachable (); + } + } + else + { + switch (insn_data[d->code].operand[k].mode) + { + case VOIDmode: eltype = void_type_node; break; + /* Scalars. */ + case QImode: eltype = neon_intQI_type_node; break; + case HImode: eltype = neon_intHI_type_node; break; + case SImode: eltype = neon_intSI_type_node; break; + case SFmode: eltype = neon_float_type_node; break; + case DImode: eltype = neon_intDI_type_node; break; + case TImode: eltype = intTI_type_node; break; + case EImode: eltype = intEI_type_node; break; + case OImode: eltype = intOI_type_node; break; + case CImode: eltype = intCI_type_node; break; + case XImode: eltype = intXI_type_node; break; + /* 64-bit vectors. */ + case V8QImode: eltype = V8QI_type_node; break; + case V4HImode: eltype = V4HI_type_node; break; + case V2SImode: eltype = V2SI_type_node; break; + case V2SFmode: eltype = V2SF_type_node; break; + /* 128-bit vectors. */ + case V16QImode: eltype = V16QI_type_node; break; + case V8HImode: eltype = V8HI_type_node; break; + case V4SImode: eltype = V4SI_type_node; break; + case V4SFmode: eltype = V4SF_type_node; break; + case V2DImode: eltype = V2DI_type_node; break; + default: gcc_unreachable (); + } + } + + if (k == 0 && !is_store) + return_type = eltype; + else + args = tree_cons (NULL_TREE, eltype, args); + } + + ftype = build_function_type (return_type, args); + } + break; + + case NEON_RESULTPAIR: + { + switch (insn_data[d->code].operand[1].mode) + { + case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; + case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; + case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; + case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; + case DImode: ftype = void_ftype_pdi_di_di; break; + case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; + case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; + case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; + case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; + case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; + default: gcc_unreachable (); + } + } + break; + + case NEON_REINTERP: + { + /* We iterate over NUM_DREG_TYPES doubleword types, + then NUM_QREG_TYPES quadword types. + V4HF is not a type used in reinterpret, so we translate + d->mode to the correct index in reinterp_ftype_dreg. */ + bool qreg_p + = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8; + int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 
1 : 0)) + % NUM_QREG_TYPES; + switch (insn_data[d->code].operand[0].mode) + { + case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; + case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break; + case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break; + case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break; + case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break; + case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break; + case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break; + case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break; + case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break; + case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break; + case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break; + default: gcc_unreachable (); + } + } + break; + case NEON_FLOAT_WIDEN: + { + tree eltype = NULL_TREE; + tree return_type = NULL_TREE; + + switch (insn_data[d->code].operand[1].mode) + { + case V4HFmode: + eltype = V4HF_type_node; + return_type = V4SF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } + case NEON_FLOAT_NARROW: + { + tree eltype = NULL_TREE; + tree return_type = NULL_TREE; + + switch (insn_data[d->code].operand[1].mode) + { + case V4SFmode: + eltype = V4SF_type_node; + return_type = V4HF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } + default: + gcc_unreachable (); + } + + gcc_assert (ftype != NULL); + + sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]); + + decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL, + NULL_TREE); + arm_builtin_decls[fcode] = decl; + } +} + +#undef NUM_DREG_TYPES +#undef NUM_QREG_TYPES + +#define def_mbuiltin(MASK, NAME, TYPE, CODE) \ + do \ + { \ + if ((MASK) & insn_flags) \ + { \ + tree bdecl; \ + bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \ + BUILT_IN_MD, NULL, NULL_TREE); \ + arm_builtin_decls[CODE] = bdecl; \ + } \ + } \ + while (0) + +struct builtin_description +{ + const unsigned int mask; + const enum insn_code icode; + const char * const name; + const enum arm_builtins code; + const enum rtx_code comparison; + const unsigned int flag; +}; + +static const struct builtin_description bdesc_2arg[] = +{ +#define IWMMXT_BUILTIN(code, string, builtin) \ + { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ + ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + +#define IWMMXT2_BUILTIN(code, string, builtin) \ + { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \ + ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + + IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) + IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) + IWMMXT_BUILTIN (addv2si3, "waddw", WADDW) + IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB) + IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH) + IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW) + IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB) + IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH) + IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW) + IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB) + IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH) + IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW) + IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB) + IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH) + IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW) + IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB) + IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH) + IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW) + IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL) + 
IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM) + IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM) + IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB) + IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH) + IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW) + IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB) + IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH) + IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW) + IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB) + IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH) + IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW) + IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB) + IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB) + IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH) + IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH) + IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW) + IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW) + IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB) + IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB) + IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH) + IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH) + IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW) + IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW) + IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND) + IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN) + IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR) + IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR) + IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B) + IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H) + IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR) + IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR) + IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB) + IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH) + IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW) + IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB) + IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH) + IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW) + IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX) + IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX) + IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB) + IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH) + IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW) + IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4) + IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R) + IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM) + IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM) + IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR) + IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR) + IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL) + IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR) + IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR) + IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM) + IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR) + IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM) + IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR) + IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0) + IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1) + IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2) + IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3) + +#define IWMMXT_BUILTIN2(code, builtin) \ + { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + +#define IWMMXT2_BUILTIN2(code, builtin) \ + { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, + + IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM) + IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL) + IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackwss, 
WPACKWSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS) + IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS) + IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS) + IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS) + IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) + IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) + +#define CRC32_BUILTIN(L, U) \ + {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ + UNKNOWN, 0}, + CRC32_BUILTIN (crc32b, CRC32B) + CRC32_BUILTIN (crc32h, CRC32H) + CRC32_BUILTIN (crc32w, CRC32W) + CRC32_BUILTIN (crc32cb, CRC32CB) + CRC32_BUILTIN (crc32ch, CRC32CH) + CRC32_BUILTIN (crc32cw, CRC32CW) +#undef CRC32_BUILTIN + + +#define CRYPTO_BUILTIN(L, U) \ + {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \ + UNKNOWN, 0}, +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 +#define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U) +#define CRYPTO1(L, U, R, A) +#define CRYPTO3(L, U, R, A1, A2, A3) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + +}; + +static const struct builtin_description bdesc_1arg[] = +{ + IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB) + IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH) + IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW) + IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB) + IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH) + IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW) + IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB) + IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH) + IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW) + IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB) + IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH) + IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW) + IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB) + IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH) + IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW) + IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB) + IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH) + IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW) + IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB) + IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH) + IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW) + IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB) + IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH) + IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW) + +#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U) +#define CRYPTO2(L, U, R, A1, A2) +#define CRYPTO3(L, U, R, A1, A2, A3) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 +}; + +static const struct builtin_description bdesc_3arg[] = +{ +#define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U) +#define CRYPTO1(L, U, R, A) +#define CRYPTO2(L, U, R, A1, A2) +#include "crypto.def" +#undef CRYPTO1 +#undef CRYPTO2 +#undef CRYPTO3 + }; +#undef CRYPTO_BUILTIN + +/* Set up all the iWMMXt builtins. This is not called if + TARGET_IWMMXT is zero. 
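+   The builtins below are all registered through def_mbuiltin (directly or
+   via the iwmmx_mbuiltin / iwmmx2_mbuiltin wrappers), so a declaration is
+   only created when the matching FL_IWMMXT or FL_IWMMXT2 bit is set in
+   insn_flags.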
*/ + +static void +arm_init_iwmmxt_builtins (void) +{ + const struct builtin_description * d; + size_t i; + + tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); + tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); + tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode); + + tree v8qi_ftype_v8qi_v8qi_int + = build_function_type_list (V8QI_type_node, + V8QI_type_node, V8QI_type_node, + integer_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_int + = build_function_type_list (V4HI_type_node, + V4HI_type_node, integer_type_node, NULL_TREE); + tree v2si_ftype_v2si_int + = build_function_type_list (V2SI_type_node, + V2SI_type_node, integer_type_node, NULL_TREE); + tree v2si_ftype_di_di + = build_function_type_list (V2SI_type_node, + long_long_integer_type_node, + long_long_integer_type_node, + NULL_TREE); + tree di_ftype_di_int + = build_function_type_list (long_long_integer_type_node, + long_long_integer_type_node, + integer_type_node, NULL_TREE); + tree di_ftype_di_int_int + = build_function_type_list (long_long_integer_type_node, + long_long_integer_type_node, + integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_v8qi + = build_function_type_list (integer_type_node, + V8QI_type_node, NULL_TREE); + tree int_ftype_v4hi + = build_function_type_list (integer_type_node, + V4HI_type_node, NULL_TREE); + tree int_ftype_v2si + = build_function_type_list (integer_type_node, + V2SI_type_node, NULL_TREE); + tree int_ftype_v8qi_int + = build_function_type_list (integer_type_node, + V8QI_type_node, integer_type_node, NULL_TREE); + tree int_ftype_v4hi_int + = build_function_type_list (integer_type_node, + V4HI_type_node, integer_type_node, NULL_TREE); + tree int_ftype_v2si_int + = build_function_type_list (integer_type_node, + V2SI_type_node, integer_type_node, NULL_TREE); + tree v8qi_ftype_v8qi_int_int + = build_function_type_list (V8QI_type_node, + V8QI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_int_int + = build_function_type_list (V4HI_type_node, + V4HI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2si_ftype_v2si_int_int + = build_function_type_list (V2SI_type_node, + V2SI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + /* Miscellaneous. 
*/ + tree v8qi_ftype_v4hi_v4hi + = build_function_type_list (V8QI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree v4hi_ftype_v2si_v2si + = build_function_type_list (V4HI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); + tree v8qi_ftype_v4hi_v8qi + = build_function_type_list (V8QI_type_node, + V4HI_type_node, V8QI_type_node, NULL_TREE); + tree v2si_ftype_v4hi_v4hi + = build_function_type_list (V2SI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree v2si_ftype_v8qi_v8qi + = build_function_type_list (V2SI_type_node, + V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_di + = build_function_type_list (V4HI_type_node, + V4HI_type_node, long_long_integer_type_node, + NULL_TREE); + tree v2si_ftype_v2si_di + = build_function_type_list (V2SI_type_node, + V2SI_type_node, long_long_integer_type_node, + NULL_TREE); + tree di_ftype_void + = build_function_type_list (long_long_unsigned_type_node, NULL_TREE); + tree int_ftype_void + = build_function_type_list (integer_type_node, NULL_TREE); + tree di_ftype_v8qi + = build_function_type_list (long_long_integer_type_node, + V8QI_type_node, NULL_TREE); + tree di_ftype_v4hi + = build_function_type_list (long_long_integer_type_node, + V4HI_type_node, NULL_TREE); + tree di_ftype_v2si + = build_function_type_list (long_long_integer_type_node, + V2SI_type_node, NULL_TREE); + tree v2si_ftype_v4hi + = build_function_type_list (V2SI_type_node, + V4HI_type_node, NULL_TREE); + tree v4hi_ftype_v8qi + = build_function_type_list (V4HI_type_node, + V8QI_type_node, NULL_TREE); + tree v8qi_ftype_v8qi + = build_function_type_list (V8QI_type_node, + V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi + = build_function_type_list (V4HI_type_node, + V4HI_type_node, NULL_TREE); + tree v2si_ftype_v2si + = build_function_type_list (V2SI_type_node, + V2SI_type_node, NULL_TREE); + + tree di_ftype_di_v4hi_v4hi + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + V4HI_type_node, V4HI_type_node, + NULL_TREE); + + tree di_ftype_v4hi_v4hi + = build_function_type_list (long_long_unsigned_type_node, + V4HI_type_node,V4HI_type_node, + NULL_TREE); + + tree v2si_ftype_v2si_v4hi_v4hi + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V4HI_type_node, + V4HI_type_node, NULL_TREE); + + tree v2si_ftype_v2si_v8qi_v8qi + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V8QI_type_node, + V8QI_type_node, NULL_TREE); + + tree di_ftype_di_v2si_v2si + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + V2SI_type_node, V2SI_type_node, + NULL_TREE); + + tree di_ftype_di_di_int + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + integer_type_node, NULL_TREE); + + tree void_ftype_int + = build_function_type_list (void_type_node, + integer_type_node, NULL_TREE); + + tree v8qi_ftype_char + = build_function_type_list (V8QI_type_node, + signed_char_type_node, NULL_TREE); + + tree v4hi_ftype_short + = build_function_type_list (V4HI_type_node, + short_integer_type_node, NULL_TREE); + + tree v2si_ftype_int + = build_function_type_list (V2SI_type_node, + integer_type_node, NULL_TREE); + + /* Normal vector binops. 
*/ + tree v8qi_ftype_v8qi_v8qi + = build_function_type_list (V8QI_type_node, + V8QI_type_node, V8QI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_v4hi + = build_function_type_list (V4HI_type_node, + V4HI_type_node,V4HI_type_node, NULL_TREE); + tree v2si_ftype_v2si_v2si + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); + tree di_ftype_di_di + = build_function_type_list (long_long_unsigned_type_node, + long_long_unsigned_type_node, + long_long_unsigned_type_node, + NULL_TREE); + + /* Add all builtins that are more or less simple operations on two + operands. */ + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + { + /* Use one of the operands; the target can have a different mode for + mask-generating compares. */ + enum machine_mode mode; + tree type; + + if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2)) + continue; + + mode = insn_data[d->icode].operand[1].mode; + + switch (mode) + { + case V8QImode: + type = v8qi_ftype_v8qi_v8qi; + break; + case V4HImode: + type = v4hi_ftype_v4hi_v4hi; + break; + case V2SImode: + type = v2si_ftype_v2si_v2si; + break; + case DImode: + type = di_ftype_di_di; + break; + + default: + gcc_unreachable (); + } + + def_mbuiltin (d->mask, d->name, type, d->code); + } + + /* Add the remaining MMX insns with somewhat more complicated types. */ +#define iwmmx_mbuiltin(NAME, TYPE, CODE) \ + def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \ + ARM_BUILTIN_ ## CODE) + +#define iwmmx2_mbuiltin(NAME, TYPE, CODE) \ + def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \ + ARM_BUILTIN_ ## CODE) + + iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO); + iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0); + iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1); + iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2); + iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3); + iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0); + iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1); + iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2); + iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3); + + iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH); + iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW); + iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD); + iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI); + iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI); + iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI); + + iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH); + iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW); + iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD); + iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI); + iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI); + iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI); + + iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH); + iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW); + iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD); + iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI); + iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI); + iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI); + + iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH); + iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW); + iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD); + iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI); + iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI); + iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI); + + iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH); + + iwmmx_mbuiltin 
("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB); + iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH); + iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS); + iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX); + iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN); + iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU); + iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX); + iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN); + iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ); + iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ); + + iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB); + iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH); + iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW); + iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB); + iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH); + iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW); + iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB); + iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH); + iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW); + + iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB); + iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH); + iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW); + + iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB); + iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH); + iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW); + + iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM); + iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL); + + iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS); + iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS); + iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS); + iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS); + iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS); + iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS); + + iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB); + iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH); + iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW); + iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB); + iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH); + iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW); + iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB); + iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH); + iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW); + iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB); + iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH); + iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW); + + iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS); + iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ); + iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU); + iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ); + + iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI); + iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA); + iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH); + iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB); + iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT); + iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB); + iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT); + + iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB); + iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH); + iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW); + + iwmmx2_mbuiltin ("wqmiabb", 
v2si_ftype_v2si_v4hi_v4hi, WQMIABB); + iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT); + iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB); + iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT); + + iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN); + iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN); + iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN); + iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN); + + iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB); + iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT); + iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB); + iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT); + + iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN); + iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN); + iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN); + iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN); + + iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB); + iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT); + iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB); + iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT); + + iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN); + iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN); + iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN); + iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN); + + iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE); + + iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB); + iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH); + iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW); + +#undef iwmmx_mbuiltin +#undef iwmmx2_mbuiltin +} + +static void +arm_init_fp16_builtins (void) +{ + tree fp16_type = make_node (REAL_TYPE); + TYPE_PRECISION (fp16_type) = 16; + layout_type (fp16_type); + (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); +} + +static void +arm_init_crc32_builtins () +{ + tree si_ftype_si_qi + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intQI_type_node, NULL_TREE); + tree si_ftype_si_hi + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intHI_type_node, NULL_TREE); + tree si_ftype_si_si + = build_function_type_list (unsigned_intSI_type_node, + unsigned_intSI_type_node, + unsigned_intSI_type_node, NULL_TREE); + + arm_builtin_decls[ARM_BUILTIN_CRC32B] + = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi, + ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32H] + = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi, + ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32W] + = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si, + ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CB] + = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi, + ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CH] + = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi, + ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_CRC32CW] + = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si, + ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE); +} + +static void +arm_init_builtins (void) +{ + if (TARGET_REALLY_IWMMXT) 
+ arm_init_iwmmxt_builtins (); + + if (TARGET_NEON) + arm_init_neon_builtins (); + + if (arm_fp16_format) + arm_init_fp16_builtins (); + + if (TARGET_CRC32) + arm_init_crc32_builtins (); +} + +/* Return the ARM builtin for CODE. */ + +static tree +arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= ARM_BUILTIN_MAX) + return error_mark_node; + + return arm_builtin_decls[code]; +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_parameter_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("function parameters cannot have __fp16 type"); + return NULL; +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_return_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("functions cannot return __fp16 type"); + return NULL; +} + +/* Implement TARGET_PROMOTED_TYPE. */ + +static tree +arm_promoted_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return float_type_node; + return NULL_TREE; +} + +/* Implement TARGET_CONVERT_TO_TYPE. + Specifically, this hook implements the peculiarity of the ARM + half-precision floating-point C semantics that requires conversions between + __fp16 to or from double to do an intermediate conversion to float. */ + +static tree +arm_convert_to_type (tree type, tree expr) +{ + tree fromtype = TREE_TYPE (expr); + if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) + return NULL_TREE; + if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) + || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) + return convert (type, convert (float_type_node, expr)); + return NULL_TREE; +} + +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. + This simply adds HFmode as a supported mode; even though we don't + implement arithmetic on this type directly, it's supported by + optabs conversions, much the way the double-word arithmetic is + special-cased in the default hook. */ + +static bool +arm_scalar_mode_supported_p (enum machine_mode mode) +{ + if (mode == HFmode) + return (arm_fp16_format != ARM_FP16_FORMAT_NONE); + else if (ALL_FIXED_POINT_MODE_P (mode)) + return true; + else + return default_scalar_mode_supported_p (mode); +} + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ + +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (mode); + + emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x + : gen_rtx_SUBREG (DImode, x, 0))); + return x; +} + +/* Function to expand ternary builtins. */ +static rtx +arm_expand_ternop_builtin (enum insn_code icode, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + rtx op2 = expand_normal (arg2); + rtx op3 = NULL_RTX; + + /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select + lane operand depending on endianness. 
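[Editorial aside, not part of the patch: the __fp16 hooks above (arm_promoted_type and arm_convert_to_type) implement ARM's half-precision C semantics, in which __fp16 promotes to float and any conversion between __fp16 and double is performed through an intermediate float. A minimal sketch of what a widening conversion amounts to; the function name widen_half is invented, and the snippet assumes a toolchain with __fp16 enabled, for example with -mfp16-format=ieee.]

  /* Illustrative only: what a plain "return h;" is lowered to by the
     conversion hook above.  */
  double widen_half (__fp16 h)
  {
    return (double) (float) h;   /* __fp16 -> float -> double */
  }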
*/ + bool builtin_sha1cpm_p = false; + + if (insn_data[icode].n_operands == 5) + { + gcc_assert (icode == CODE_FOR_crypto_sha1c + || icode == CODE_FOR_crypto_sha1p + || icode == CODE_FOR_crypto_sha1m); + builtin_sha1cpm_p = true; + } + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + enum machine_mode mode2 = insn_data[icode].operand[3].mode; + + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + if (VECTOR_MODE_P (mode2)) + op2 = safe_vector_operand (op2, mode2); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) + && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) + && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (builtin_sha1cpm_p) + op3 = GEN_INT (TARGET_BIG_END ? 1 : 0); + + if (builtin_sha1cpm_p) + pat = GEN_FCN (icode) (target, op0, op1, op2, op3); + else + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of arm_expand_builtin to take care of binop insns. */ + +static rtx +arm_expand_binop_builtin (enum insn_code icode, + tree exp, rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) + && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Subroutine of arm_expand_builtin to take care of unop insns. */ + +static rtx +arm_expand_unop_builtin (enum insn_code icode, + tree exp, rtx target, int do_load) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + rtx op1 = NULL_RTX; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + bool builtin_sha1h_p = false; + + if (insn_data[icode].n_operands == 3) + { + gcc_assert (icode == CODE_FOR_crypto_sha1h); + builtin_sha1h_p = true; + } + + if (! target + || GET_MODE (target) != tmode + || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + if (do_load) + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + else + { + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + } + if (builtin_sha1h_p) + op1 = GEN_INT (TARGET_BIG_END ? 1 : 0); + + if (builtin_sha1h_p) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +typedef enum { + NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, + NEON_ARG_MEMORY, + NEON_ARG_STOP +} builtin_arg; + +#define NEON_MAX_BUILTIN_ARGS 5 + +/* EXP is a pointer argument to a Neon load or store intrinsic. Derive + and return an expression for the accessed memory. + + The intrinsic function operates on a block of registers that has + mode REG_MODE. This block contains vectors of type TYPE_MODE. The + function references the memory at EXP of type TYPE and in mode + MEM_MODE; this mode may be BLKmode if no more suitable mode is + available. */ + +static tree +neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode, + enum machine_mode reg_mode, + neon_builtin_type_mode type_mode) +{ + HOST_WIDE_INT reg_size, vector_size, nvectors, nelems; + tree elem_type, upper_bound, array_type; + + /* Work out the size of the register block in bytes. */ + reg_size = GET_MODE_SIZE (reg_mode); + + /* Work out the size of each vector in bytes. */ + gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG)); + vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8); + + /* Work out how many vectors there are. */ + gcc_assert (reg_size % vector_size == 0); + nvectors = reg_size / vector_size; + + /* Work out the type of each element. */ + gcc_assert (POINTER_TYPE_P (type)); + elem_type = TREE_TYPE (type); + + /* Work out how many elements are being loaded or stored. + MEM_MODE == REG_MODE implies a one-to-one mapping between register + and memory elements; anything else implies a lane load or store. */ + if (mem_mode == reg_mode) + nelems = vector_size * nvectors / int_size_in_bytes (elem_type); + else + nelems = nvectors; + + /* Create a type that describes the full access. */ + upper_bound = build_int_cst (size_type_node, nelems - 1); + array_type = build_array_type (elem_type, build_index_type (upper_bound)); + + /* Dereference EXP using that type. */ + return fold_build2 (MEM_REF, array_type, exp, + build_int_cst (build_pointer_type (array_type), 0)); +} + +/* Expand a Neon builtin. */ +static rtx +arm_expand_neon_args (rtx target, int icode, int have_retval, + neon_builtin_type_mode type_mode, + tree exp, int fcode, ...) 
+{ + va_list ap; + rtx pat; + tree arg[NEON_MAX_BUILTIN_ARGS]; + rtx op[NEON_MAX_BUILTIN_ARGS]; + tree arg_type; + tree formals; + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode[NEON_MAX_BUILTIN_ARGS]; + enum machine_mode other_mode; + int argc = 0; + int opno; + + if (have_retval + && (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode))) + target = gen_reg_rtx (tmode); + + va_start (ap, fcode); + + formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode])); + + for (;;) + { + builtin_arg thisarg = (builtin_arg) va_arg (ap, int); + + if (thisarg == NEON_ARG_STOP) + break; + else + { + opno = argc + have_retval; + mode[argc] = insn_data[icode].operand[opno].mode; + arg[argc] = CALL_EXPR_ARG (exp, argc); + arg_type = TREE_VALUE (formals); + if (thisarg == NEON_ARG_MEMORY) + { + other_mode = insn_data[icode].operand[1 - opno].mode; + arg[argc] = neon_dereference_pointer (arg[argc], arg_type, + mode[argc], other_mode, + type_mode); + } + + /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P + be returned. */ + op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode, + (thisarg == NEON_ARG_MEMORY + ? EXPAND_MEMORY : EXPAND_NORMAL)); + + switch (thisarg) + { + case NEON_ARG_COPY_TO_REG: + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/ + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + op[argc] = copy_to_mode_reg (mode[argc], op[argc]); + break; + + case NEON_ARG_CONSTANT: + /* FIXME: This error message is somewhat unhelpful. */ + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + error ("argument must be a constant"); + break; + + case NEON_ARG_MEMORY: + /* Check if expand failed. */ + if (op[argc] == const0_rtx) + return 0; + gcc_assert (MEM_P (op[argc])); + PUT_MODE (op[argc], mode[argc]); + /* ??? arm_neon.h uses the same built-in functions for signed + and unsigned accesses, casting where necessary. This isn't + alias safe. */ + set_mem_alias_set (op[argc], 0); + if (!(*insn_data[icode].operand[opno].predicate) + (op[argc], mode[argc])) + op[argc] = (replace_equiv_address + (op[argc], force_reg (Pmode, XEXP (op[argc], 0)))); + break; + + case NEON_ARG_STOP: + gcc_unreachable (); + } + + argc++; + formals = TREE_CHAIN (formals); + } + } + + va_end (ap); + + if (have_retval) + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (target, op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + else + switch (argc) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + + case 5: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); + break; + + default: + gcc_unreachable (); + } + + if (!pat) + return 0; + + emit_insn (pat); + + return target; +} + +/* Expand a Neon builtin. These are "special" because they don't have symbolic + constants defined per-instruction or per instruction-variant. Instead, the + required info is looked up in the table neon_builtin_data. 
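[Editorial aside, not part of the patch: arm_expand_neon_args above consumes a variadic list of argument-kind codes terminated by NEON_ARG_STOP. Below is a self-contained sketch of the same sentinel-terminated va_arg walk; the enum and the function count_operands are invented names for illustration only.]

  #include <stdarg.h>

  enum arg_kind { ARG_REG, ARG_CONST, ARG_MEM, ARG_STOP };

  /* Count operand codes until the ARG_STOP sentinel, the way the expander
     above walks and classifies its operands.  */
  static int count_operands (int first, ...)
  {
    va_list ap;
    int n = 0;
    va_start (ap, first);
    for (int kind = first; kind != ARG_STOP; kind = va_arg (ap, int))
      n++;
    va_end (ap);
    return n;
  }

  /* count_operands (ARG_REG, ARG_CONST, ARG_STOP) returns 2.  */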
*/ +static rtx +arm_expand_neon_builtin (int fcode, tree exp, rtx target) +{ + neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE]; + neon_itype itype = d->itype; + enum insn_code icode = d->code; + neon_builtin_type_mode type_mode = d->mode; + + switch (itype) + { + case NEON_UNOP: + case NEON_CONVERT: + case NEON_DUPLANE: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_BINOP: + case NEON_SETLANE: + case NEON_SCALARMUL: + case NEON_SCALARMULL: + case NEON_SCALARMULH: + case NEON_SHIFTINSERT: + case NEON_LOGICBINOP: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_TERNOP: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_GETLANE: + case NEON_FIXCONV: + case NEON_SHIFTIMM: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_CREATE: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_DUP: + case NEON_RINT: + case NEON_SPLIT: + case NEON_FLOAT_WIDEN: + case NEON_FLOAT_NARROW: + case NEON_REINTERP: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_COMBINE: + case NEON_VTBL: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case NEON_RESULTPAIR: + return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_LANEMAC: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SHIFTACC: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SCALARMAC: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_CONSTANT, NEON_ARG_STOP); + + case NEON_SELECT: + case NEON_VTBX: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, + NEON_ARG_STOP); + + case NEON_LOAD1: + case NEON_LOADSTRUCT: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_MEMORY, NEON_ARG_STOP); + + case NEON_LOAD1LANE: + case NEON_LOADSTRUCTLANE: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + + case NEON_STORE1: + case NEON_STORESTRUCT: + return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + + case 
NEON_STORE1LANE: + case NEON_STORESTRUCTLANE: + return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, + NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, + NEON_ARG_STOP); + } + + gcc_unreachable (); +} + +/* Emit code to reinterpret one Neon type as another, without altering bits. */ +void +neon_reinterpret (rtx dest, rtx src) +{ + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); +} + +/* Emit code to place a Neon pair result in memory locations (with equal + registers). */ +void +neon_emit_pair_result_insn (enum machine_mode mode, + rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, + rtx op1, rtx op2) +{ + rtx mem = gen_rtx_MEM (mode, destaddr); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + + emit_insn (intfn (tmp1, op1, op2, tmp2)); + + emit_move_insn (mem, tmp1); + mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); + emit_move_insn (mem, tmp2); +} + +/* Set up OPERANDS for a register copy from SRC to DEST, taking care + not to early-clobber SRC registers in the process. + + We assume that the operands described by SRC and DEST represent a + decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the + number of components into which the copy has been decomposed. */ +void +neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count) +{ + unsigned int i; + + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + || REGNO (operands[0]) < REGNO (operands[1])) + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[i]; + operands[2 * i + 1] = src[i]; + } + } + else + { + for (i = 0; i < count; i++) + { + operands[2 * i] = dest[count - i - 1]; + operands[2 * i + 1] = src[count - i - 1]; + } + } +} + +/* Split operands into moves from op[1] + op[2] into op[0]. */ + +void +neon_split_vcombine (rtx operands[3]) +{ + unsigned int dest = REGNO (operands[0]); + unsigned int src1 = REGNO (operands[1]); + unsigned int src2 = REGNO (operands[2]); + enum machine_mode halfmode = GET_MODE (operands[1]); + unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode); + rtx destlo, desthi; + + if (src1 == dest && src2 == dest + halfregs) + { + /* No-op move. Can't split to nothing; emit something. */ + emit_note (NOTE_INSN_DELETED); + return; + } + + /* Preserve register attributes for variable tracking. */ + destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0); + desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs, + GET_MODE_SIZE (halfmode)); + + /* Special case of reversed high/low parts. Use VSWP. */ + if (src2 == dest && src1 == dest + halfregs) + { + rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]); + rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y))); + return; + } + + if (!reg_overlap_mentioned_p (operands[2], destlo)) + { + /* Try to avoid unnecessary moves if part of the result + is in the right place already. */ + if (src1 != dest) + emit_move_insn (destlo, operands[1]); + if (src2 != dest + halfregs) + emit_move_insn (desthi, operands[2]); + } + else + { + if (src2 != dest + halfregs) + emit_move_insn (desthi, operands[2]); + if (src1 != dest) + emit_move_insn (destlo, operands[1]); + } +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
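[Editorial aside, not part of the patch: neon_disambiguate_copy above orders the component moves of a decomposed copy so that no destination overwrites a source that is still needed, the same forward-or-backward choice memmove makes for overlapping buffers. A stand-alone sketch of the idea on a plain array; ordered_copy is an invented name, and the pointer comparison here plays the role of the register-number comparison in the real code.]

  /* Copy COUNT elements from SRC to DEST in a direction that stays safe
     even when the two ranges overlap.  Illustrative only.  */
  static void ordered_copy (int *dest, const int *src, unsigned count)
  {
    if (dest <= src)
      for (unsigned i = 0; i < count; i++)       /* copy low to high  */
        dest[i] = src[i];
    else
      for (unsigned i = count; i-- > 0; )        /* copy high to low  */
        dest[i] = src[i];
  }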
*/ + +static rtx +arm_expand_builtin (tree exp, + rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + const struct builtin_description * d; + enum insn_code icode; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0; + tree arg1; + tree arg2; + rtx op0; + rtx op1; + rtx op2; + rtx pat; + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + size_t i; + enum machine_mode tmode; + enum machine_mode mode0; + enum machine_mode mode1; + enum machine_mode mode2; + int opint; + int selector; + int mask; + int imm; + + if (fcode >= ARM_BUILTIN_NEON_BASE) + return arm_expand_neon_builtin (fcode, exp, target); + + switch (fcode) + { + case ARM_BUILTIN_TEXTRMSB: + case ARM_BUILTIN_TEXTRMUB: + case ARM_BUILTIN_TEXTRMSH: + case ARM_BUILTIN_TEXTRMUH: + case ARM_BUILTIN_TEXTRMSW: + case ARM_BUILTIN_TEXTRMUW: + icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb + : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub + : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh + : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh + : CODE_FOR_iwmmxt_textrmw); + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + { + /* @@@ better error message */ + error ("selector must be an immediate"); + return gen_reg_rtx (tmode); + } + + opint = INTVAL (op1); + if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB) + { + if (opint > 7 || opint < 0) + error ("the range of selector should be in 0 to 7"); + } + else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH) + { + if (opint > 3 || opint < 0) + error ("the range of selector should be in 0 to 3"); + } + else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */ + { + if (opint > 1 || opint < 0) + error ("the range of selector should be in 0 to 1"); + } + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WALIGNI: + /* If op2 is immediate, call walighi, else call walighr. 
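[Editorial aside, not part of the patch: the TEXTRM cases above extract one lane of a 64-bit iWMMXt vector, which is why the selector is range-checked against the lane count (0 to 7 for bytes, 0 to 3 for halfwords, 0 to 1 for words). The same operation restated as plain C on a generic GCC vector type; hv4 and extract_s16 are invented names, and the snippet assumes GCC's vector subscripting extension.]

  /* What __builtin_arm_textrmsh computes, restated with generic vector
     subscripting; the result is the sign-extended halfword.  */
  typedef short hv4 __attribute__ ((vector_size (8)));

  static int extract_s16 (hv4 v, int sel)
  {
    /* SEL must be 0..3, matching the range check in the expander above.  */
    return v[sel];
  }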
*/ + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + if (CONST_INT_P (op2)) + { + icode = CODE_FOR_iwmmxt_waligni; + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2)); + selector = INTVAL (op2); + if (selector > 7 || selector < 0) + error ("the range of selector should be in 0 to 7"); + } + else + { + icode = CODE_FOR_iwmmxt_walignr; + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (!(*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + } + if (target == 0 + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (!pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_TINSRB: + case ARM_BUILTIN_TINSRH: + case ARM_BUILTIN_TINSRW: + case ARM_BUILTIN_WMERGE: + icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb + : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh + : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge + : CODE_FOR_iwmmxt_tinsrw); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + { + error ("selector must be an immediate"); + return const0_rtx; + } + if (icode == CODE_FOR_iwmmxt_wmerge) + { + selector = INTVAL (op2); + if (selector > 7 || selector < 0) + error ("the range of selector should be in 0 to 7"); + } + if ((icode == CODE_FOR_iwmmxt_tinsrb) + || (icode == CODE_FOR_iwmmxt_tinsrh) + || (icode == CODE_FOR_iwmmxt_tinsrw)) + { + mask = 0x01; + selector= INTVAL (op2); + if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7)) + error ("the range of selector should be in 0 to 7"); + else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3)) + error ("the range of selector should be in 0 to 3"); + else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1)) + error ("the range of selector should be in 0 to 1"); + mask <<= selector; + op2 = GEN_INT (mask); + } + if (target == 0 + || GET_MODE (target) != tmode + || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_SETWCGR0: + case ARM_BUILTIN_SETWCGR1: + case ARM_BUILTIN_SETWCGR2: + case ARM_BUILTIN_SETWCGR3: + icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0 + : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1 + : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2 + : CODE_FOR_iwmmxt_setwcgr3); + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + mode0 = insn_data[icode].operand[0].mode; + if (!(*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + pat = GEN_FCN (icode) (op0); + if (!pat) + return 0; + emit_insn (pat); + return 0; + + case ARM_BUILTIN_GETWCGR0: + case ARM_BUILTIN_GETWCGR1: + case ARM_BUILTIN_GETWCGR2: + case ARM_BUILTIN_GETWCGR3: + icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0 + : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1 + : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2 + : CODE_FOR_iwmmxt_getwcgr3); + tmode = insn_data[icode].operand[0].mode; + if (target == 0 + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target); + if (!pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WSHUFH: + icode = CODE_FOR_iwmmxt_wshufh; + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + tmode = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) + op0 = copy_to_mode_reg (mode1, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) + { + error ("mask must be an immediate"); + return const0_rtx; + } + selector = INTVAL (op1); + if (selector < 0 || selector > 255) + error ("the range of mask should be in 0 to 255"); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WMADDS: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target); + case ARM_BUILTIN_WMADDSX: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target); + case ARM_BUILTIN_WMADDSN: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target); + case ARM_BUILTIN_WMADDU: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target); + case ARM_BUILTIN_WMADDUX: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target); + case ARM_BUILTIN_WMADDUN: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target); + case ARM_BUILTIN_WSADBZ: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target); + case ARM_BUILTIN_WSADHZ: + return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target); + + /* Several three-argument builtins. 
*/ + case ARM_BUILTIN_WMACS: + case ARM_BUILTIN_WMACU: + case ARM_BUILTIN_TMIA: + case ARM_BUILTIN_TMIAPH: + case ARM_BUILTIN_TMIATT: + case ARM_BUILTIN_TMIATB: + case ARM_BUILTIN_TMIABT: + case ARM_BUILTIN_TMIABB: + case ARM_BUILTIN_WQMIABB: + case ARM_BUILTIN_WQMIABT: + case ARM_BUILTIN_WQMIATB: + case ARM_BUILTIN_WQMIATT: + case ARM_BUILTIN_WQMIABBN: + case ARM_BUILTIN_WQMIABTN: + case ARM_BUILTIN_WQMIATBN: + case ARM_BUILTIN_WQMIATTN: + case ARM_BUILTIN_WMIABB: + case ARM_BUILTIN_WMIABT: + case ARM_BUILTIN_WMIATB: + case ARM_BUILTIN_WMIATT: + case ARM_BUILTIN_WMIABBN: + case ARM_BUILTIN_WMIABTN: + case ARM_BUILTIN_WMIATBN: + case ARM_BUILTIN_WMIATTN: + case ARM_BUILTIN_WMIAWBB: + case ARM_BUILTIN_WMIAWBT: + case ARM_BUILTIN_WMIAWTB: + case ARM_BUILTIN_WMIAWTT: + case ARM_BUILTIN_WMIAWBBN: + case ARM_BUILTIN_WMIAWBTN: + case ARM_BUILTIN_WMIAWTBN: + case ARM_BUILTIN_WMIAWTTN: + case ARM_BUILTIN_WSADB: + case ARM_BUILTIN_WSADH: + icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs + : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu + : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia + : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph + : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb + : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt + : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb + : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt + : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb + : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt + : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb + : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt + : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn + : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn + : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn + : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn + : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb + : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt + : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb + : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt + : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn + : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn + : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn + : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn + : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb + : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt + : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb + : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt + : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn + : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn + : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn + : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn + : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb + : CODE_FOR_iwmmxt_wsadh); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! 
(*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case ARM_BUILTIN_WZERO: + target = gen_reg_rtx (DImode); + emit_insn (gen_iwmmxt_clrdi (target)); + return target; + + case ARM_BUILTIN_WSRLHI: + case ARM_BUILTIN_WSRLWI: + case ARM_BUILTIN_WSRLDI: + case ARM_BUILTIN_WSLLHI: + case ARM_BUILTIN_WSLLWI: + case ARM_BUILTIN_WSLLDI: + case ARM_BUILTIN_WSRAHI: + case ARM_BUILTIN_WSRAWI: + case ARM_BUILTIN_WSRADI: + case ARM_BUILTIN_WRORHI: + case ARM_BUILTIN_WRORWI: + case ARM_BUILTIN_WRORDI: + case ARM_BUILTIN_WSRLH: + case ARM_BUILTIN_WSRLW: + case ARM_BUILTIN_WSRLD: + case ARM_BUILTIN_WSLLH: + case ARM_BUILTIN_WSLLW: + case ARM_BUILTIN_WSLLD: + case ARM_BUILTIN_WSRAH: + case ARM_BUILTIN_WSRAW: + case ARM_BUILTIN_WSRAD: + case ARM_BUILTIN_WRORH: + case ARM_BUILTIN_WRORW: + case ARM_BUILTIN_WRORD: + icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt + : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt + : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt + : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt + : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt + : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt + : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt + : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt + : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt + : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3 + : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3 + : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3 + : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di + : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di + : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di + : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di + : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di + : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di + : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di + : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di + : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di + : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di + : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di + : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di + : CODE_FOR_nothing); + arg1 = CALL_EXPR_ARG (exp, 1); + op1 = expand_normal (arg1); + if (GET_MODE (op1) == VOIDmode) + { + imm = INTVAL (op1); + if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI + || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW) + && (imm < 0 || imm > 32)) + { + if (fcode == ARM_BUILTIN_WRORHI) + error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code."); + else if (fcode == ARM_BUILTIN_WRORWI) + error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code."); + else if (fcode == ARM_BUILTIN_WRORH) + error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code."); + else + error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code."); + } + else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD) + && (imm < 0 || imm > 64)) + { + if (fcode == ARM_BUILTIN_WRORDI) + error ("the range of count should be in 0 to 64. 
please check the intrinsic _mm_rori_si64 in code."); + else + error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code."); + } + else if (imm < 0) + { + if (fcode == ARM_BUILTIN_WSRLHI) + error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSRLWI) + error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSRLDI) + error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code."); + else if (fcode == ARM_BUILTIN_WSLLHI) + error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSLLWI) + error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSLLDI) + error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code."); + else if (fcode == ARM_BUILTIN_WSRAHI) + error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSRAWI) + error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSRADI) + error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code."); + else if (fcode == ARM_BUILTIN_WSRLH) + error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSRLW) + error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSRLD) + error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code."); + else if (fcode == ARM_BUILTIN_WSLLH) + error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSLLW) + error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code."); + else if (fcode == ARM_BUILTIN_WSLLD) + error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code."); + else if (fcode == ARM_BUILTIN_WSRAH) + error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code."); + else if (fcode == ARM_BUILTIN_WSRAW) + error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code."); + else + error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code."); + } + } + return arm_expand_binop_builtin (icode, exp, target); + + default: + break; + } + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_binop_builtin (d->icode, exp, target); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_unop_builtin (d->icode, exp, target, 0); + + for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++) + if (d->code == (const enum arm_builtins) fcode) + return arm_expand_ternop_builtin (d->icode, exp, target); + + /* @@@ Should really do something sensible here. */ + return NULL_RTX; +} + +/* Return the number (counting from 0) of + the least significant set bit in MASK. 
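[Editorial aside, not part of the patch: number_of_first_bit_set, defined next, is simply a count-trailing-zeros operation. A tiny worked example using the GCC builtin; treating __builtin_ctz as a stand-in for ctz_hwi is an assumption made for illustration only.]

  /* Index of the least significant set bit: 0x48 is binary 1001000, so the
     answer is 3.  Illustrative only.  */
  #include <assert.h>
  int main (void)
  {
    assert (__builtin_ctz (0x48u) == 3);
    return 0;
  }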
*/ + +inline static int +number_of_first_bit_set (unsigned mask) +{ + return ctz_hwi (mask); +} + +/* Like emit_multi_reg_push, but allowing for a different set of + registers to be described as saved. MASK is the set of registers + to be saved; REAL_REGS is the set of registers to be described as + saved. If REAL_REGS is 0, only describe the stack adjustment. */ + +static rtx +thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs) +{ + unsigned long regno; + rtx par[10], tmp, reg, insn; + int i, j; + + /* Build the parallel of the registers actually being stored. */ + for (i = 0; mask; ++i, mask &= mask - 1) + { + regno = ctz_hwi (mask); + reg = gen_rtx_REG (SImode, regno); + + if (i == 0) + tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT); + else + tmp = gen_rtx_USE (VOIDmode, reg); + + par[i] = tmp; + } + + tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + tmp = gen_frame_mem (BLKmode, tmp); + tmp = gen_rtx_SET (VOIDmode, tmp, par[0]); + par[0] = tmp; + + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par)); + insn = emit_insn (tmp); + + /* Always build the stack adjustment note for unwind info. */ + tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i); + tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp); + par[0] = tmp; + + /* Build the parallel of the registers recorded as saved for unwind. */ + for (j = 0; real_regs; ++j, real_regs &= real_regs - 1) + { + regno = ctz_hwi (real_regs); + reg = gen_rtx_REG (SImode, regno); + + tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4); + tmp = gen_frame_mem (SImode, tmp); + tmp = gen_rtx_SET (VOIDmode, tmp, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + par[j + 1] = tmp; + } + + if (j == 0) + tmp = par[0]; + else + { + RTX_FRAME_RELATED_P (par[0]) = 1; + tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par)); + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp); + + return insn; +} + +/* Emit code to push or pop registers to or from the stack. F is the + assembly file. MASK is the registers to pop. */ +static void +thumb_pop (FILE *f, unsigned long mask) +{ + int regno; + int lo_mask = mask & 0xFF; + int pushed_words = 0; + + gcc_assert (mask); + + if (lo_mask == 0 && (mask & (1 << PC_REGNUM))) + { + /* Special case. Do not generate a POP PC statement here, do it in + thumb_exit() */ + thumb_exit (f, -1); + return; + } + + fprintf (f, "\tpop\t{"); + + /* Look at the low registers first. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1) + { + if (lo_mask & 1) + { + asm_fprintf (f, "%r", regno); + + if ((lo_mask & ~1) != 0) + fprintf (f, ", "); + + pushed_words++; + } + } + + if (mask & (1 << PC_REGNUM)) + { + /* Catch popping the PC. */ + if (TARGET_INTERWORK || TARGET_BACKTRACE + || crtl->calls_eh_return) + { + /* The PC is never poped directly, instead + it is popped into r3 and then BX is used. */ + fprintf (f, "}\n"); + + thumb_exit (f, -1); + + return; + } + else + { + if (mask & 0xFF) + fprintf (f, ", "); + + asm_fprintf (f, "%r", PC_REGNUM); + } + } + + fprintf (f, "}\n"); +} + +/* Generate code to return from a thumb function. + If 'reg_containing_return_addr' is -1, then the return address is + actually on the stack, at the stack pointer. 
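[Editorial aside, not part of the patch: thumb1_emit_multi_reg_push above walks the register mask with the classic lowest-set-bit loop, clearing one bit per iteration with mask &= mask - 1 and reading its index with ctz_hwi. A self-contained restatement; collect_regs is an invented name and __builtin_ctzl stands in for ctz_hwi.]

  /* Collect the register numbers named by MASK, lowest first.
     Illustrative only.  */
  static int collect_regs (unsigned long mask, int regs[32])
  {
    int n = 0;
    for (; mask; mask &= mask - 1)
      regs[n++] = __builtin_ctzl (mask);
    return n;
  }

  /* For mask 0x90 (bits 4 and 7) this stores {4, 7} and returns 2.  */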
*/ +static void +thumb_exit (FILE *f, int reg_containing_return_addr) +{ + unsigned regs_available_for_popping; + unsigned regs_to_pop; + int pops_needed; + unsigned available; + unsigned required; + int mode; + int size; + int restore_a4 = FALSE; + + /* Compute the registers we need to pop. */ + regs_to_pop = 0; + pops_needed = 0; + + if (reg_containing_return_addr == -1) + { + regs_to_pop |= 1 << LR_REGNUM; + ++pops_needed; + } + + if (TARGET_BACKTRACE) + { + /* Restore the (ARM) frame pointer and stack pointer. */ + regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM); + pops_needed += 2; + } + + /* If there is nothing to pop then just emit the BX instruction and + return. */ + if (pops_needed == 0) + { + if (crtl->calls_eh_return) + asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); + + asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); + return; + } + /* Otherwise if we are not supporting interworking and we have not created + a backtrace structure and the function was not entered in ARM mode then + just pop the return address straight into the PC. */ + else if (!TARGET_INTERWORK + && !TARGET_BACKTRACE + && !is_called_in_ARM_mode (current_function_decl) + && !crtl->calls_eh_return) + { + asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM); + return; + } + + /* Find out how many of the (return) argument registers we can corrupt. */ + regs_available_for_popping = 0; + + /* If returning via __builtin_eh_return, the bottom three registers + all contain information needed for the return. */ + if (crtl->calls_eh_return) + size = 12; + else + { + /* If we can deduce the registers used from the function's + return value. This is more reliable that examining + df_regs_ever_live_p () because that will be set if the register is + ever used in the function, not just if the register is used + to hold a return value. */ + + if (crtl->return_rtx != 0) + mode = GET_MODE (crtl->return_rtx); + else + mode = DECL_MODE (DECL_RESULT (current_function_decl)); + + size = GET_MODE_SIZE (mode); + + if (size == 0) + { + /* In a void function we can use any argument register. + In a function that returns a structure on the stack + we can use the second and third argument registers. */ + if (mode == VOIDmode) + regs_available_for_popping = + (1 << ARG_REGISTER (1)) + | (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + else + regs_available_for_popping = + (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + } + else if (size <= 4) + regs_available_for_popping = + (1 << ARG_REGISTER (2)) + | (1 << ARG_REGISTER (3)); + else if (size <= 8) + regs_available_for_popping = + (1 << ARG_REGISTER (3)); + } + + /* Match registers to be popped with registers into which we pop them. */ + for (available = regs_available_for_popping, + required = regs_to_pop; + required != 0 && available != 0; + available &= ~(available & - available), + required &= ~(required & - required)) + -- pops_needed; + + /* If we have any popping registers left over, remove them. */ + if (available > 0) + regs_available_for_popping &= ~available; + + /* Otherwise if we need another popping register we can use + the fourth argument register. */ + else if (pops_needed) + { + /* If we have not found any free argument registers and + reg a4 contains the return address, we must move it. 
*/ + if (regs_available_for_popping == 0 + && reg_containing_return_addr == LAST_ARG_REGNUM) + { + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM); + reg_containing_return_addr = LR_REGNUM; + } + else if (size > 12) + { + /* Register a4 is being used to hold part of the return value, + but we have dire need of a free, low register. */ + restore_a4 = TRUE; + + asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM); + } + + if (reg_containing_return_addr != LAST_ARG_REGNUM) + { + /* The fourth argument register is available. */ + regs_available_for_popping |= 1 << LAST_ARG_REGNUM; + + --pops_needed; + } + } + + /* Pop as many registers as we can. */ + thumb_pop (f, regs_available_for_popping); + + /* Process the registers we popped. */ + if (reg_containing_return_addr == -1) + { + /* The return address was popped into the lowest numbered register. */ + regs_to_pop &= ~(1 << LR_REGNUM); + + reg_containing_return_addr = + number_of_first_bit_set (regs_available_for_popping); + + /* Remove this register for the mask of available registers, so that + the return address will not be corrupted by further pops. */ + regs_available_for_popping &= ~(1 << reg_containing_return_addr); + } + + /* If we popped other registers then handle them here. */ + if (regs_available_for_popping) + { + int frame_pointer; + + /* Work out which register currently contains the frame pointer. */ + frame_pointer = number_of_first_bit_set (regs_available_for_popping); + + /* Move it into the correct place. */ + asm_fprintf (f, "\tmov\t%r, %r\n", + ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer); + + /* (Temporarily) remove it from the mask of popped registers. */ + regs_available_for_popping &= ~(1 << frame_pointer); + regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM); + + if (regs_available_for_popping) + { + int stack_pointer; + + /* We popped the stack pointer as well, + find the register that contains it. */ + stack_pointer = number_of_first_bit_set (regs_available_for_popping); + + /* Move it into the stack register. */ + asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer); + + /* At this point we have popped all necessary registers, so + do not worry about restoring regs_available_for_popping + to its correct value: + + assert (pops_needed == 0) + assert (regs_available_for_popping == (1 << frame_pointer)) + assert (regs_to_pop == (1 << STACK_POINTER)) */ + } + else + { + /* Since we have just move the popped value into the frame + pointer, the popping register is available for reuse, and + we know that we still have the stack pointer left to pop. */ + regs_available_for_popping |= (1 << frame_pointer); + } + } + + /* If we still have registers left on the stack, but we no longer have + any registers into which we can pop them, then we must move the return + address into the link register and make available the register that + contained it. */ + if (regs_available_for_popping == 0 && pops_needed > 0) + { + regs_available_for_popping |= 1 << reg_containing_return_addr; + + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, + reg_containing_return_addr); + + reg_containing_return_addr = LR_REGNUM; + } + + /* If we have registers left on the stack then pop some more. + We know that at most we will want to pop FP and SP. */ + if (pops_needed > 0) + { + int popped_into; + int move_to; + + thumb_pop (f, regs_available_for_popping); + + /* We have popped either FP or SP. + Move whichever one it is into the correct register. 
*/ + popped_into = number_of_first_bit_set (regs_available_for_popping); + move_to = number_of_first_bit_set (regs_to_pop); + + asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into); + + regs_to_pop &= ~(1 << move_to); + + --pops_needed; + } + + /* If we still have not popped everything then we must have only + had one register available to us and we are now popping the SP. */ + if (pops_needed > 0) + { + int popped_into; + + thumb_pop (f, regs_available_for_popping); + + popped_into = number_of_first_bit_set (regs_available_for_popping); + + asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into); + /* + assert (regs_to_pop == (1 << STACK_POINTER)) + assert (pops_needed == 1) + */ + } + + /* If necessary restore the a4 register. */ + if (restore_a4) + { + if (reg_containing_return_addr != LR_REGNUM) + { + asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM); + reg_containing_return_addr = LR_REGNUM; + } + + asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM); + } + + if (crtl->calls_eh_return) + asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); + + /* Return to caller. */ + asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); +} + +/* Scan INSN just before assembler is output for it. + For Thumb-1, we track the status of the condition codes; this + information is used in the cbranchsi4_insn pattern. */ +void +thumb1_final_prescan_insn (rtx insn) +{ + if (flag_print_asm_name) + asm_fprintf (asm_out_file, "%@ 0x%04x\n", + INSN_ADDRESSES (INSN_UID (insn))); + /* Don't overwrite the previous setter when we get to a cbranch. */ + if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) + { + enum attr_conds conds; + + if (cfun->machine->thumb1_cc_insn) + { + if (modified_in_p (cfun->machine->thumb1_cc_op0, insn) + || modified_in_p (cfun->machine->thumb1_cc_op1, insn)) + CC_STATUS_INIT; + } + conds = get_attr_conds (insn); + if (conds == CONDS_SET) + { + rtx set = single_set (insn); + cfun->machine->thumb1_cc_insn = insn; + cfun->machine->thumb1_cc_op0 = SET_DEST (set); + cfun->machine->thumb1_cc_op1 = const0_rtx; + cfun->machine->thumb1_cc_mode = CC_NOOVmode; + if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn) + { + rtx src1 = XEXP (SET_SRC (set), 1); + if (src1 == const0_rtx) + cfun->machine->thumb1_cc_mode = CCmode; + } + else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))) + { + /* Record the src register operand instead of dest because + cprop_hardreg pass propagates src. */ + cfun->machine->thumb1_cc_op0 = SET_SRC (set); + } + } + else if (conds != CONDS_NOCOND) + cfun->machine->thumb1_cc_insn = NULL_RTX; + } + + /* Check if unexpected far jump is used. */ + if (cfun->machine->lr_save_eliminated + && get_attr_far_jump (insn) == FAR_JUMP_YES) + internal_error("Unexpected thumb1 far jump"); +} + +int +thumb_shiftable_const (unsigned HOST_WIDE_INT val) +{ + unsigned HOST_WIDE_INT mask = 0xff; + int i; + + val = val & (unsigned HOST_WIDE_INT)0xffffffffu; + if (val == 0) /* XXX */ + return 0; + + for (i = 0; i < 25; i++) + if ((val & (mask << i)) == val) + return 1; + + return 0; +} + +/* Returns nonzero if the current function contains, + or might contain a far jump. */ +static int +thumb_far_jump_used_p (void) +{ + rtx insn; + bool far_jump = false; + unsigned int func_size = 0; + + /* This test is only important for leaf functions. 
*/ + /* assert (!leaf_function_p ()); */ + + /* If we have already decided that far jumps may be used, + do not bother checking again, and always return true even if + it turns out that they are not being used. Once we have made + the decision that far jumps are present (and that hence the link + register will be pushed onto the stack) we cannot go back on it. */ + if (cfun->machine->far_jump_used) + return 1; + + /* If this function is not being called from the prologue/epilogue + generation code then it must be being called from the + INITIAL_ELIMINATION_OFFSET macro. */ + if (!(ARM_DOUBLEWORD_ALIGN || reload_completed)) + { + /* In this case we know that we are being asked about the elimination + of the arg pointer register. If that register is not being used, + then there are no arguments on the stack, and we do not have to + worry that a far jump might force the prologue to push the link + register, changing the stack offsets. In this case we can just + return false, since the presence of far jumps in the function will + not affect stack offsets. + + If the arg pointer is live (or if it was live, but has now been + eliminated and so set to dead) then we do have to test to see if + the function might contain a far jump. This test can lead to some + false negatives, since before reload is completed, then length of + branch instructions is not known, so gcc defaults to returning their + longest length, which in turn sets the far jump attribute to true. + + A false negative will not result in bad code being generated, but it + will result in a needless push and pop of the link register. We + hope that this does not occur too often. + + If we need doubleword stack alignment this could affect the other + elimination offsets so we can't risk getting it wrong. */ + if (df_regs_ever_live_p (ARG_POINTER_REGNUM)) + cfun->machine->arg_pointer_live = 1; + else if (!cfun->machine->arg_pointer_live) + return 0; + } + + /* We should not change far_jump_used during or after reload, as there is + no chance to change stack frame layout. */ + if (reload_in_progress || reload_completed) + return 0; + + /* Check to see if the function contains a branch + insn with the far jump attribute set. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES) + { + far_jump = true; + } + func_size += get_attr_length (insn); + } + + /* Attribute far_jump will always be true for thumb1 before + shorten_branch pass. So checking far_jump attribute before + shorten_branch isn't much useful. + + Following heuristic tries to estimate more accurately if a far jump + may finally be used. The heuristic is very conservative as there is + no chance to roll-back the decision of not to use far jump. + + Thumb1 long branch offset is -2048 to 2046. The worst case is each + 2-byte insn is associated with a 4 byte constant pool. Using + function size 2048/3 as the threshold is conservative enough. */ + if (far_jump) + { + if ((func_size * 3) >= 2048) + { + /* Record the fact that we have decided that + the function does use far jumps. */ + cfun->machine->far_jump_used = 1; + return 1; + } + } + + return 0; +} + +/* Return nonzero if FUNC must be entered in ARM mode. */ +int +is_called_in_ARM_mode (tree func) +{ + gcc_assert (TREE_CODE (func) == FUNCTION_DECL); + + /* Ignore the problem about functions whose address is taken. 
*/ + if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func)) + return TRUE; + +#ifdef ARM_PE + return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE; +#else + return FALSE; +#endif +} + +/* Given the stack offsets and register mask in OFFSETS, decide how + many additional registers to push instead of subtracting a constant + from SP. For epilogues the principle is the same except we use pop. + FOR_PROLOGUE indicates which we're generating. */ +static int +thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue) +{ + HOST_WIDE_INT amount; + unsigned long live_regs_mask = offsets->saved_regs_mask; + /* Extract a mask of the ones we can give to the Thumb's push/pop + instruction. */ + unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff); + /* Then count how many other high registers will need to be pushed. */ + unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + int n_free, reg_base, size; + + if (!for_prologue && frame_pointer_needed) + amount = offsets->locals_base - offsets->saved_regs; + else + amount = offsets->outgoing_args - offsets->saved_regs; + + /* If the stack frame size is 512 exactly, we can save one load + instruction, which should make this a win even when optimizing + for speed. */ + if (!optimize_size && amount != 512) + return 0; + + /* Can't do this if there are high registers to push. */ + if (high_regs_pushed != 0) + return 0; + + /* Shouldn't do it in the prologue if no registers would normally + be pushed at all. In the epilogue, also allow it if we'll have + a pop insn for the PC. */ + if (l_mask == 0 + && (for_prologue + || TARGET_BACKTRACE + || (live_regs_mask & 1 << LR_REGNUM) == 0 + || TARGET_INTERWORK + || crtl->args.pretend_args_size != 0)) + return 0; + + /* Don't do this if thumb_expand_prologue wants to emit instructions + between the push and the stack frame allocation. */ + if (for_prologue + && ((flag_pic && arm_pic_register != INVALID_REGNUM) + || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))) + return 0; + + reg_base = 0; + n_free = 0; + if (!for_prologue) + { + size = arm_size_return_regs (); + reg_base = ARM_NUM_INTS (size); + live_regs_mask >>= reg_base; + } + + while (reg_base + n_free < 8 && !(live_regs_mask & 1) + && (for_prologue || call_used_regs[reg_base + n_free])) + { + live_regs_mask >>= 1; + n_free++; + } + + if (n_free == 0) + return 0; + gcc_assert (amount / 4 * 4 == amount); + + if (amount >= 512 && (amount - n_free * 4) < 512) + return (amount - 508) / 4; + if (amount <= n_free * 4) + return amount / 4; + return 0; +} + +/* The bits which aren't usefully expanded as rtl. */ +const char * +thumb1_unexpanded_epilogue (void) +{ + arm_stack_offsets *offsets; + int regno; + unsigned long live_regs_mask = 0; + int high_regs_pushed = 0; + int extra_pop; + int had_to_push_lr; + int size; + + if (cfun->machine->return_used_this_function != 0) + return ""; + + if (IS_NAKED (arm_current_func_type ())) + return ""; + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + + /* If we can deduce the registers used from the function's return value. + This is more reliable that examining df_regs_ever_live_p () because that + will be set if the register is ever used in the function, not just if + the register is used to hold a return value. 
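+ For example, a function returning a 64-bit value has its result in r0-r1
+ (size 8), which leaves r2 and r3 free below for popping high registers.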
*/ + size = arm_size_return_regs (); + + extra_pop = thumb1_extra_regs_pushed (offsets, false); + if (extra_pop > 0) + { + unsigned long extra_mask = (1 << extra_pop) - 1; + live_regs_mask |= extra_mask << ARM_NUM_INTS (size); + } + + /* The prolog may have pushed some high registers to use as + work registers. e.g. the testsuite file: + gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c + compiles to produce: + push {r4, r5, r6, r7, lr} + mov r7, r9 + mov r6, r8 + push {r6, r7} + as part of the prolog. We have to undo that pushing here. */ + + if (high_regs_pushed) + { + unsigned long mask = live_regs_mask & 0xff; + int next_hi_reg; + + /* The available low registers depend on the size of the value we are + returning. */ + if (size <= 12) + mask |= 1 << 3; + if (size <= 8) + mask |= 1 << 2; + + if (mask == 0) + /* Oh dear! We have no low registers into which we can pop + high registers! */ + internal_error + ("no low registers available for popping high registers"); + + for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++) + if (live_regs_mask & (1 << next_hi_reg)) + break; + + while (high_regs_pushed) + { + /* Find lo register(s) into which the high register(s) can + be popped. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++) + { + if (mask & (1 << regno)) + high_regs_pushed--; + if (high_regs_pushed == 0) + break; + } + + mask &= (2 << regno) - 1; /* A noop if regno == 8 */ + + /* Pop the values into the low register(s). */ + thumb_pop (asm_out_file, mask); + + /* Move the value(s) into the high registers. */ + for (regno = 0; regno <= LAST_LO_REGNUM; regno++) + { + if (mask & (1 << regno)) + { + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg, + regno); + + for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++) + if (live_regs_mask & (1 << next_hi_reg)) + break; + } + } + } + live_regs_mask &= ~0x0f00; + } + + had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0; + live_regs_mask &= 0xff; + + if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE) + { + /* Pop the return address into the PC. */ + if (had_to_push_lr) + live_regs_mask |= 1 << PC_REGNUM; + + /* Either no argument registers were pushed or a backtrace + structure was created which includes an adjusted stack + pointer, so just pop everything. */ + if (live_regs_mask) + thumb_pop (asm_out_file, live_regs_mask); + + /* We have either just popped the return address into the + PC or it is was kept in LR for the entire function. + Note that thumb_pop has already called thumb_exit if the + PC was in the list. */ + if (!had_to_push_lr) + thumb_exit (asm_out_file, LR_REGNUM); + } + else + { + /* Pop everything but the return address. */ + if (live_regs_mask) + thumb_pop (asm_out_file, live_regs_mask); + + if (had_to_push_lr) + { + if (size > 12) + { + /* We have no free low regs, so save one. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM, + LAST_ARG_REGNUM); + } + + /* Get the return address into a temporary register. */ + thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM); + + if (size > 12) + { + /* Move the return address to lr. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM, + LAST_ARG_REGNUM); + /* Restore the low register. */ + asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, + IP_REGNUM); + regno = LR_REGNUM; + } + else + regno = LAST_ARG_REGNUM; + } + else + regno = LR_REGNUM; + + /* Remove the argument registers that were pushed onto the stack. 
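+ For example, with 8 bytes of pretend args this emits "add sp, sp, #8".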
*/ + asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n", + SP_REGNUM, SP_REGNUM, + crtl->args.pretend_args_size); + + thumb_exit (asm_out_file, regno); + } + + return ""; +} + +/* Functions to save and restore machine-specific function data. */ +static struct machine_function * +arm_init_machine_status (void) +{ + struct machine_function *machine; + machine = ggc_alloc_cleared_machine_function (); + +#if ARM_FT_UNKNOWN != 0 + machine->func_type = ARM_FT_UNKNOWN; +#endif + return machine; +} + +/* Return an RTX indicating where the return address to the + calling function can be found. */ +rtx +arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return NULL_RTX; + + return get_hard_reg_initial_val (Pmode, LR_REGNUM); +} + +/* Do anything needed before RTL is emitted for each function. */ +void +arm_init_expanders (void) +{ + /* Arrange to initialize and mark the machine per-function status. */ + init_machine_status = arm_init_machine_status; + + /* This is to stop the combine pass optimizing away the alignment + adjustment of va_arg. */ + /* ??? It is claimed that this should not be necessary. */ + if (cfun) + mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY); +} + + +/* Like arm_compute_initial_elimination offset. Simpler because there + isn't an ABI specified frame pointer for Thumb. Instead, we set it + to point at the base of the local variables after static stack + space for a function has been allocated. */ + +HOST_WIDE_INT +thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to) +{ + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + + switch (from) + { + case ARG_POINTER_REGNUM: + switch (to) + { + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->saved_args; + + case FRAME_POINTER_REGNUM: + return offsets->soft_frame - offsets->saved_args; + + case ARM_HARD_FRAME_POINTER_REGNUM: + return offsets->saved_regs - offsets->saved_args; + + case THUMB_HARD_FRAME_POINTER_REGNUM: + return offsets->locals_base - offsets->saved_args; + + default: + gcc_unreachable (); + } + break; + + case FRAME_POINTER_REGNUM: + switch (to) + { + case STACK_POINTER_REGNUM: + return offsets->outgoing_args - offsets->soft_frame; + + case ARM_HARD_FRAME_POINTER_REGNUM: + return offsets->saved_regs - offsets->soft_frame; + + case THUMB_HARD_FRAME_POINTER_REGNUM: + return offsets->locals_base - offsets->soft_frame; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } +} + +/* Generate the function's prologue. */ + +void +thumb1_expand_prologue (void) +{ + rtx insn; + + HOST_WIDE_INT amount; + arm_stack_offsets *offsets; + unsigned long func_type; + int regno; + unsigned long live_regs_mask; + unsigned long l_mask; + unsigned high_regs_pushed = 0; + + func_type = arm_current_func_type (); + + /* Naked functions don't have prologues. */ + if (IS_NAKED (func_type)) + return; + + if (IS_INTERRUPT (func_type)) + { + error ("interrupt Service Routines cannot be coded in Thumb mode"); + return; + } + + if (is_called_in_ARM_mode (current_function_decl)) + emit_insn (gen_prologue_thumb1_interwork ()); + + offsets = arm_get_frame_offsets (); + live_regs_mask = offsets->saved_regs_mask; + + /* Extract a mask of the ones we can give to the Thumb's push instruction. */ + l_mask = live_regs_mask & 0x40ff; + /* Then count how many other high registers will need to be pushed. 
*/ + high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + + if (crtl->args.pretend_args_size) + { + rtx x = GEN_INT (-crtl->args.pretend_args_size); + + if (cfun->machine->uses_anonymous_args) + { + int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size); + unsigned long mask; + + mask = 1ul << (LAST_ARG_REGNUM + 1); + mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes); + + insn = thumb1_emit_multi_reg_push (mask, 0); + } + else + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, x)); + } + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (TARGET_BACKTRACE) + { + HOST_WIDE_INT offset = 0; + unsigned work_register; + rtx work_reg, x, arm_hfp_rtx; + + /* We have been asked to create a stack backtrace structure. + The code looks like this: + + 0 .align 2 + 0 func: + 0 sub SP, #16 Reserve space for 4 registers. + 2 push {R7} Push low registers. + 4 add R7, SP, #20 Get the stack pointer before the push. + 6 str R7, [SP, #8] Store the stack pointer + (before reserving the space). + 8 mov R7, PC Get hold of the start of this code + 12. + 10 str R7, [SP, #16] Store it. + 12 mov R7, FP Get hold of the current frame pointer. + 14 str R7, [SP, #4] Store it. + 16 mov R7, LR Get hold of the current return address. + 18 str R7, [SP, #12] Store it. + 20 add R7, SP, #16 Point at the start of the + backtrace structure. + 22 mov FP, R7 Put this value into the frame pointer. */ + + work_register = thumb_find_work_register (live_regs_mask); + work_reg = gen_rtx_REG (SImode, work_register); + arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, GEN_INT (-16))); + RTX_FRAME_RELATED_P (insn) = 1; + + if (l_mask) + { + insn = thumb1_emit_multi_reg_push (l_mask, l_mask); + RTX_FRAME_RELATED_P (insn) = 1; + + offset = bit_count (l_mask) * UNITS_PER_WORD; + } + + x = GEN_INT (offset + 16 + crtl->args.pretend_args_size); + emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x)); + + x = plus_constant (Pmode, stack_pointer_rtx, offset + 4); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + /* Make sure that the instruction fetching the PC is in the right place + to calculate "start of backtrace creation code + 12". */ + /* ??? The stores using the common WORK_REG ought to be enough to + prevent the scheduler from doing anything weird. Failing that + we could always move all of the following into an UNSPEC_VOLATILE. 
*/ + if (l_mask) + { + x = gen_rtx_REG (SImode, PC_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (Pmode, stack_pointer_rtx, offset + 12); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + emit_move_insn (work_reg, arm_hfp_rtx); + + x = plus_constant (Pmode, stack_pointer_rtx, offset); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + } + else + { + emit_move_insn (work_reg, arm_hfp_rtx); + + x = plus_constant (Pmode, stack_pointer_rtx, offset); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + x = gen_rtx_REG (SImode, PC_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (Pmode, stack_pointer_rtx, offset + 12); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + } + + x = gen_rtx_REG (SImode, LR_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (Pmode, stack_pointer_rtx, offset + 8); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + x = GEN_INT (offset + 12); + emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x)); + + emit_move_insn (arm_hfp_rtx, work_reg); + } + /* Optimization: If we are not pushing any low registers but we are going + to push some high registers then delay our first push. This will just + be a push of LR and we can combine it with the push of the first high + register. */ + else if ((l_mask & 0xff) != 0 + || (high_regs_pushed == 0 && l_mask)) + { + unsigned long mask = l_mask; + mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1; + insn = thumb1_emit_multi_reg_push (mask, mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (high_regs_pushed) + { + unsigned pushable_regs; + unsigned next_hi_reg; + unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn + : crtl->args.info.nregs; + unsigned arg_regs_mask = (1 << arg_regs_num) - 1; + + for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--) + if (live_regs_mask & (1 << next_hi_reg)) + break; + + /* Here we need to mask out registers used for passing arguments + even if they can be pushed. This is to avoid using them to stash the high + registers. Such kind of stash may clobber the use of arguments. */ + pushable_regs = l_mask & (~arg_regs_mask) & 0xff; + + if (pushable_regs == 0) + pushable_regs = 1 << thumb_find_work_register (live_regs_mask); + + while (high_regs_pushed > 0) + { + unsigned long real_regs_mask = 0; + + for (regno = LAST_LO_REGNUM; regno >= 0; regno --) + { + if (pushable_regs & (1 << regno)) + { + emit_move_insn (gen_rtx_REG (SImode, regno), + gen_rtx_REG (SImode, next_hi_reg)); + + high_regs_pushed --; + real_regs_mask |= (1 << next_hi_reg); + + if (high_regs_pushed) + { + for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM; + next_hi_reg --) + if (live_regs_mask & (1 << next_hi_reg)) + break; + } + else + { + pushable_regs &= ~((1 << regno) - 1); + break; + } + } + } + + /* If we had to find a work register and we have not yet + saved the LR then add it to the list of regs to push. */ + if (l_mask == (1 << LR_REGNUM)) + { + pushable_regs |= l_mask; + real_regs_mask |= l_mask; + l_mask = 0; + } + + insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + /* Load the pic register before setting the frame pointer, + so we can use r7 as a temporary work register. 
*/ + if (flag_pic && arm_pic_register != INVALID_REGNUM) + arm_load_pic_register (live_regs_mask); + + if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) + emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), + stack_pointer_rtx); + + if (flag_stack_usage_info) + current_function_static_stack_size + = offsets->outgoing_args - offsets->saved_args; + + amount = offsets->outgoing_args - offsets->saved_regs; + amount -= 4 * thumb1_extra_regs_pushed (offsets, true); + if (amount) + { + if (amount < 512) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (- amount))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + rtx reg, dwarf; + + /* The stack decrement is too big for an immediate value in a single + insn. In theory we could issue multiple subtracts, but after + three of them it becomes more space efficient to place the full + value in the constant pool and load into a register. (Also the + ARM debugger really likes to see only one stack decrement per + function). So instead we look for a scratch register into which + we can load the decrement, and then we subtract this from the + stack pointer. Unfortunately on the thumb the only available + scratch registers are the argument registers, and we cannot use + these as they may hold arguments to the function. Instead we + attempt to locate a call preserved register which is used by this + function. If we can find one, then we know that it will have + been pushed at the start of the prologue and so we can corrupt + it now. */ + for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++) + if (live_regs_mask & (1 << regno)) + break; + + gcc_assert(regno <= LAST_LO_REGNUM); + + reg = gen_rtx_REG (SImode, regno); + + emit_insn (gen_movsi (reg, GEN_INT (- amount))); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, reg)); + + dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -amount)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (frame_pointer_needed) + thumb_set_frame_pointer (offsets); + + /* If we are profiling, make sure no instructions are scheduled before + the call to mcount. Similarly if the user has requested no + scheduling in the prolog. Similarly if we want non-call exceptions + using the EABI unwinder, to prevent faulting instructions from being + swapped with a stack adjustment. */ + if (crtl->profile || !TARGET_SCHED_PROLOG + || (arm_except_unwind_info (&global_options) == UI_TARGET + && cfun->can_throw_non_call_exceptions)) + emit_insn (gen_blockage ()); + + cfun->machine->lr_save_eliminated = !thumb_force_lr_save (); + if (live_regs_mask & 0xff) + cfun->machine->lr_save_eliminated = 0; +} + +/* Generate pattern *pop_multiple_with_stack_update_and_return if single + POP instruction can be generated. LR should be replaced by PC. All + the checks required are already done by USE_RETURN_INSN (). Hence, + all we really need to check here is if single register is to be + returned, or multiple register return. 
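+ Roughly: a single saved register becomes a post-increment load of the PC,
+ while several saved registers become one POP whose register list ends in
+ PC in place of LR.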
*/ +void +thumb2_expand_return (bool simple_return) +{ + int i, num_regs; + unsigned long saved_regs_mask; + arm_stack_offsets *offsets; + + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + + for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + if (!simple_return && saved_regs_mask) + { + if (num_regs == 1) + { + rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + rtx reg = gen_rtx_REG (SImode, PC_REGNUM); + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + XVECEXP (par, 0, 0) = ret_rtx; + XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr); + RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1; + emit_jump_insn (par); + } + else + { + saved_regs_mask &= ~ (1 << LR_REGNUM); + saved_regs_mask |= (1 << PC_REGNUM); + arm_emit_multi_reg_pop (saved_regs_mask); + } + } + else + { + emit_jump_insn (simple_return_rtx); + } +} + +void +thumb1_expand_epilogue (void) +{ + HOST_WIDE_INT amount; + arm_stack_offsets *offsets; + int regno; + + /* Naked functions don't have prologues. */ + if (IS_NAKED (arm_current_func_type ())) + return; + + offsets = arm_get_frame_offsets (); + amount = offsets->outgoing_args - offsets->saved_regs; + + if (frame_pointer_needed) + { + emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); + amount = offsets->locals_base - offsets->saved_regs; + } + amount -= 4 * thumb1_extra_regs_pushed (offsets, false); + + gcc_assert (amount >= 0); + if (amount) + { + emit_insn (gen_blockage ()); + + if (amount < 512) + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (amount))); + else + { + /* r3 is always free in the epilogue. */ + rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); + + emit_insn (gen_movsi (reg, GEN_INT (amount))); + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg)); + } + } + + /* Emit a USE (stack_pointer_rtx), so that + the stack adjustment will not be deleted. */ + emit_insn (gen_force_register_use (stack_pointer_rtx)); + + if (crtl->profile || !TARGET_SCHED_PROLOG) + emit_insn (gen_blockage ()); + + /* Emit a clobber for each insn that will be restored in the epilogue, + so that flow2 will get register lifetimes correct. */ + for (regno = 0; regno < 13; regno++) + if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) + emit_clobber (gen_rtx_REG (SImode, regno)); + + if (! df_regs_ever_live_p (LR_REGNUM)) + emit_use (gen_rtx_REG (SImode, LR_REGNUM)); +} + +/* Epilogue code for APCS frame. */ +static void +arm_expand_epilogue_apcs_frame (bool really_return) +{ + unsigned long func_type; + unsigned long saved_regs_mask; + int num_regs = 0; + int i; + int floats_from_frame = 0; + arm_stack_offsets *offsets; + + gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM); + func_type = arm_current_func_type (); + + /* Get frame offsets for ARM. */ + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + + /* Find the offset of the floating-point save area in the frame. */ + floats_from_frame + = (offsets->saved_args + + arm_compute_static_chain_stack_bytes () + - offsets->frame); + + /* Compute how many core registers saved and how far away the floats are. 
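+ (Each saved core register contributes 4 bytes to the distance between the
+ frame pointer and the start of the floating-point save area.)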
*/ + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + { + num_regs++; + floats_from_frame += 4; + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + int start_reg; + rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); + + /* The offset is from IP_REGNUM. */ + int saved_size = arm_get_vfp_saved_size (); + if (saved_size > 0) + { + rtx insn; + floats_from_frame += saved_size; + insn = emit_insn (gen_addsi3 (ip_rtx, + hard_frame_pointer_rtx, + GEN_INT (-floats_from_frame))); + arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame, + ip_rtx, hard_frame_pointer_rtx); + } + + /* Generate VFP register multi-pop. */ + start_reg = FIRST_VFP_REGNUM; + + for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2) + /* Look for a case where a reg does not need restoring. */ + if ((!df_regs_ever_live_p (i) || call_used_regs[i]) + && (!df_regs_ever_live_p (i + 1) + || call_used_regs[i + 1])) + { + if (start_reg != i) + arm_emit_vfp_multi_reg_pop (start_reg, + (i - start_reg) / 2, + gen_rtx_REG (SImode, + IP_REGNUM)); + start_reg = i + 2; + } + + /* Restore the remaining regs that we have discovered (or possibly + even all of them, if the conditional in the for loop never + fired). */ + if (start_reg != i) + arm_emit_vfp_multi_reg_pop (start_reg, + (i - start_reg) / 2, + gen_rtx_REG (SImode, IP_REGNUM)); + } + + if (TARGET_IWMMXT) + { + /* The frame pointer is guaranteed to be non-double-word aligned, as + it is set to double-word-aligned old_stack_pointer - 4. */ + rtx insn; + int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1); + + for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--) + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + rtx addr = gen_frame_mem (V2SImode, + plus_constant (Pmode, hard_frame_pointer_rtx, + - lrm_count * 4)); + insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr)); + REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (V2SImode, i), + NULL_RTX); + lrm_count += 2; + } + } + + /* saved_regs_mask should contain IP which contains old stack pointer + at the time of activation creation. Since SP and IP are adjacent registers, + we can restore the value directly into SP. */ + gcc_assert (saved_regs_mask & (1 << IP_REGNUM)); + saved_regs_mask &= ~(1 << IP_REGNUM); + saved_regs_mask |= (1 << SP_REGNUM); + + /* There are two registers left in saved_regs_mask - LR and PC. We + only need to restore LR (the return address), but to + save time we can load it directly into PC, unless we need a + special function exit sequence, or we are not really returning. */ + if (really_return + && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->calls_eh_return) + /* Delete LR from the register mask, so that LR on + the stack is loaded into the PC in the register mask. */ + saved_regs_mask &= ~(1 << LR_REGNUM); + else + saved_regs_mask &= ~(1 << PC_REGNUM); + + num_regs = bit_count (saved_regs_mask); + if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca) + { + rtx insn; + emit_insn (gen_blockage ()); + /* Unwind the stack to just below the saved registers. */ + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- 4 * num_regs))); + + arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs, + stack_pointer_rtx, hard_frame_pointer_rtx); + } + + arm_emit_multi_reg_pop (saved_regs_mask); + + if (IS_INTERRUPT (func_type)) + { + /* Interrupt handlers will have pushed the + IP onto the stack, so restore it now. 
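+ (This is a post-increment load, effectively "ldr ip, [sp], #4".)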
*/ + rtx insn; + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr)); + REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, IP_REGNUM), + NULL_RTX); + } + + if (!really_return || (saved_regs_mask & (1 << PC_REGNUM))) + return; + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM))); + + if (IS_STACKALIGN (func_type)) + /* Restore the original stack pointer. Before prologue, the stack was + realigned and the original stack pointer saved in r0. For details, + see comment in arm_expand_prologue. */ + emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0))); + + emit_jump_insn (simple_return_rtx); +} + +/* Generate RTL to represent ARM epilogue. Really_return is true if the + function is not a sibcall. */ +void +arm_expand_epilogue (bool really_return) +{ + unsigned long func_type; + unsigned long saved_regs_mask; + int num_regs = 0; + int i; + int amount; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + /* Naked functions don't have epilogue. Hence, generate return pattern, and + let output_return_instruction take care of instruction emission if any. */ + if (IS_NAKED (func_type) + || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)) + { + if (really_return) + emit_jump_insn (simple_return_rtx); + return; + } + + /* If we are throwing an exception, then we really must be doing a + return, so we can't tail-call. */ + gcc_assert (!crtl->calls_eh_return || really_return); + + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + { + arm_expand_epilogue_apcs_frame (really_return); + return; + } + + /* Get frame offsets for ARM. */ + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + num_regs = bit_count (saved_regs_mask); + + if (frame_pointer_needed) + { + rtx insn; + /* Restore stack pointer if necessary. */ + if (TARGET_ARM) + { + /* In ARM mode, frame pointer points to first saved register. + Restore stack pointer to last saved register. */ + amount = offsets->frame - offsets->saved_regs; + + /* Force out any pending memory operations that reference stacked data + before stack de-allocation occurs. */ + emit_insn (gen_blockage ()); + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + stack_pointer_rtx, + hard_frame_pointer_rtx); + + /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not + deleted. */ + emit_insn (gen_force_register_use (stack_pointer_rtx)); + } + else + { + /* In Thumb-2 mode, the frame pointer points to the last saved + register. */ + amount = offsets->locals_base - offsets->saved_regs; + if (amount) + { + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (insn, amount, + hard_frame_pointer_rtx, + hard_frame_pointer_rtx); + } + + /* Force out any pending memory operations that reference stacked data + before stack de-allocation occurs. */ + emit_insn (gen_blockage ()); + insn = emit_insn (gen_movsi (stack_pointer_rtx, + hard_frame_pointer_rtx)); + arm_add_cfa_adjust_cfa_note (insn, 0, + stack_pointer_rtx, + hard_frame_pointer_rtx); + /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not + deleted. 
*/ + emit_insn (gen_force_register_use (stack_pointer_rtx)); + } + } + else + { + /* Pop off outgoing args and local frame to adjust stack pointer to + last saved register. */ + amount = offsets->outgoing_args - offsets->saved_regs; + if (amount) + { + rtx tmp; + /* Force out any pending memory operations that reference stacked data + before stack de-allocation occurs. */ + emit_insn (gen_blockage ()); + tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (amount))); + arm_add_cfa_adjust_cfa_note (tmp, amount, + stack_pointer_rtx, stack_pointer_rtx); + /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is + not deleted. */ + emit_insn (gen_force_register_use (stack_pointer_rtx)); + } + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + /* Generate VFP register multi-pop. */ + int end_reg = LAST_VFP_REGNUM + 1; + + /* Scan the registers in reverse order. We need to match + any groupings made in the prologue and generate matching + vldm operations. The need to match groups is because, + unlike pop, vldm can only do consecutive regs. */ + for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2) + /* Look for a case where a reg does not need restoring. */ + if ((!df_regs_ever_live_p (i) || call_used_regs[i]) + && (!df_regs_ever_live_p (i + 1) + || call_used_regs[i + 1])) + { + /* Restore the regs discovered so far (from reg+2 to + end_reg). */ + if (end_reg > i + 2) + arm_emit_vfp_multi_reg_pop (i + 2, + (end_reg - (i + 2)) / 2, + stack_pointer_rtx); + end_reg = i; + } + + /* Restore the remaining regs that we have discovered (or possibly + even all of them, if the conditional in the for loop never + fired). */ + if (end_reg > i + 2) + arm_emit_vfp_multi_reg_pop (i + 2, + (end_reg - (i + 2)) / 2, + stack_pointer_rtx); + } + + if (TARGET_IWMMXT) + for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++) + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + rtx insn; + rtx addr = gen_rtx_MEM (V2SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr)); + REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (V2SImode, i), + NULL_RTX); + arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, stack_pointer_rtx); + } + + if (saved_regs_mask) + { + rtx insn; + bool return_in_pc = false; + + if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED + && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL) + && !IS_STACKALIGN (func_type) + && really_return + && crtl->args.pretend_args_size == 0 + && saved_regs_mask & (1 << LR_REGNUM) + && !crtl->calls_eh_return) + { + saved_regs_mask &= ~(1 << LR_REGNUM); + saved_regs_mask |= (1 << PC_REGNUM); + return_in_pc = true; + } + + if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc)) + { + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + { + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + + if (i == PC_REGNUM) + { + insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (insn, 0, 0) = ret_rtx; + XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode, + gen_rtx_REG (SImode, i), + addr); + RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1; + insn = emit_jump_insn (insn); + } + else + { + insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i), + addr)); + REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE, + gen_rtx_REG (SImode, i), + NULL_RTX); + 
arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD, + stack_pointer_rtx, + stack_pointer_rtx); + } + } + } + else + { + if (TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { + if (TARGET_THUMB2) + thumb2_emit_ldrd_pop (saved_regs_mask); + else if (TARGET_ARM && !IS_INTERRUPT (func_type)) + arm_emit_ldrd_pop (saved_regs_mask); + else + arm_emit_multi_reg_pop (saved_regs_mask); + } + else + arm_emit_multi_reg_pop (saved_regs_mask); + } + + if (return_in_pc == true) + return; + } + + if (crtl->args.pretend_args_size) + { + int i, j; + rtx dwarf = NULL_RTX; + rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (crtl->args.pretend_args_size))); + + RTX_FRAME_RELATED_P (tmp) = 1; + + if (cfun->machine->uses_anonymous_args) + { + /* Restore pretend args. Refer arm_expand_prologue on how to save + pretend_args in stack. */ + int num_regs = crtl->args.pretend_args_size / 4; + saved_regs_mask = (0xf0 >> num_regs) & 0xf; + for (j = 0, i = 0; j < num_regs; i++) + if (saved_regs_mask & (1 << i)) + { + rtx reg = gen_rtx_REG (SImode, i); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); + j++; + } + REG_NOTES (tmp) = dwarf; + } + arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size, + stack_pointer_rtx, stack_pointer_rtx); + } + + if (!really_return) + return; + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM))); + + if (IS_STACKALIGN (func_type)) + /* Restore the original stack pointer. Before prologue, the stack was + realigned and the original stack pointer saved in r0. For details, + see comment in arm_expand_prologue. */ + emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0))); + + emit_jump_insn (simple_return_rtx); +} + +/* Implementation of insn prologue_thumb1_interwork. This is the first + "instruction" of a function called in ARM mode. Swap to thumb mode. */ + +const char * +thumb1_output_interwork (void) +{ + const char * name; + FILE *f = asm_out_file; + + gcc_assert (MEM_P (DECL_RTL (current_function_decl))); + gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0)) + == SYMBOL_REF); + name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + + /* Generate code sequence to switch us into Thumb mode. */ + /* The .code 32 directive has already been emitted by + ASM_DECLARE_FUNCTION_NAME. */ + asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM); + + /* Generate a label, so that the debugger will notice the + change in instruction sets. This label is also used by + the assembler to bypass the ARM code when this function + is called from a Thumb encoded function elsewhere in the + same file. Hence the definition of STUB_NAME here must + agree with the definition in gas/config/tc-arm.c. */ + +#define STUB_NAME ".real_start_of" + + fprintf (f, "\t.code\t16\n"); +#ifdef ARM_PE + if (arm_dllexport_name_p (name)) + name = arm_strip_name_encoding (name); +#endif + asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name); + fprintf (f, "\t.thumb_func\n"); + asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name); + + return ""; +} + +/* Handle the case of a double word load into a low register from + a computed memory address. The computed address may involve a + register which is overwritten by the load. 
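+ For example, loading a doubleword into r0/r1 from the address held in r0:
+ the high word must be fetched from [r0, #4] into r1 before the low word
+ load overwrites r0.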
*/ +const char * +thumb_load_double_from_address (rtx *operands) +{ + rtx addr; + rtx base; + rtx offset; + rtx arg1; + rtx arg2; + + gcc_assert (REG_P (operands[0])); + gcc_assert (MEM_P (operands[1])); + + /* Get the memory address. */ + addr = XEXP (operands[1], 0); + + /* Work out how the memory address is computed. */ + switch (GET_CODE (addr)) + { + case REG: + operands[2] = adjust_address (operands[1], SImode, 4); + + if (REGNO (operands[0]) == REGNO (addr)) + { + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + } + else + { + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + } + break; + + case CONST: + /* Compute
<address>
+ 4 for the high order load. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + break; + + case PLUS: + arg1 = XEXP (addr, 0); + arg2 = XEXP (addr, 1); + + if (CONSTANT_P (arg1)) + base = arg2, offset = arg1; + else + base = arg1, offset = arg2; + + gcc_assert (REG_P (base)); + + /* Catch the case of
= + */ + if (REG_P (offset)) + { + int reg_offset = REGNO (offset); + int reg_base = REGNO (base); + int reg_dest = REGNO (operands[0]); + + /* Add the base and offset registers together into the + higher destination register. */ + asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r", + reg_dest + 1, reg_base, reg_offset); + + /* Load the lower destination register from the address in + the higher destination register. */ + asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]", + reg_dest, reg_dest + 1); + + /* Load the higher destination register from its own address + plus 4. */ + asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]", + reg_dest + 1, reg_dest + 1); + } + else + { + /* Compute
<address>
+ 4 for the high order load. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + /* If the computed address is held in the low order register + then load the high order register first, otherwise always + load the low order register first. */ + if (REGNO (operands[0]) == REGNO (base)) + { + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + } + else + { + output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldr\t%H0, %2", operands); + } + } + break; + + case LABEL_REF: + /* With no registers to worry about we can just load the value + directly. */ + operands[2] = adjust_address (operands[1], SImode, 4); + + output_asm_insn ("ldr\t%H0, %2", operands); + output_asm_insn ("ldr\t%0, %1", operands); + break; + + default: + gcc_unreachable (); + } + + return ""; +} + +const char * +thumb_output_move_mem_multiple (int n, rtx *operands) +{ + rtx tmp; + + switch (n) + { + case 2: + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + output_asm_insn ("ldmia\t%1!, {%4, %5}", operands); + output_asm_insn ("stmia\t%0!, {%4, %5}", operands); + break; + + case 3: + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + if (REGNO (operands[5]) > REGNO (operands[6])) + { + tmp = operands[5]; + operands[5] = operands[6]; + operands[6] = tmp; + } + if (REGNO (operands[4]) > REGNO (operands[5])) + { + tmp = operands[4]; + operands[4] = operands[5]; + operands[5] = tmp; + } + + output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands); + output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands); + break; + + default: + gcc_unreachable (); + } + + return ""; +} + +/* Output a call-via instruction for thumb state. */ +const char * +thumb_call_via_reg (rtx reg) +{ + int regno = REGNO (reg); + rtx *labelp; + + gcc_assert (regno < LR_REGNUM); + + /* If we are in the normal text section we can use a single instance + per compilation unit. If we are doing function sections, then we need + an entry per section, since we can't rely on reachability. */ + if (in_section == text_section) + { + thumb_call_reg_needed = 1; + + if (thumb_call_via_label[regno] == NULL) + thumb_call_via_label[regno] = gen_label_rtx (); + labelp = thumb_call_via_label + regno; + } + else + { + if (cfun->machine->call_via[regno] == NULL) + cfun->machine->call_via[regno] = gen_label_rtx (); + labelp = cfun->machine->call_via + regno; + } + + output_asm_insn ("bl\t%a0", labelp); + return ""; +} + +/* Routines for generating rtl. 
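+ For example, thumb_expand_movmemqi below expands a 23-byte copy as a
+ 12-byte block move, an 8-byte block move, then a halfword and a byte
+ transfer.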
*/ +void +thumb_expand_movmemqi (rtx *operands) +{ + rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0)); + rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + HOST_WIDE_INT len = INTVAL (operands[2]); + HOST_WIDE_INT offset = 0; + + while (len >= 12) + { + emit_insn (gen_movmem12b (out, in, out, in)); + len -= 12; + } + + if (len >= 8) + { + emit_insn (gen_movmem8b (out, in, out, in)); + len -= 8; + } + + if (len >= 4) + { + rtx reg = gen_reg_rtx (SImode); + emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in))); + emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg)); + len -= 4; + offset += 4; + } + + if (len >= 2) + { + rtx reg = gen_reg_rtx (HImode); + emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode, + plus_constant (Pmode, in, + offset)))); + emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out, + offset)), + reg)); + len -= 2; + offset += 2; + } + + if (len) + { + rtx reg = gen_reg_rtx (QImode); + emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode, + plus_constant (Pmode, in, + offset)))); + emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out, + offset)), + reg)); + } +} + +void +thumb_reload_out_hi (rtx *operands) +{ + emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2])); +} + +/* Handle reading a half-word from memory during reload. */ +void +thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED) +{ + gcc_unreachable (); +} + +/* Return the length of a function name prefix + that starts with the character 'c'. */ +static int +arm_get_strip_length (int c) +{ + switch (c) + { + ARM_NAME_ENCODING_LENGTHS + default: return 0; + } +} + +/* Return a pointer to a function's name with any + and all prefix encodings stripped from it. */ +const char * +arm_strip_name_encoding (const char *name) +{ + int skip; + + while ((skip = arm_get_strip_length (* name))) + name += skip; + + return name; +} + +/* If there is a '*' anywhere in the name's prefix, then + emit the stripped name verbatim, otherwise prepend an + underscore if leading underscores are being used. */ +void +arm_asm_output_labelref (FILE *stream, const char *name) +{ + int skip; + int verbatim = 0; + + while ((skip = arm_get_strip_length (* name))) + { + verbatim |= (*name == '*'); + name += skip; + } + + if (verbatim) + fputs (name, stream); + else + asm_fprintf (stream, "%U%s", name); +} + +/* This function is used to emit an EABI tag and its associated value. + We emit the numerical value of the tag in case the assembler does not + support textual tags. (Eg gas prior to 2.20). If requested we include + the tag name in a comment so that anyone reading the assembler output + will know which tag is being set. + + This function is not static because arm-c.c needs it too. */ + +void +arm_emit_eabi_attribute (const char *name, int num, int val) +{ + asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val); + if (flag_verbose_asm || flag_debug_asm) + asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name); + asm_fprintf (asm_out_file, "\n"); +} + +static void +arm_file_start (void) +{ + int val; + + if (TARGET_UNIFIED_ASM) + asm_fprintf (asm_out_file, "\t.syntax unified\n"); + + if (TARGET_BPABI) + { + const char *fpu_name; + if (arm_selected_arch) + { + /* armv7ve doesn't support any extensions. */ + if (strcmp (arm_selected_arch->name, "armv7ve") == 0) + { + /* Keep backward compatability for assemblers + which don't support armv7ve. 
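+ armv7ve is therefore spelled out as armv7-a plus the virt, idiv, sec and
+ mp architecture extensions.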
*/ + asm_fprintf (asm_out_file, "\t.arch armv7-a\n"); + asm_fprintf (asm_out_file, "\t.arch_extension virt\n"); + asm_fprintf (asm_out_file, "\t.arch_extension idiv\n"); + asm_fprintf (asm_out_file, "\t.arch_extension sec\n"); + asm_fprintf (asm_out_file, "\t.arch_extension mp\n"); + } + else + { + const char* pos = strchr (arm_selected_arch->name, '+'); + if (pos) + { + char buf[15]; + gcc_assert (strlen (arm_selected_arch->name) + <= sizeof (buf) / sizeof (*pos)); + strncpy (buf, arm_selected_arch->name, + (pos - arm_selected_arch->name) * sizeof (*pos)); + buf[pos - arm_selected_arch->name] = '\0'; + asm_fprintf (asm_out_file, "\t.arch %s\n", buf); + asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1); + } + else + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); + } + } + else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0) + asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8); + else + { + const char* truncated_name + = arm_rewrite_selected_cpu (arm_selected_cpu->name); + asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name); + } + + if (TARGET_SOFT_FLOAT) + { + fpu_name = "softvfp"; + } + else + { + fpu_name = arm_fpu_desc->name; + if (arm_fpu_desc->model == ARM_FP_MODEL_VFP) + { + if (TARGET_HARD_FLOAT) + arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3); + if (TARGET_HARD_FLOAT_ABI) + arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1); + } + } + asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name); + + /* Some of these attributes only apply when the corresponding features + are used. However we don't have any easy way of figuring this out. + Conservatively record the setting that would have been used. */ + + if (flag_rounding_math) + arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1); + + if (!flag_unsafe_math_optimizations) + { + arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1); + arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1); + } + if (flag_signaling_nans) + arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1); + + arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23, + flag_finite_math_only ? 1 : 3); + + arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1); + arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1); + arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, + flag_short_enums ? 1 : 2); + + /* Tag_ABI_optimization_goals. */ + if (optimize_size) + val = 4; + else if (optimize >= 2) + val = 2; + else if (optimize) + val = 1; + else + val = 6; + arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val); + + arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34, + unaligned_access); + + if (arm_fp16_format) + arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38, + (int) arm_fp16_format); + + if (arm_lang_output_object_attributes_hook) + arm_lang_output_object_attributes_hook(); + } + + default_file_start (); +} + +static void +arm_file_end (void) +{ + int regno; + + if (NEED_INDICATE_EXEC_STACK) + /* Add .note.GNU-stack. */ + file_end_indicate_exec_stack (); + + if (! 
thumb_call_reg_needed) + return; + + switch_to_section (text_section); + asm_fprintf (asm_out_file, "\t.code 16\n"); + ASM_OUTPUT_ALIGN (asm_out_file, 1); + + for (regno = 0; regno < LR_REGNUM; regno++) + { + rtx label = thumb_call_via_label[regno]; + + if (label != 0) + { + targetm.asm_out.internal_label (asm_out_file, "L", + CODE_LABEL_NUMBER (label)); + asm_fprintf (asm_out_file, "\tbx\t%r\n", regno); + } + } +} + +#ifndef ARM_PE +/* Symbols in the text segment can be accessed without indirecting via the + constant pool; it may take an extra binary operation, but this is still + faster than indirecting via memory. Don't do this when not optimizing, + since we won't be calculating al of the offsets necessary to do this + simplification. */ + +static void +arm_encode_section_info (tree decl, rtx rtl, int first) +{ + if (optimize > 0 && TREE_CONSTANT (decl)) + SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1; + + default_encode_section_info (decl, rtl, first); +} +#endif /* !ARM_PE */ + +static void +arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno) +{ + if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno + && !strcmp (prefix, "L")) + { + arm_ccfsm_state = 0; + arm_target_insn = NULL; + } + default_internal_label (stream, prefix, labelno); +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ +static void +arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + tree function) +{ + static int thunk_label = 0; + char label[256]; + char labelpc[256]; + int mi_delta = delta; + const char *const mi_op = mi_delta < 0 ? "sub" : "add"; + int shift = 0; + int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) + ? 1 : 0); + if (mi_delta < 0) + mi_delta = - mi_delta; + + final_start_function (emit_barrier (), file, 1); + + if (TARGET_THUMB1) + { + int labelno = thunk_label++; + ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno); + /* Thunks are entered in arm mode when avaiable. */ + if (TARGET_THUMB1_ONLY) + { + /* push r3 so we can use it as a temporary. */ + /* TODO: Omit this save if r3 is not used. */ + fputs ("\tpush {r3}\n", file); + fputs ("\tldr\tr3, ", file); + } + else + { + fputs ("\tldr\tr12, ", file); + } + assemble_name (file, label); + fputc ('\n', file); + if (flag_pic) + { + /* If we are generating PIC, the ldr instruction below loads + "(target - 7) - .LTHUNKPCn" into r12. The pc reads as + the address of the add + 8, so we have: + + r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) + = target + 1. + + Note that we have "+ 1" because some versions of GNU ld + don't set the low bit of the result for R_ARM_REL32 + relocations against thumb function symbols. + On ARMv6M this is +4, not +8. */ + ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno); + assemble_name (file, labelpc); + fputs (":\n", file); + if (TARGET_THUMB1_ONLY) + { + /* This is 2 insns after the start of the thunk, so we know it + is 4-byte aligned. 
*/ + fputs ("\tadd\tr3, pc, r3\n", file); + fputs ("\tmov r12, r3\n", file); + } + else + fputs ("\tadd\tr12, pc, r12\n", file); + } + else if (TARGET_THUMB1_ONLY) + fputs ("\tmov r12, r3\n", file); + } + if (TARGET_THUMB1_ONLY) + { + if (mi_delta > 255) + { + fputs ("\tldr\tr3, ", file); + assemble_name (file, label); + fputs ("+4\n", file); + asm_fprintf (file, "\t%s\t%r, %r, r3\n", + mi_op, this_regno, this_regno); + } + else if (mi_delta != 0) + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta); + } + } + else + { + /* TODO: Use movw/movt for large constants when available. */ + while (mi_delta != 0) + { + if ((mi_delta & (3 << shift)) == 0) + shift += 2; + else + { + asm_fprintf (file, "\t%s\t%r, %r, #%d\n", + mi_op, this_regno, this_regno, + mi_delta & (0xff << shift)); + mi_delta &= ~(0xff << shift); + shift += 8; + } + } + } + if (TARGET_THUMB1) + { + if (TARGET_THUMB1_ONLY) + fputs ("\tpop\t{r3}\n", file); + + fprintf (file, "\tbx\tr12\n"); + ASM_OUTPUT_ALIGN (file, 2); + assemble_name (file, label); + fputs (":\n", file); + if (flag_pic) + { + /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */ + rtx tem = XEXP (DECL_RTL (function), 0); + tem = plus_constant (GET_MODE (tem), tem, -7); + tem = gen_rtx_MINUS (GET_MODE (tem), + tem, + gen_rtx_SYMBOL_REF (Pmode, + ggc_strdup (labelpc))); + assemble_integer (tem, 4, BITS_PER_WORD, 1); + } + else + /* Output ".word .LTHUNKn". */ + assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1); + + if (TARGET_THUMB1_ONLY && mi_delta > 255) + assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1); + } + else + { + fputs ("\tb\t", file); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + if (NEED_PLT_RELOC) + fputs ("(PLT)", file); + fputc ('\n', file); + } + + final_end_function (); +} + +int +arm_emit_vector_const (FILE *file, rtx x) +{ + int i; + const char * pattern; + + gcc_assert (GET_CODE (x) == CONST_VECTOR); + + switch (GET_MODE (x)) + { + case V2SImode: pattern = "%08x"; break; + case V4HImode: pattern = "%04x"; break; + case V8QImode: pattern = "%02x"; break; + default: gcc_unreachable (); + } + + fprintf (file, "0x"); + for (i = CONST_VECTOR_NUNITS (x); i--;) + { + rtx element; + + element = CONST_VECTOR_ELT (x, i); + fprintf (file, pattern, INTVAL (element)); + } + + return 1; +} + +/* Emit a fp16 constant appropriately padded to occupy a 4-byte word. + HFmode constant pool entries are actually loaded with ldr. */ +void +arm_emit_fp16_const (rtx c) +{ + REAL_VALUE_TYPE r; + long bits; + + REAL_VALUE_FROM_CONST_DOUBLE (r, c); + bits = real_to_target (NULL, &r, HFmode); + if (WORDS_BIG_ENDIAN) + assemble_zeros (2); + assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); + if (!WORDS_BIG_ENDIAN) + assemble_zeros (2); +} + +const char * +arm_output_load_gr (rtx *operands) +{ + rtx reg; + rtx offset; + rtx wcgr; + rtx sum; + + if (!MEM_P (operands [1]) + || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS + || !REG_P (reg = XEXP (sum, 0)) + || !CONST_INT_P (offset = XEXP (sum, 1)) + || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024))) + return "wldrw%?\t%0, %1"; + + /* Fix up an out-of-range load of a GR register. 
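+ In outline: the base register of the address is saved with a pre-decrement str, re-used to hold the value fetched by a plain ldr, copied into the wCGR register with tmcr, and then restored with a post-increment ldr.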
*/ + output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg); + wcgr = operands[0]; + operands[0] = reg; + output_asm_insn ("ldr%?\t%0, %1", operands); + + operands[0] = wcgr; + operands[1] = reg; + output_asm_insn ("tmcr%?\t%0, %1", operands); + output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg); + + return ""; +} + +/* Worker function for TARGET_SETUP_INCOMING_VARARGS. + + On the ARM, PRETEND_SIZE is set in order to have the prologue push the last + named arg and all anonymous args onto the stack. + XXX I know the prologue shouldn't be pushing registers, but it is faster + that way. */ + +static void +arm_setup_incoming_varargs (cumulative_args_t pcum_v, + enum machine_mode mode, + tree type, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + int nregs; + + cfun->machine->uses_anonymous_args = 1; + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + nregs = pcum->aapcs_ncrn; + if ((nregs & 1) && arm_needs_doubleword_align (mode, type)) + nregs++; + } + else + nregs = pcum->nregs; + + if (nregs < NUM_ARG_REGS) + *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; +} + +/* We can't rely on the caller doing the proper promotion when + using APCS or ATPCS. */ + +static bool +arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED) +{ + return !TARGET_AAPCS_BASED; +} + +static enum machine_mode +arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return ATTRIBUTE_UNUSED) +{ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4) + return SImode; + + return mode; +} + +/* AAPCS based ABIs use short enums by default. */ + +static bool +arm_default_short_enums (void) +{ + return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX; +} + + +/* AAPCS requires that anonymous bitfields affect structure alignment. */ + +static bool +arm_align_anon_bitfield (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */ + +static tree +arm_cxx_guard_type (void) +{ + return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node; +} + + +/* The EABI says test the least significant bit of a guard variable. */ + +static bool +arm_cxx_guard_mask_bit (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The EABI specifies that all array cookies are 8 bytes long. */ + +static tree +arm_get_cookie_size (tree type) +{ + tree size; + + if (!TARGET_AAPCS_BASED) + return default_cxx_get_cookie_size (type); + + size = build_int_cst (sizetype, 8); + return size; +} + + +/* The EABI says that array cookies should also contain the element size. */ + +static bool +arm_cookie_has_size (void) +{ + return TARGET_AAPCS_BASED; +} + + +/* The EABI says constructors and destructors should return a pointer to + the object constructed/destroyed. */ + +static bool +arm_cxx_cdtor_returns_this (void) +{ + return TARGET_AAPCS_BASED; +} + +/* The EABI says that an inline function may never be the key + method. */ + +static bool +arm_cxx_key_method_may_be_inline (void) +{ + return !TARGET_AAPCS_BASED; +} + +static void +arm_cxx_determine_class_data_visibility (tree decl) +{ + if (!TARGET_AAPCS_BASED + || !TARGET_DLLIMPORT_DECL_ATTRIBUTES) + return; + + /* In general, \S 3.2.5.5 of the ARM EABI requires that class data + is exported. 
However, on systems without dynamic vague linkage, + \S 3.2.5.6 says that COMDAT class data has hidden linkage. */ + if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl)) + DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; + else + DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT; + DECL_VISIBILITY_SPECIFIED (decl) = 1; +} + +static bool +arm_cxx_class_data_always_comdat (void) +{ + /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have + vague linkage if the class has no key function. */ + return !TARGET_AAPCS_BASED; +} + + +/* The EABI says __aeabi_atexit should be used to register static + destructors. */ + +static bool +arm_cxx_use_aeabi_atexit (void) +{ + return TARGET_AAPCS_BASED; +} + + +void +arm_set_return_address (rtx source, rtx scratch) +{ + arm_stack_offsets *offsets; + HOST_WIDE_INT delta; + rtx addr; + unsigned long saved_regs; + + offsets = arm_get_frame_offsets (); + saved_regs = offsets->saved_regs_mask; + + if ((saved_regs & (1 << LR_REGNUM)) == 0) + emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); + else + { + if (frame_pointer_needed) + addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4); + else + { + /* LR will be the first saved register. */ + delta = offsets->outgoing_args - (offsets->frame + 4); + + + if (delta >= 4096) + { + emit_insn (gen_addsi3 (scratch, stack_pointer_rtx, + GEN_INT (delta & ~4095))); + addr = scratch; + delta &= 4095; + } + else + addr = stack_pointer_rtx; + + addr = plus_constant (Pmode, addr, delta); + } + emit_move_insn (gen_frame_mem (Pmode, addr), source); + } +} + + +void +thumb_set_return_address (rtx source, rtx scratch) +{ + arm_stack_offsets *offsets; + HOST_WIDE_INT delta; + HOST_WIDE_INT limit; + int reg; + rtx addr; + unsigned long mask; + + emit_use (source); + + offsets = arm_get_frame_offsets (); + mask = offsets->saved_regs_mask; + if (mask & (1 << LR_REGNUM)) + { + limit = 1024; + /* Find the saved regs. */ + if (frame_pointer_needed) + { + delta = offsets->soft_frame - offsets->saved_args; + reg = THUMB_HARD_FRAME_POINTER_REGNUM; + if (TARGET_THUMB1) + limit = 128; + } + else + { + delta = offsets->outgoing_args - offsets->saved_args; + reg = SP_REGNUM; + } + /* Allow for the stack frame. */ + if (TARGET_THUMB1 && TARGET_BACKTRACE) + delta -= 16; + /* The link register is always the first saved register. */ + delta -= 4; + + /* Construct the address. */ + addr = gen_rtx_REG (SImode, reg); + if (delta > limit) + { + emit_insn (gen_movsi (scratch, GEN_INT (delta))); + emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx)); + addr = scratch; + } + else + addr = plus_constant (Pmode, addr, delta); + + emit_move_insn (gen_frame_mem (Pmode, addr), source); + } + else + emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source); +} + +/* Implements target hook vector_mode_supported_p. */ +bool +arm_vector_mode_supported_p (enum machine_mode mode) +{ + /* Neon also supports V2SImode, etc. listed in the clause below. */ + if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode + || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode)) + return true; + + if ((TARGET_NEON || TARGET_IWMMXT) + && ((mode == V2SImode) + || (mode == V4HImode) + || (mode == V8QImode))) + return true; + + if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode + || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode + || mode == V2HAmode)) + return true; + + return false; +} + +/* Implements target hook array_mode_supported_p. 
*/ + +static bool +arm_array_mode_supported_p (enum machine_mode mode, + unsigned HOST_WIDE_INT nelems) +{ + if (TARGET_NEON + && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) + && (nelems >= 2 && nelems <= 4)) + return true; + + return false; +} + +/* Use the option -mvectorize-with-neon-double to override the use of quardword + registers when autovectorizing for Neon, at least until multiple vector + widths are supported properly by the middle-end. */ + +static enum machine_mode +arm_preferred_simd_mode (enum machine_mode mode) +{ + if (TARGET_NEON) + switch (mode) + { + case SFmode: + return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode; + case SImode: + return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode; + case HImode: + return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode; + case QImode: + return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode; + case DImode: + if (!TARGET_NEON_VECTORIZE_DOUBLE) + return V2DImode; + break; + + default:; + } + + if (TARGET_REALLY_IWMMXT) + switch (mode) + { + case SImode: + return V2SImode; + case HImode: + return V4HImode; + case QImode: + return V8QImode; + + default:; + } + + return word_mode; +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. + + We need to define this for LO_REGS on Thumb-1. Otherwise we can end up + using r0-r4 for function arguments, r7 for the stack frame and don't have + enough left over to do doubleword arithmetic. For Thumb-2 all the + potentially problematic instructions accept high registers so this is not + necessary. Care needs to be taken to avoid adding new Thumb-2 patterns + that require many low registers. */ +static bool +arm_class_likely_spilled_p (reg_class_t rclass) +{ + if ((TARGET_THUMB1 && rclass == LO_REGS) + || rclass == CC_REG) + return true; + + return false; +} + +/* Implements target hook small_register_classes_for_mode_p. */ +bool +arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return TARGET_THUMB1; +} + +/* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal + ARM insns and therefore guarantee that the shift count is modulo 256. + DImode shifts (those implemented by lib1funcs.S or by optabs.c) + guarantee no particular behavior for out-of-range counts. */ + +static unsigned HOST_WIDE_INT +arm_shift_truncation_mask (enum machine_mode mode) +{ + return mode == SImode ? 255 : 0; +} + + +/* Map internal gcc register numbers to DWARF2 register numbers. */ + +unsigned int +arm_dbx_register_number (unsigned int regno) +{ + if (regno < 16) + return regno; + + if (IS_VFP_REGNUM (regno)) + { + /* See comment in arm_dwarf_register_span. */ + if (VFP_REGNO_OK_FOR_SINGLE (regno)) + return 64 + regno - FIRST_VFP_REGNUM; + else + return 256 + (regno - FIRST_VFP_REGNUM) / 2; + } + + if (IS_IWMMXT_GR_REGNUM (regno)) + return 104 + regno - FIRST_IWMMXT_GR_REGNUM; + + if (IS_IWMMXT_REGNUM (regno)) + return 112 + regno - FIRST_IWMMXT_REGNUM; + + gcc_unreachable (); +} + +/* Dwarf models VFPv3 registers as 32 64-bit registers. + GCC models tham as 64 32-bit registers, so we need to describe this to + the DWARF generation code. Other registers can use the default. 
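+ For example, a DFmode value in d0 is described as a PARALLEL of its two SImode halves s0 and s1 (DWARF 64 and 65 in the legacy numbering produced by arm_dbx_register_number above), while d16 and above, which have no single-precision aliases, fall back to the 256-287 range (d16 becomes 272).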
*/ +static rtx +arm_dwarf_register_span (rtx rtl) +{ + enum machine_mode mode; + unsigned regno; + rtx parts[16]; + int nregs; + int i; + + regno = REGNO (rtl); + if (!IS_VFP_REGNUM (regno)) + return NULL_RTX; + + /* XXX FIXME: The EABI defines two VFP register ranges: + 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent) + 256-287: D0-D31 + The recommended encoding for S0-S31 is a DW_OP_bit_piece of the + corresponding D register. Until GDB supports this, we shall use the + legacy encodings. We also use these encodings for D0-D15 for + compatibility with older debuggers. */ + mode = GET_MODE (rtl); + if (GET_MODE_SIZE (mode) < 8) + return NULL_RTX; + + if (VFP_REGNO_OK_FOR_SINGLE (regno)) + { + nregs = GET_MODE_SIZE (mode) / 4; + for (i = 0; i < nregs; i += 2) + if (TARGET_BIG_END) + { + parts[i] = gen_rtx_REG (SImode, regno + i + 1); + parts[i + 1] = gen_rtx_REG (SImode, regno + i); + } + else + { + parts[i] = gen_rtx_REG (SImode, regno + i); + parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1); + } + } + else + { + nregs = GET_MODE_SIZE (mode) / 8; + for (i = 0; i < nregs; i++) + parts[i] = gen_rtx_REG (DImode, regno + i); + } + + return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts)); +} + +#if ARM_UNWIND_INFO +/* Emit unwind directives for a store-multiple instruction or stack pointer + push during alignment. + These should only ever be generated by the function prologue code, so + expect them to have a particular form. + The store-multiple instruction sometimes pushes pc as the last register, + although it should not be tracked into unwind information, or for -Os + sometimes pushes some dummy registers before first register that needs + to be tracked in unwind information; such dummy registers are there just + to avoid separate stack adjustment, and will not be restored in the + epilogue. */ + +static void +arm_unwind_emit_sequence (FILE * asm_out_file, rtx p) +{ + int i; + HOST_WIDE_INT offset; + HOST_WIDE_INT nregs; + int reg_size; + unsigned reg; + unsigned lastreg; + unsigned padfirst = 0, padlast = 0; + rtx e; + + e = XVECEXP (p, 0, 0); + gcc_assert (GET_CODE (e) == SET); + + /* First insn will adjust the stack pointer. */ + gcc_assert (GET_CODE (e) == SET + && REG_P (SET_DEST (e)) + && REGNO (SET_DEST (e)) == SP_REGNUM + && GET_CODE (SET_SRC (e)) == PLUS); + + offset = -INTVAL (XEXP (SET_SRC (e), 1)); + nregs = XVECLEN (p, 0) - 1; + gcc_assert (nregs); + + reg = REGNO (SET_SRC (XVECEXP (p, 0, 1))); + if (reg < 16) + { + /* For -Os dummy registers can be pushed at the beginning to + avoid separate stack pointer adjustment. */ + e = XVECEXP (p, 0, 1); + e = XEXP (SET_DEST (e), 0); + if (GET_CODE (e) == PLUS) + padfirst = INTVAL (XEXP (e, 1)); + gcc_assert (padfirst == 0 || optimize_size); + /* The function prologue may also push pc, but not annotate it as it is + never restored. We turn this into a stack pointer adjustment. */ + e = XVECEXP (p, 0, nregs); + e = XEXP (SET_DEST (e), 0); + if (GET_CODE (e) == PLUS) + padlast = offset - INTVAL (XEXP (e, 1)) - 4; + else + padlast = offset - 4; + gcc_assert (padlast == 0 || padlast == 4); + if (padlast == 4) + fprintf (asm_out_file, "\t.pad #4\n"); + reg_size = 4; + fprintf (asm_out_file, "\t.save {"); + } + else if (IS_VFP_REGNUM (reg)) + { + reg_size = 8; + fprintf (asm_out_file, "\t.vsave {"); + } + else + /* Unknown register type. */ + gcc_unreachable (); + + /* If the stack increment doesn't match the size of the saved registers, + something has gone horribly wrong. 
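+ For instance, a prologue push {r4, r5, lr} annotated as .save {r4, r5, lr} must adjust the stack by exactly 12 bytes: three 4-byte registers with no padding on either side.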
*/ + gcc_assert (offset == padfirst + nregs * reg_size + padlast); + + offset = padfirst; + lastreg = 0; + /* The remaining insns will describe the stores. */ + for (i = 1; i <= nregs; i++) + { + /* Expect (set (mem ) (reg)). + Where is (reg:SP) or (plus (reg:SP) (const_int)). */ + e = XVECEXP (p, 0, i); + gcc_assert (GET_CODE (e) == SET + && MEM_P (SET_DEST (e)) + && REG_P (SET_SRC (e))); + + reg = REGNO (SET_SRC (e)); + gcc_assert (reg >= lastreg); + + if (i != 1) + fprintf (asm_out_file, ", "); + /* We can't use %r for vfp because we need to use the + double precision register names. */ + if (IS_VFP_REGNUM (reg)) + asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2); + else + asm_fprintf (asm_out_file, "%r", reg); + +#ifdef ENABLE_CHECKING + /* Check that the addresses are consecutive. */ + e = XEXP (SET_DEST (e), 0); + if (GET_CODE (e) == PLUS) + gcc_assert (REG_P (XEXP (e, 0)) + && REGNO (XEXP (e, 0)) == SP_REGNUM + && CONST_INT_P (XEXP (e, 1)) + && offset == INTVAL (XEXP (e, 1))); + else + gcc_assert (i == 1 + && REG_P (e) + && REGNO (e) == SP_REGNUM); + offset += reg_size; +#endif + } + fprintf (asm_out_file, "}\n"); + if (padfirst) + fprintf (asm_out_file, "\t.pad #%d\n", padfirst); +} + +/* Emit unwind directives for a SET. */ + +static void +arm_unwind_emit_set (FILE * asm_out_file, rtx p) +{ + rtx e0; + rtx e1; + unsigned reg; + + e0 = XEXP (p, 0); + e1 = XEXP (p, 1); + switch (GET_CODE (e0)) + { + case MEM: + /* Pushing a single register. */ + if (GET_CODE (XEXP (e0, 0)) != PRE_DEC + || !REG_P (XEXP (XEXP (e0, 0), 0)) + || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM) + abort (); + + asm_fprintf (asm_out_file, "\t.save "); + if (IS_VFP_REGNUM (REGNO (e1))) + asm_fprintf(asm_out_file, "{d%d}\n", + (REGNO (e1) - FIRST_VFP_REGNUM) / 2); + else + asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1)); + break; + + case REG: + if (REGNO (e0) == SP_REGNUM) + { + /* A stack increment. */ + if (GET_CODE (e1) != PLUS + || !REG_P (XEXP (e1, 0)) + || REGNO (XEXP (e1, 0)) != SP_REGNUM + || !CONST_INT_P (XEXP (e1, 1))) + abort (); + + asm_fprintf (asm_out_file, "\t.pad #%wd\n", + -INTVAL (XEXP (e1, 1))); + } + else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT offset; + + if (GET_CODE (e1) == PLUS) + { + if (!REG_P (XEXP (e1, 0)) + || !CONST_INT_P (XEXP (e1, 1))) + abort (); + reg = REGNO (XEXP (e1, 0)); + offset = INTVAL (XEXP (e1, 1)); + asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n", + HARD_FRAME_POINTER_REGNUM, reg, + offset); + } + else if (REG_P (e1)) + { + reg = REGNO (e1); + asm_fprintf (asm_out_file, "\t.setfp %r, %r\n", + HARD_FRAME_POINTER_REGNUM, reg); + } + else + abort (); + } + else if (REG_P (e1) && REGNO (e1) == SP_REGNUM) + { + /* Move from sp to reg. */ + asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0)); + } + else if (GET_CODE (e1) == PLUS + && REG_P (XEXP (e1, 0)) + && REGNO (XEXP (e1, 0)) == SP_REGNUM + && CONST_INT_P (XEXP (e1, 1))) + { + /* Set reg to offset from sp. */ + asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n", + REGNO (e0), (int)INTVAL(XEXP (e1, 1))); + } + else + abort (); + break; + + default: + abort (); + } +} + + +/* Emit unwind directives for the given insn. 
*/ + +static void +arm_unwind_emit (FILE * asm_out_file, rtx insn) +{ + rtx note, pat; + bool handled_one = false; + + if (arm_except_unwind_info (&global_options) != UI_TARGET) + return; + + if (!(flag_unwind_tables || crtl->uses_eh_lsda) + && (TREE_NOTHROW (current_function_decl) + || crtl->all_throwers_are_sibcalls)) + return; + + if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn)) + return; + + for (note = REG_NOTES (insn); note ; note = XEXP (note, 1)) + { + switch (REG_NOTE_KIND (note)) + { + case REG_FRAME_RELATED_EXPR: + pat = XEXP (note, 0); + goto found; + + case REG_CFA_REGISTER: + pat = XEXP (note, 0); + if (pat == NULL) + { + pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + } + + /* Only emitted for IS_STACKALIGN re-alignment. */ + { + rtx dest, src; + unsigned reg; + + src = SET_SRC (pat); + dest = SET_DEST (pat); + + gcc_assert (src == stack_pointer_rtx); + reg = REGNO (dest); + asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n", + reg + 0x90, reg); + } + handled_one = true; + break; + + /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P + to get correct dwarf information for shrink-wrap. We should not + emit unwind information for it because these are used either for + pretend arguments or notes to adjust sp and restore registers from + stack. */ + case REG_CFA_DEF_CFA: + case REG_CFA_ADJUST_CFA: + case REG_CFA_RESTORE: + return; + + case REG_CFA_EXPRESSION: + case REG_CFA_OFFSET: + /* ??? Only handling here what we actually emit. */ + gcc_unreachable (); + + default: + break; + } + } + if (handled_one) + return; + pat = PATTERN (insn); + found: + + switch (GET_CODE (pat)) + { + case SET: + arm_unwind_emit_set (asm_out_file, pat); + break; + + case SEQUENCE: + /* Store multiple. */ + arm_unwind_emit_sequence (asm_out_file, pat); + break; + + default: + abort(); + } +} + + +/* Output a reference from a function exception table to the type_info + object X. The EABI specifies that the symbol should be relocated by + an R_ARM_TARGET2 relocation. */ + +static bool +arm_output_ttype (rtx x) +{ + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, x); + /* Use special relocations for symbol references. */ + if (!CONST_INT_P (x)) + fputs ("(TARGET2)", asm_out_file); + fputc ('\n', asm_out_file); + + return TRUE; +} + +/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */ + +static void +arm_asm_emit_except_personality (rtx personality) +{ + fputs ("\t.personality\t", asm_out_file); + output_addr_const (asm_out_file, personality); + fputc ('\n', asm_out_file); +} + +/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */ + +static void +arm_asm_init_sections (void) +{ + exception_section = get_unnamed_section (0, output_section_asm_op, + "\t.handlerdata"); +} +#endif /* ARM_UNWIND_INFO */ + +/* Output unwind directives for the start/end of a function. */ + +void +arm_output_fn_unwind (FILE * f, bool prologue) +{ + if (arm_except_unwind_info (&global_options) != UI_TARGET) + return; + + if (prologue) + fputs ("\t.fnstart\n", f); + else + { + /* If this function will never be unwound, then mark it as such. + The came condition is used in arm_unwind_emit to suppress + the frame annotations. 
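+ For example, a nothrow function compiled without -funwind-tables is still bracketed by .fnstart/.fnend, but the .cantunwind marker emitted below records that it can never be unwound, so no unwind opcodes are needed for it.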
*/ + if (!(flag_unwind_tables || crtl->uses_eh_lsda) + && (TREE_NOTHROW (current_function_decl) + || crtl->all_throwers_are_sibcalls)) + fputs("\t.cantunwind\n", f); + + fputs ("\t.fnend\n", f); + } +} + +static bool +arm_emit_tls_decoration (FILE *fp, rtx x) +{ + enum tls_reloc reloc; + rtx val; + + val = XVECEXP (x, 0, 0); + reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1)); + + output_addr_const (fp, val); + + switch (reloc) + { + case TLS_GD32: + fputs ("(tlsgd)", fp); + break; + case TLS_LDM32: + fputs ("(tlsldm)", fp); + break; + case TLS_LDO32: + fputs ("(tlsldo)", fp); + break; + case TLS_IE32: + fputs ("(gottpoff)", fp); + break; + case TLS_LE32: + fputs ("(tpoff)", fp); + break; + case TLS_DESCSEQ: + fputs ("(tlsdesc)", fp); + break; + default: + gcc_unreachable (); + } + + switch (reloc) + { + case TLS_GD32: + case TLS_LDM32: + case TLS_IE32: + case TLS_DESCSEQ: + fputs (" + (. - ", fp); + output_addr_const (fp, XVECEXP (x, 0, 2)); + /* For DESCSEQ the 3rd operand encodes thumbness, and is added */ + fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp); + output_addr_const (fp, XVECEXP (x, 0, 3)); + fputc (')', fp); + break; + default: + break; + } + + return TRUE; +} + +/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */ + +static void +arm_output_dwarf_dtprel (FILE *file, int size, rtx x) +{ + gcc_assert (size == 4); + fputs ("\t.word\t", file); + output_addr_const (file, x); + fputs ("(tlsldo)", file); +} + +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ + +static bool +arm_output_addr_const_extra (FILE *fp, rtx x) +{ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) + return arm_emit_tls_decoration (fp, x); + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL) + { + char label[256]; + int labelno = INTVAL (XVECEXP (x, 0, 0)); + + ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno); + assemble_name_raw (fp, label); + + return TRUE; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF) + { + assemble_name (fp, "_GLOBAL_OFFSET_TABLE_"); + if (GOT_PCREL) + fputs ("+.", fp); + fputs ("-(", fp); + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputc (')', fp); + return TRUE; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET) + { + output_addr_const (fp, XVECEXP (x, 0, 0)); + if (GOT_PCREL) + fputs ("+.", fp); + fputs ("-(", fp); + output_addr_const (fp, XVECEXP (x, 0, 1)); + fputc (')', fp); + return TRUE; + } + else if (GET_CODE (x) == CONST_VECTOR) + return arm_emit_vector_const (fp, x); + + return FALSE; +} + +/* Output assembly for a shift instruction. + SET_FLAGS determines how the instruction modifies the condition codes. + 0 - Do not set condition codes. + 1 - Set condition codes. + 2 - Use smallest instruction. */ +const char * +arm_output_shift(rtx * operands, int set_flags) +{ + char pattern[100]; + static const char flag_chars[3] = {'?', '.', '!'}; + const char *shift; + HOST_WIDE_INT val; + char c; + + c = flag_chars[set_flags]; + if (TARGET_UNIFIED_ASM) + { + shift = shift_op(operands[3], &val); + if (shift) + { + if (val != -1) + operands[2] = GEN_INT(val); + sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c); + } + else + sprintf (pattern, "mov%%%c\t%%0, %%1", c); + } + else + sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c); + output_asm_insn (pattern, operands); + return ""; +} + +/* Output assembly for a WMMX immediate shift instruction. 
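+ Out-of-range counts are handled below: a count of at least the element width is clamped for wsra/wror by emitting #32 shifts (twice for DImode) and is reduced to a single wzero of the destination for the other shifts, while an in-range DImode count above 32 is split into a #32 shift followed by one for the remainder.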
*/ +const char * +arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra) +{ + int shift = INTVAL (operands[2]); + char templ[50]; + enum machine_mode opmode = GET_MODE (operands[0]); + + gcc_assert (shift >= 0); + + /* If the shift value in the register versions is > 63 (for D qualifier), + 31 (for W qualifier) or 15 (for H qualifier). */ + if (((opmode == V4HImode) && (shift > 15)) + || ((opmode == V2SImode) && (shift > 31)) + || ((opmode == DImode) && (shift > 63))) + { + if (wror_or_wsra) + { + sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32); + output_asm_insn (templ, operands); + if (opmode == DImode) + { + sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32); + output_asm_insn (templ, operands); + } + } + else + { + /* The destination register will contain all zeros. */ + sprintf (templ, "wzero\t%%0"); + output_asm_insn (templ, operands); + } + return ""; + } + + if ((opmode == DImode) && (shift > 32)) + { + sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32); + output_asm_insn (templ, operands); + sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32); + output_asm_insn (templ, operands); + } + else + { + sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift); + output_asm_insn (templ, operands); + } + return ""; +} + +/* Output assembly for a WMMX tinsr instruction. */ +const char * +arm_output_iwmmxt_tinsr (rtx *operands) +{ + int mask = INTVAL (operands[3]); + int i; + char templ[50]; + int units = mode_nunits[GET_MODE (operands[0])]; + gcc_assert ((mask & (mask - 1)) == 0); + for (i = 0; i < units; ++i) + { + if ((mask & 0x01) == 1) + { + break; + } + mask >>= 1; + } + gcc_assert (i < units); + { + switch (GET_MODE (operands[0])) + { + case V8QImode: + sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i); + break; + case V4HImode: + sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i); + break; + case V2SImode: + sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i); + break; + default: + gcc_unreachable (); + break; + } + output_asm_insn (templ, operands); + } + return ""; +} + +/* Output a Thumb-1 casesi dispatch sequence. */ +const char * +thumb1_output_casesi (rtx *operands) +{ + rtx diff_vec = PATTERN (NEXT_INSN (operands[0])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE(diff_vec)) + { + case QImode: + return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? + "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi"); + case HImode: + return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? + "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi"); + case SImode: + return "bl\t%___gnu_thumb1_case_si"; + default: + gcc_unreachable (); + } +} + +/* Output a Thumb-2 casesi instruction. */ +const char * +thumb2_output_casesi (rtx *operands) +{ + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + output_asm_insn ("cmp\t%0, %1", operands); + output_asm_insn ("bhi\t%l3", operands); + switch (GET_MODE(diff_vec)) + { + case QImode: + return "tbb\t[%|pc, %0]"; + case HImode: + return "tbh\t[%|pc, %0, lsl #1]"; + case SImode: + if (flag_pic) + { + output_asm_insn ("adr\t%4, %l2", operands); + output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands); + output_asm_insn ("add\t%4, %4, %5", operands); + return "bx\t%4"; + } + else + { + output_asm_insn ("adr\t%4, %l2", operands); + return "ldr\t%|pc, [%4, %0, lsl #2]"; + } + default: + gcc_unreachable (); + } +} + +/* Most ARM cores are single issue, but some newer ones can dual issue. 
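+ Cortex-A15 and Cortex-A57, for instance, are modelled below as issuing three instructions per cycle, and several other cores as issuing two.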
+ The scheduler descriptions rely on this being correct. */ +static int +arm_issue_rate (void) +{ + switch (arm_tune) + { + case cortexa15: + case cortexa57: + return 3; + + case cortexr4: + case cortexr4f: + case cortexr5: + case genericv7a: + case cortexa5: + case cortexa7: + case cortexa8: + case cortexa9: + case cortexa12: + case cortexa53: + case fa726te: + case marvell_pj4: + return 2; + + default: + return 1; + } +} + +/* A table and a function to perform ARM-specific name mangling for + NEON vector types in order to conform to the AAPCS (see "Procedure + Call Standard for the ARM Architecture", Appendix A). To qualify + for emission with the mangled names defined in that document, a + vector type must not only be of the correct mode but also be + composed of NEON vector element types (e.g. __builtin_neon_qi). */ +typedef struct +{ + enum machine_mode mode; + const char *element_type_name; + const char *aapcs_name; +} arm_mangle_map_entry; + +static arm_mangle_map_entry arm_mangle_map[] = { + /* 64-bit containerized types. */ + { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" }, + { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" }, + { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" }, + { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" }, + { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" }, + { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" }, + { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" }, + { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" }, + { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" }, + { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" }, + + /* 128-bit containerized types. */ + { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" }, + { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" }, + { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" }, + { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" }, + { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" }, + { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" }, + { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" }, + { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" }, + { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" }, + { VOIDmode, NULL, NULL } +}; + +const char * +arm_mangle_type (const_tree type) +{ + arm_mangle_map_entry *pos = arm_mangle_map; + + /* The ARM ABI documents (10th October 2008) say that "__va_list" + has to be managled as if it is in the "std" namespace. */ + if (TARGET_AAPCS_BASED + && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) + return "St9__va_list"; + + /* Half-precision float. */ + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) + return "Dh"; + + if (TREE_CODE (type) != VECTOR_TYPE) + return NULL; + + /* Check the mode of the vector type, and the name of the vector + element type, against the table. */ + while (pos->mode != VOIDmode) + { + tree elt_type = TREE_TYPE (type); + + if (pos->mode == TYPE_MODE (type) + && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL + && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))), + pos->element_type_name)) + return pos->aapcs_name; + + pos++; + } + + /* Use the default mangling for unrecognized (possibly user-defined) + vector types. */ + return NULL; +} + +/* Order of allocation of core registers for Thumb: this allocation is + written over the corresponding initial entries of the array + initialized with REG_ALLOC_ORDER. We allocate all low registers + first. 
Saving and restoring a low register is usually cheaper than + using a call-clobbered high register. */ + +static const int thumb_core_reg_alloc_order[] = +{ + 3, 2, 1, 0, 4, 5, 6, 7, + 14, 12, 8, 9, 10, 11 +}; + +/* Adjust register allocation order when compiling for Thumb. */ + +void +arm_order_regs_for_local_alloc (void) +{ + const int arm_reg_alloc_order[] = REG_ALLOC_ORDER; + memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order)); + if (TARGET_THUMB) + memcpy (reg_alloc_order, thumb_core_reg_alloc_order, + sizeof (thumb_core_reg_alloc_order)); +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. */ + +bool +arm_frame_pointer_required (void) +{ + return (cfun->has_nonlocal_label + || SUBTARGET_FRAME_POINTER_REQUIRED + || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ())); +} + +/* Only thumb1 can't support conditional execution, so return true if + the target is not thumb1. */ +static bool +arm_have_conditional_execution (void) +{ + return !TARGET_THUMB1; +} + +tree +arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) +{ + enum machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE + || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations)) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + +/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the + decl of the vectorized builtin for the appropriate vector mode. + NULL_TREE is returned if no such builtin is available. */ +#undef ARM_CHECK_BUILTIN_MODE +#define ARM_CHECK_BUILTIN_MODE(C) \ + (out_mode == SFmode && out_n == C \ + && in_mode == SFmode && in_n == C) + +#undef ARM_FIND_VRINT_VARIANT +#define ARM_FIND_VRINT_VARIANT(N) \ + (ARM_CHECK_BUILTIN_MODE (2) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \ + : (ARM_CHECK_BUILTIN_MODE (4) \ + ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \ + : NULL_TREE)) + + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL) + { + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); + switch (fn) + { + case BUILT_IN_FLOORF: + return ARM_FIND_VRINT_VARIANT (vrintm); + case BUILT_IN_CEILF: + return ARM_FIND_VRINT_VARIANT (vrintp); + case BUILT_IN_TRUNCF: + return ARM_FIND_VRINT_VARIANT (vrintz); + case BUILT_IN_ROUNDF: + return ARM_FIND_VRINT_VARIANT (vrinta); + default: + return NULL_TREE; + } + } + return NULL_TREE; +} +#undef ARM_CHECK_BUILTIN_MODE +#undef ARM_FIND_VRINT_VARIANT + +/* The AAPCS sets the maximum alignment of a vector to 64 bits. */ +static HOST_WIDE_INT +arm_vector_alignment (const_tree type) +{ + HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type)); + + if (TARGET_AAPCS_BASED) + align = MIN (align, 64); + + return align; +} + +static unsigned int +arm_autovectorize_vector_sizes (void) +{ + return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8); +} + +static bool +arm_vector_alignment_reachable (const_tree type, bool is_packed) +{ + /* Vectors which aren't in packed structures will not be less aligned than + the natural alignment of their element type, so this is safe. 
*/ + if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access) + return !is_packed; + + return default_builtin_vector_alignment_reachable (type, is_packed); +} + +static bool +arm_builtin_support_vector_misalignment (enum machine_mode mode, + const_tree type, int misalignment, + bool is_packed) +{ + if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access) + { + HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type); + + if (is_packed) + return align == 1; + + /* If the misalignment is unknown, we should be able to handle the access + so long as it is not to a member of a packed data structure. */ + if (misalignment == -1) + return true; + + /* Return true if the misalignment is a multiple of the natural alignment + of the vector's element type. This is probably always going to be + true in practice, since we've already established that this isn't a + packed access. */ + return ((misalignment % align) == 0); + } + + return default_builtin_support_vector_misalignment (mode, type, misalignment, + is_packed); +} + +static void +arm_conditional_register_usage (void) +{ + int regno; + + if (TARGET_THUMB1 && optimize_size) + { + /* When optimizing for size on Thumb-1, it's better not + to use the HI regs, because of the overhead of + stacking them. */ + for (regno = FIRST_HI_REGNUM; + regno <= LAST_HI_REGNUM; ++regno) + fixed_regs[regno] = call_used_regs[regno] = 1; + } + + /* The link register can be clobbered by any branch insn, + but we have no way to track that at present, so mark + it as unavailable. */ + if (TARGET_THUMB1) + fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1; + + if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP) + { + /* VFPv3 registers are disabled when earlier VFP + versions are selected due to the definition of + LAST_VFP_REGNUM. */ + for (regno = FIRST_VFP_REGNUM; + regno <= LAST_VFP_REGNUM; ++ regno) + { + fixed_regs[regno] = 0; + call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16 + || regno >= FIRST_VFP_REGNUM + 32; + } + } + + if (TARGET_REALLY_IWMMXT) + { + regno = FIRST_IWMMXT_GR_REGNUM; + /* The 2002/10/09 revision of the XScale ABI has wCG0 + and wCG1 as call-preserved registers. The 2002/11/21 + revision changed this so that all wCG registers are + scratch registers. */ + for (regno = FIRST_IWMMXT_GR_REGNUM; + regno <= LAST_IWMMXT_GR_REGNUM; ++ regno) + fixed_regs[regno] = 0; + /* The XScale ABI has wR0 - wR9 as scratch registers, + the rest as call-preserved registers. */ + for (regno = FIRST_IWMMXT_REGNUM; + regno <= LAST_IWMMXT_REGNUM; ++ regno) + { + fixed_regs[regno] = 0; + call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10; + } + } + + if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } + else if (TARGET_APCS_STACK) + { + fixed_regs[10] = 1; + call_used_regs[10] = 1; + } + /* -mcaller-super-interworking reserves r11 for calls to + _interwork_r11_call_via_rN(). Making the register global + is an easy way of ensuring that it remains valid for all + calls. 
*/ + if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING + || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) + { + fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + if (TARGET_CALLER_INTERWORKING) + global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1; + } + SUBTARGET_CONDITIONAL_REGISTER_USAGE +} + +static reg_class_t +arm_preferred_rename_class (reg_class_t rclass) +{ + /* Thumb-2 instructions using LO_REGS may be smaller than instructions + using GENERIC_REGS. During register rename pass, we prefer LO_REGS, + and code size can be reduced. */ + if (TARGET_THUMB2 && rclass == GENERAL_REGS) + return LO_REGS; + else + return NO_REGS; +} + +/* Compute the atrribute "length" of insn "*push_multi". + So this function MUST be kept in sync with that insn pattern. */ +int +arm_attr_length_push_multi(rtx parallel_op, rtx first_op) +{ + int i, regno, hi_reg; + int num_saves = XVECLEN (parallel_op, 0); + + /* ARM mode. */ + if (TARGET_ARM) + return 4; + /* Thumb1 mode. */ + if (TARGET_THUMB1) + return 2; + + /* Thumb2 mode. */ + regno = REGNO (first_op); + hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM); + for (i = 1; i < num_saves && !hi_reg; i++) + { + regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0)); + hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM); + } + + if (!hi_reg) + return 2; + return 4; +} + +/* Compute the number of instructions emitted by output_move_double. */ +int +arm_count_output_move_double_insns (rtx *operands) +{ + int count; + rtx ops[2]; + /* output_move_double may modify the operands array, so call it + here on a copy of the array. */ + ops[0] = operands[0]; + ops[1] = operands[1]; + output_move_double (ops, false, &count); + return count; +} + +int +vfp3_const_double_for_fract_bits (rtx operand) +{ + REAL_VALUE_TYPE r0; + + if (!CONST_DOUBLE_P (operand)) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); + if (exact_real_inverse (DFmode, &r0)) + { + if (exact_real_truncate (DFmode, &r0)) + { + HOST_WIDE_INT value = real_to_integer (&r0); + value = value & 0xffffffff; + if ((value != 0) && ( (value & (value - 1)) == 0)) + return int_log2 (value); + } + } + return 0; +} + +int +vfp3_const_double_for_bits (rtx operand) +{ + REAL_VALUE_TYPE r0; + + if (!CONST_DOUBLE_P (operand)) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (r0, operand); + if (exact_real_truncate (DFmode, &r0)) + { + HOST_WIDE_INT value = real_to_integer (&r0); + value = value & 0xffffffff; + if ((value != 0) && ( (value & (value - 1)) == 0)) + return int_log2 (value); + } + + return 0; +} + +/* Emit a memory barrier around an atomic sequence according to MODEL. */ + +static void +arm_pre_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, true)) + emit_insn (gen_memory_barrier ()); +} + +static void +arm_post_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, false)) + emit_insn (gen_memory_barrier ()); +} + +/* Emit the load-exclusive and store-exclusive instructions. + Use acquire and release versions if necessary. 
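+ For an SImode access this chooses between the plain exclusive patterns (ldrex/strex) and, when the caller requests it (which only happens on TARGET_HAVE_LDACQ targets), the acquire/release exclusive forms, i.e. the LDAEX/STLEX class of instructions; the other access sizes follow the same scheme.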
*/ + +static void +arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq) +{ + rtx (*gen) (rtx, rtx); + + if (acq) + { + switch (mode) + { + case QImode: gen = gen_arm_load_acquire_exclusiveqi; break; + case HImode: gen = gen_arm_load_acquire_exclusivehi; break; + case SImode: gen = gen_arm_load_acquire_exclusivesi; break; + case DImode: gen = gen_arm_load_acquire_exclusivedi; break; + default: + gcc_unreachable (); + } + } + else + { + switch (mode) + { + case QImode: gen = gen_arm_load_exclusiveqi; break; + case HImode: gen = gen_arm_load_exclusivehi; break; + case SImode: gen = gen_arm_load_exclusivesi; break; + case DImode: gen = gen_arm_load_exclusivedi; break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen (rval, mem)); +} + +static void +arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, + rtx mem, bool rel) +{ + rtx (*gen) (rtx, rtx, rtx); + + if (rel) + { + switch (mode) + { + case QImode: gen = gen_arm_store_release_exclusiveqi; break; + case HImode: gen = gen_arm_store_release_exclusivehi; break; + case SImode: gen = gen_arm_store_release_exclusivesi; break; + case DImode: gen = gen_arm_store_release_exclusivedi; break; + default: + gcc_unreachable (); + } + } + else + { + switch (mode) + { + case QImode: gen = gen_arm_store_exclusiveqi; break; + case HImode: gen = gen_arm_store_exclusivehi; break; + case SImode: gen = gen_arm_store_exclusivesi; break; + case DImode: gen = gen_arm_store_exclusivedi; break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen (bval, rval, mem)); +} + +/* Mark the previous jump instruction as unlikely. */ + +static void +emit_unlikely_jump (rtx insn) +{ + int very_unlikely = REG_BR_PROB_BASE / 100 - 1; + + insn = emit_jump_insn (insn); + add_int_reg_note (insn, REG_BR_PROB, very_unlikely); +} + +/* Expand a compare and swap pattern. */ + +void +arm_expand_compare_and_swap (rtx operands[]) +{ + rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; + enum machine_mode mode; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); + + bval = operands[0]; + rval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + + /* Normally the succ memory model must be stronger than fail, but in the + unlikely event of fail being ACQUIRE and succ being RELEASE we need to + promote succ to ACQ_REL so that we don't lose the acquire semantics. */ + + if (TARGET_HAVE_LDACQ + && INTVAL (mod_f) == MEMMODEL_ACQUIRE + && INTVAL (mod_s) == MEMMODEL_RELEASE) + mod_s = GEN_INT (MEMMODEL_ACQ_REL); + + switch (mode) + { + case QImode: + case HImode: + /* For narrow modes, we're going to perform the comparison in SImode, + so do the zero-extension now. */ + rval = gen_reg_rtx (SImode); + oldval = convert_modes (SImode, mode, oldval, true); + /* FALLTHRU */ + + case SImode: + /* Force the value into a register if needed. We waited until after + the zero-extension above to do this properly. 
*/ + if (!arm_add_operand (oldval, SImode)) + oldval = force_reg (SImode, oldval); + break; + + case DImode: + if (!cmpdi_operand (oldval, mode)) + oldval = force_reg (mode, oldval); + break; + + default: + gcc_unreachable (); + } + + switch (mode) + { + case QImode: gen = gen_atomic_compare_and_swapqi_1; break; + case HImode: gen = gen_atomic_compare_and_swaphi_1; break; + case SImode: gen = gen_atomic_compare_and_swapsi_1; break; + case DImode: gen = gen_atomic_compare_and_swapdi_1; break; + default: + gcc_unreachable (); + } + + emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); + + if (mode == QImode || mode == HImode) + emit_move_insn (operands[1], gen_lowpart (mode, rval)); + + /* In all cases, we arrange for success to be signaled by Z set. + This arrangement allows for the boolean result to be used directly + in a subsequent branch, post optimization. */ + x = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_EQ (SImode, x, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, bval, x)); +} + +/* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether + another memory store between the load-exclusive and store-exclusive can + reset the monitor from Exclusive to Open state. This means we must wait + until after reload to split the pattern, lest we get a register spill in + the middle of the atomic sequence. */ + +void +arm_split_compare_and_swap (rtx operands[]) +{ + rtx rval, mem, oldval, newval, scratch; + enum machine_mode mode; + enum memmodel mod_s, mod_f; + bool is_weak; + rtx label1, label2, x, cond; + + rval = operands[0]; + mem = operands[1]; + oldval = operands[2]; + newval = operands[3]; + is_weak = (operands[4] != const0_rtx); + mod_s = (enum memmodel) INTVAL (operands[5]); + mod_f = (enum memmodel) INTVAL (operands[6]); + scratch = operands[7]; + mode = GET_MODE (mem); + + bool use_acquire = TARGET_HAVE_LDACQ + && !(mod_s == MEMMODEL_RELAXED + || mod_s == MEMMODEL_CONSUME + || mod_s == MEMMODEL_RELEASE); + + bool use_release = TARGET_HAVE_LDACQ + && !(mod_s == MEMMODEL_RELAXED + || mod_s == MEMMODEL_CONSUME + || mod_s == MEMMODEL_ACQUIRE); + + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_pre_atomic_barrier (mod_s); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_label_rtx (); + emit_label (label1); + } + label2 = gen_label_rtx (); + + arm_emit_load_exclusive (mode, rval, mem, use_acquire); + + cond = arm_gen_compare_reg (NE, rval, oldval, scratch); + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + + arm_emit_store_exclusive (mode, scratch, mem, newval, use_release); + + /* Weak or strong, we want EQ to be true for success, so that we + match the flags that we got from the compare above. */ + cond = gen_rtx_REG (CCmode, CC_REGNUM); + x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, cond, x)); + + if (!is_weak) + { + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); + emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); + } + + if (mod_f != MEMMODEL_RELAXED) + emit_label (label2); + + /* Checks whether a barrier is needed and emits one accordingly. 
*/ + if (!(use_acquire || use_release)) + arm_post_atomic_barrier (mod_s); + + if (mod_f == MEMMODEL_RELAXED) + emit_label (label2); +} + +void +arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, + rtx value, rtx model_rtx, rtx cond) +{ + enum memmodel model = (enum memmodel) INTVAL (model_rtx); + enum machine_mode mode = GET_MODE (mem); + enum machine_mode wmode = (mode == DImode ? DImode : SImode); + rtx label, x; + + bool use_acquire = TARGET_HAVE_LDACQ + && !(model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_RELEASE); + + bool use_release = TARGET_HAVE_LDACQ + && !(model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_ACQUIRE); + + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_pre_atomic_barrier (model); + + label = gen_label_rtx (); + emit_label (label); + + if (new_out) + new_out = gen_lowpart (wmode, new_out); + if (old_out) + old_out = gen_lowpart (wmode, old_out); + else + old_out = new_out; + value = simplify_gen_subreg (wmode, value, mode, 0); + + arm_emit_load_exclusive (mode, old_out, mem, use_acquire); + + switch (code) + { + case SET: + new_out = value; + break; + + case NOT: + x = gen_rtx_AND (wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + x = gen_rtx_NOT (wmode, new_out); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + + case MINUS: + if (CONST_INT_P (value)) + { + value = GEN_INT (-INTVAL (value)); + code = PLUS; + } + /* FALLTHRU */ + + case PLUS: + if (mode == DImode) + { + /* DImode plus/minus need to clobber flags. */ + /* The adddi3 and subdi3 patterns are incorrectly written so that + they require matching operands, even when we could easily support + three operands. Thankfully, this can be fixed up post-splitting, + as the individual add+adc patterns do accept three operands and + post-reload cprop can make these moves go away. */ + emit_move_insn (new_out, old_out); + if (code == PLUS) + x = gen_adddi3 (new_out, new_out, value); + else + x = gen_subdi3 (new_out, new_out, value); + emit_insn (x); + break; + } + /* FALLTHRU */ + + default: + x = gen_rtx_fmt_ee (code, wmode, old_out, value); + emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); + break; + } + + arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out), + use_release); + + x = gen_rtx_NE (VOIDmode, cond, const0_rtx); + emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label)); + + /* Checks whether a barrier is needed and emits one accordingly. */ + if (!(use_acquire || use_release)) + arm_post_atomic_barrier (model); +} + +#define MAX_VECT_LEN 16 + +struct expand_vec_perm_d +{ + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + enum machine_mode vmode; + unsigned char nelt; + bool one_vector_p; + bool testing_p; +}; + +/* Generate a variable permutation. 
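+ For a single V8QImode input this is one vtbl with a single D-register table; with two distinct inputs the operands are first combined with vcombine so that the two-register table form (the vtbl2 pattern) can index across both vectors.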
*/ + +static void +arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) +{ + enum machine_mode vmode = GET_MODE (target); + bool one_vector_p = rtx_equal_p (op0, op1); + + gcc_checking_assert (vmode == V8QImode || vmode == V16QImode); + gcc_checking_assert (GET_MODE (op0) == vmode); + gcc_checking_assert (GET_MODE (op1) == vmode); + gcc_checking_assert (GET_MODE (sel) == vmode); + gcc_checking_assert (TARGET_NEON); + + if (one_vector_p) + { + if (vmode == V8QImode) + emit_insn (gen_neon_vtbl1v8qi (target, op0, sel)); + else + emit_insn (gen_neon_vtbl1v16qi (target, op0, sel)); + } + else + { + rtx pair; + + if (vmode == V8QImode) + { + pair = gen_reg_rtx (V16QImode); + emit_insn (gen_neon_vcombinev8qi (pair, op0, op1)); + pair = gen_lowpart (TImode, pair); + emit_insn (gen_neon_vtbl2v8qi (target, pair, sel)); + } + else + { + pair = gen_reg_rtx (OImode); + emit_insn (gen_neon_vcombinev16qi (pair, op0, op1)); + emit_insn (gen_neon_vtbl2v16qi (target, pair, sel)); + } + } +} + +void +arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) +{ + enum machine_mode vmode = GET_MODE (target); + unsigned int i, nelt = GET_MODE_NUNITS (vmode); + bool one_vector_p = rtx_equal_p (op0, op1); + rtx rmask[MAX_VECT_LEN], mask; + + /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's + numbering of elements for big-endian, we must reverse the order. */ + gcc_checking_assert (!BYTES_BIG_ENDIAN); + + /* The VTBL instruction does not use a modulo index, so we must take care + of that ourselves. */ + mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1); + for (i = 0; i < nelt; ++i) + rmask[i] = mask; + mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask)); + sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); + + arm_expand_vec_perm_1 (target, op0, op1, sel); +} + +/* Generate or test for an insn that supports a constant permutation. */ + +/* Recognize patterns for the VUZP insns. */ + +static bool +arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out0, out1, in0, in1, x; + rtx (*gen)(rtx, rtx, rtx, rtx); + + if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) + return false; + + /* Note that these are little-endian tests. Adjust for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i++) + { + unsigned elt = (i * 2 + odd) & mask; + if (d->perm[i] != elt) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vuzpv16qi_internal; break; + case V8QImode: gen = gen_neon_vuzpv8qi_internal; break; + case V8HImode: gen = gen_neon_vuzpv8hi_internal; break; + case V4HImode: gen = gen_neon_vuzpv4hi_internal; break; + case V4SImode: gen = gen_neon_vuzpv4si_internal; break; + case V2SImode: gen = gen_neon_vuzpv2si_internal; break; + case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break; + case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break; + default: + gcc_unreachable (); + } + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + + out0 = d->target; + out1 = gen_reg_rtx (d->vmode); + if (odd) + x = out0, out0 = out1, out1 = x; + + emit_insn (gen (out0, in0, in1, out1)); + return true; +} + +/* Recognize patterns for the VZIP insns. 
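+ For V8QImode with two input vectors the low-half interleave corresponds to the selector { 0, 8, 1, 9, 2, 10, 3, 11 } and the high-half interleave to { 4, 12, 5, 13, 6, 14, 7, 15 }, which is the shape the checks below look for.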
*/ + +static bool +arm_evpc_neon_vzip (struct expand_vec_perm_d *d) +{ + unsigned int i, high, mask, nelt = d->nelt; + rtx out0, out1, in0, in1, x; + rtx (*gen)(rtx, rtx, rtx, rtx); + + if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) + return false; + + /* Note that these are little-endian tests. Adjust for big-endian later. */ + high = nelt / 2; + if (d->perm[0] == high) + ; + else if (d->perm[0] == 0) + high = 0; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt / 2; i++) + { + unsigned elt = (i + high) & mask; + if (d->perm[i * 2] != elt) + return false; + elt = (elt + nelt) & mask; + if (d->perm[i * 2 + 1] != elt) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vzipv16qi_internal; break; + case V8QImode: gen = gen_neon_vzipv8qi_internal; break; + case V8HImode: gen = gen_neon_vzipv8hi_internal; break; + case V4HImode: gen = gen_neon_vzipv4hi_internal; break; + case V4SImode: gen = gen_neon_vzipv4si_internal; break; + case V2SImode: gen = gen_neon_vzipv2si_internal; break; + case V2SFmode: gen = gen_neon_vzipv2sf_internal; break; + case V4SFmode: gen = gen_neon_vzipv4sf_internal; break; + default: + gcc_unreachable (); + } + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + high = !high; + } + + out0 = d->target; + out1 = gen_reg_rtx (d->vmode); + if (high) + x = out0, out0 = out1, out1 = x; + + emit_insn (gen (out0, in0, in1, out1)); + return true; +} + +/* Recognize patterns for the VREV insns. */ + +static bool +arm_evpc_neon_vrev (struct expand_vec_perm_d *d) +{ + unsigned int i, j, diff, nelt = d->nelt; + rtx (*gen)(rtx, rtx, rtx); + + if (!d->one_vector_p) + return false; + + diff = d->perm[0]; + switch (diff) + { + case 7: + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vrev64v16qi; break; + case V8QImode: gen = gen_neon_vrev64v8qi; break; + default: + return false; + } + break; + case 3: + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vrev32v16qi; break; + case V8QImode: gen = gen_neon_vrev32v8qi; break; + case V8HImode: gen = gen_neon_vrev64v8hi; break; + case V4HImode: gen = gen_neon_vrev64v4hi; break; + default: + return false; + } + break; + case 1: + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vrev16v16qi; break; + case V8QImode: gen = gen_neon_vrev16v8qi; break; + case V8HImode: gen = gen_neon_vrev32v8hi; break; + case V4HImode: gen = gen_neon_vrev32v4hi; break; + case V4SImode: gen = gen_neon_vrev64v4si; break; + case V2SImode: gen = gen_neon_vrev64v2si; break; + case V4SFmode: gen = gen_neon_vrev64v4sf; break; + case V2SFmode: gen = gen_neon_vrev64v2sf; break; + default: + return false; + } + break; + default: + return false; + } + + for (i = 0; i < nelt ; i += diff + 1) + for (j = 0; j <= diff; j += 1) + { + /* This is guaranteed to be true as the value of diff + is 7, 3, 1 and we should have enough elements in the + queue to generate this. Getting a vector mask with a + value of diff other than these values implies that + something is wrong by the time we get here. */ + gcc_assert (i + j < nelt); + if (d->perm[i + j] != i + diff - j) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + /* ??? The third operand is an artifact of the builtin infrastructure + and is ignored by the actual instruction. */ + emit_insn (gen (d->target, d->op0, const0_rtx)); + return true; +} + +/* Recognize patterns for the VTRN insns. 
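+ For V8QImode with two input vectors the even-element transpose corresponds to the selector { 0, 8, 2, 10, 4, 12, 6, 14 } and the odd-element transpose to { 1, 9, 3, 11, 5, 13, 7, 15 }.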
*/ + +static bool +arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) +{ + unsigned int i, odd, mask, nelt = d->nelt; + rtx out0, out1, in0, in1, x; + rtx (*gen)(rtx, rtx, rtx, rtx); + + if (GET_MODE_UNIT_SIZE (d->vmode) >= 8) + return false; + + /* Note that these are little-endian tests. Adjust for big-endian later. */ + if (d->perm[0] == 0) + odd = 0; + else if (d->perm[0] == 1) + odd = 1; + else + return false; + mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); + + for (i = 0; i < nelt; i += 2) + { + if (d->perm[i] != i + odd) + return false; + if (d->perm[i + 1] != ((i + nelt + odd) & mask)) + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vtrnv16qi_internal; break; + case V8QImode: gen = gen_neon_vtrnv8qi_internal; break; + case V8HImode: gen = gen_neon_vtrnv8hi_internal; break; + case V4HImode: gen = gen_neon_vtrnv4hi_internal; break; + case V4SImode: gen = gen_neon_vtrnv4si_internal; break; + case V2SImode: gen = gen_neon_vtrnv2si_internal; break; + case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break; + case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break; + default: + gcc_unreachable (); + } + + in0 = d->op0; + in1 = d->op1; + if (BYTES_BIG_ENDIAN) + { + x = in0, in0 = in1, in1 = x; + odd = !odd; + } + + out0 = d->target; + out1 = gen_reg_rtx (d->vmode); + if (odd) + x = out0, out0 = out1, out1 = x; + + emit_insn (gen (out0, in0, in1, out1)); + return true; +} + +/* Recognize patterns for the VEXT insns. */ + +static bool +arm_evpc_neon_vext (struct expand_vec_perm_d *d) +{ + unsigned int i, nelt = d->nelt; + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx offset; + + unsigned int location; + + unsigned int next = d->perm[0] + 1; + + /* TODO: Handle GCC's numbering of elements for big-endian. */ + if (BYTES_BIG_ENDIAN) + return false; + + /* Check if the extracted indexes are increasing by one. */ + for (i = 1; i < nelt; next++, i++) + { + /* If we hit the most significant element of the 2nd vector in + the previous iteration, no need to test further. */ + if (next == 2 * nelt) + return false; + + /* If we are operating on only one vector: it could be a + rotation. If there are only two elements of size < 64, let + arm_evpc_neon_vrev catch it. */ + if (d->one_vector_p && (next == nelt)) + { + if ((nelt == 2) && (d->vmode != V2DImode)) + return false; + else + next = 0; + } + + if (d->perm[i] != next) + return false; + } + + location = d->perm[0]; + + switch (d->vmode) + { + case V16QImode: gen = gen_neon_vextv16qi; break; + case V8QImode: gen = gen_neon_vextv8qi; break; + case V4HImode: gen = gen_neon_vextv4hi; break; + case V8HImode: gen = gen_neon_vextv8hi; break; + case V2SImode: gen = gen_neon_vextv2si; break; + case V4SImode: gen = gen_neon_vextv4si; break; + case V2SFmode: gen = gen_neon_vextv2sf; break; + case V4SFmode: gen = gen_neon_vextv4sf; break; + case V2DImode: gen = gen_neon_vextv2di; break; + default: + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + offset = GEN_INT (location); + emit_insn (gen (d->target, d->op0, d->op1, offset)); + return true; +} + +/* The NEON VTBL instruction is a fully variable permuation that's even + stronger than what we expose via VEC_PERM_EXPR. What it doesn't do + is mask the index operand as VEC_PERM_EXPR requires. Therefore we + can do slightly better by expanding this as a constant where we don't + have to apply a mask. 
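+
+   Concretely: VEC_PERM_EXPR takes each selector element modulo the number
+   of input elements, which is why arm_expand_vec_perm above must AND a
+   variable selector with nelt - 1 or 2 * nelt - 1 at run time, whereas
+   VTBL simply writes zero for any out-of-range index.  For a constant
+   selector the indices have already been reduced by
+   arm_expand_vec_perm_const, so that run-time AND can be omitted here.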
*/ + +static bool +arm_evpc_neon_vtbl (struct expand_vec_perm_d *d) +{ + rtx rperm[MAX_VECT_LEN], sel; + enum machine_mode vmode = d->vmode; + unsigned int i, nelt = d->nelt; + + /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's + numbering of elements for big-endian, we must reverse the order. */ + if (BYTES_BIG_ENDIAN) + return false; + + if (d->testing_p) + return true; + + /* Generic code will try constant permutation twice. Once with the + original mode and again with the elements lowered to QImode. + So wait and don't do the selector expansion ourselves. */ + if (vmode != V8QImode && vmode != V16QImode) + return false; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (d->perm[i]); + sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); + sel = force_reg (vmode, sel); + + arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel); + return true; +} + +static bool +arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) +{ + /* Check if the input mask matches vext before reordering the + operands. */ + if (TARGET_NEON) + if (arm_evpc_neon_vext (d)) + return true; + + /* The pattern matching functions above are written to look for a small + number to begin the sequence (0, 1, N/2). If we begin with an index + from the second operand, we can swap the operands. */ + if (d->perm[0] >= d->nelt) + { + unsigned i, nelt = d->nelt; + rtx x; + + for (i = 0; i < nelt; ++i) + d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1); + + x = d->op0; + d->op0 = d->op1; + d->op1 = x; + } + + if (TARGET_NEON) + { + if (arm_evpc_neon_vuzp (d)) + return true; + if (arm_evpc_neon_vzip (d)) + return true; + if (arm_evpc_neon_vrev (d)) + return true; + if (arm_evpc_neon_vtrn (d)) + return true; + return arm_evpc_neon_vtbl (d); + } + return false; +} + +/* Expand a vec_perm_const pattern. */ + +bool +arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) +{ + struct expand_vec_perm_d d; + int i, nelt, which; + + d.target = target; + d.op0 = op0; + d.op1 = op1; + + d.vmode = GET_MODE (target); + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = false; + + for (i = which = 0; i < nelt; ++i) + { + rtx e = XVECEXP (sel, 0, i); + int ei = INTVAL (e) & (2 * nelt - 1); + which |= (ei < nelt ? 1 : 2); + d.perm[i] = ei; + } + + switch (which) + { + default: + gcc_unreachable(); + + case 3: + d.one_vector_p = false; + if (!rtx_equal_p (op0, op1)) + break; + + /* The elements of PERM do not suggest that only the first operand + is used, but both operands are identical. Allow easier matching + of the permutation by folding the permutation into the single + input vector. */ + /* FALLTHRU */ + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] &= nelt - 1; + d.op0 = op1; + d.one_vector_p = true; + break; + + case 1: + d.op1 = op0; + d.one_vector_p = true; + break; + } + + return arm_expand_vec_perm_const_1 (&d); +} + +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ + +static bool +arm_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel) +{ + struct expand_vec_perm_d d; + unsigned int i, nelt, which; + bool ret; + + d.vmode = vmode; + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = true; + memcpy (d.perm, sel, nelt); + + /* Categorize the set of elements in the selector. */ + for (i = which = 0; i < nelt; ++i) + { + unsigned char e = d.perm[i]; + gcc_assert (e < 2 * nelt); + which |= (e < nelt ? 1 : 2); + } + + /* For all elements from second vector, fold the elements to first. 
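+
+     For example, which == 2 means every index selects from the second
+     operand: for V8QImode a selector of { 8, 9, 10, 11, 12, 13, 14, 15 }
+     is rewritten below as { 0, 1, 2, 3, 4, 5, 6, 7 } applied to a single
+     input, so the one-vector recognizers can match it.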
*/ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + + /* Check whether the mask can be applied to the vector type. */ + d.one_vector_p = (which != 3); + + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ret = arm_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; +} + +bool +arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code) +{ + /* If we are soft float and we do not have ldrd + then all auto increment forms are ok. */ + if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4)) + return true; + + switch (code) + { + /* Post increment and Pre Decrement are supported for all + instruction forms except for vector forms. */ + case ARM_POST_INC: + case ARM_PRE_DEC: + if (VECTOR_MODE_P (mode)) + { + if (code != ARM_PRE_DEC) + return true; + else + return false; + } + + return true; + + case ARM_POST_DEC: + case ARM_PRE_INC: + /* Without LDRD and mode size greater than + word size, there is no point in auto-incrementing + because ldm and stm will not have these forms. */ + if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4) + return false; + + /* Vector and floating point modes do not support + these auto increment forms. */ + if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode)) + return false; + + return true; + + default: + return false; + + } + + return false; +} + +/* The default expansion of general 64-bit shifts in core-regs is suboptimal, + on ARM, since we know that shifts by negative amounts are no-ops. + Additionally, the default expansion code is not available or suitable + for post-reload insn splits (this can occur when the register allocator + chooses not to do a shift in NEON). + + This function is used in both initial expand and post-reload splits, and + handles all kinds of 64-bit shifts. + + Input requirements: + - It is safe for the input and output to be the same register, but + early-clobber rules apply for the shift amount and scratch registers. + - Shift by register requires both scratch registers. In all other cases + the scratch registers may be NULL. + - Ashiftrt by a register also clobbers the CC register. */ +void +arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in, + rtx amount, rtx scratch1, rtx scratch2) +{ + rtx out_high = gen_highpart (SImode, out); + rtx out_low = gen_lowpart (SImode, out); + rtx in_high = gen_highpart (SImode, in); + rtx in_low = gen_lowpart (SImode, in); + + /* Terminology: + in = the register pair containing the input value. + out = the destination register pair. + up = the high- or low-part of each pair. + down = the opposite part to "up". + In a shift, we can consider bits to shift from "up"-stream to + "down"-stream, so in a left-shift "up" is the low-part and "down" + is the high-part of each register pair. */ + + rtx out_up = code == ASHIFT ? out_low : out_high; + rtx out_down = code == ASHIFT ? out_high : out_low; + rtx in_up = code == ASHIFT ? in_low : in_high; + rtx in_down = code == ASHIFT ? 
in_high : in_low; + + gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT); + gcc_assert (out + && (REG_P (out) || GET_CODE (out) == SUBREG) + && GET_MODE (out) == DImode); + gcc_assert (in + && (REG_P (in) || GET_CODE (in) == SUBREG) + && GET_MODE (in) == DImode); + gcc_assert (amount + && (((REG_P (amount) || GET_CODE (amount) == SUBREG) + && GET_MODE (amount) == SImode) + || CONST_INT_P (amount))); + gcc_assert (scratch1 == NULL + || (GET_CODE (scratch1) == SCRATCH) + || (GET_MODE (scratch1) == SImode + && REG_P (scratch1))); + gcc_assert (scratch2 == NULL + || (GET_CODE (scratch2) == SCRATCH) + || (GET_MODE (scratch2) == SImode + && REG_P (scratch2))); + gcc_assert (!REG_P (out) || !REG_P (amount) + || !HARD_REGISTER_P (out) + || (REGNO (out) != REGNO (amount) + && REGNO (out) + 1 != REGNO (amount))); + + /* Macros to make following code more readable. */ + #define SUB_32(DEST,SRC) \ + gen_addsi3 ((DEST), (SRC), GEN_INT (-32)) + #define RSB_32(DEST,SRC) \ + gen_subsi3 ((DEST), GEN_INT (32), (SRC)) + #define SUB_S_32(DEST,SRC) \ + gen_addsi3_compare0 ((DEST), (SRC), \ + GEN_INT (-32)) + #define SET(DEST,SRC) \ + gen_rtx_SET (SImode, (DEST), (SRC)) + #define SHIFT(CODE,SRC,AMOUNT) \ + gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT)) + #define LSHIFT(CODE,SRC,AMOUNT) \ + gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \ + SImode, (SRC), (AMOUNT)) + #define REV_LSHIFT(CODE,SRC,AMOUNT) \ + gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \ + SImode, (SRC), (AMOUNT)) + #define ORR(A,B) \ + gen_rtx_IOR (SImode, (A), (B)) + #define BRANCH(COND,LABEL) \ + gen_arm_cond_branch ((LABEL), \ + gen_rtx_ ## COND (CCmode, cc_reg, \ + const0_rtx), \ + cc_reg) + + /* Shifts by register and shifts by constant are handled separately. */ + if (CONST_INT_P (amount)) + { + /* We have a shift-by-constant. */ + + /* First, handle out-of-range shift amounts. + In both cases we try to match the result an ARM instruction in a + shift-by-register would give. This helps reduce execution + differences between optimization levels, but it won't stop other + parts of the compiler doing different things. This is "undefined + behaviour, in any case. */ + if (INTVAL (amount) <= 0) + emit_insn (gen_movdi (out, in)); + else if (INTVAL (amount) >= 64) + { + if (code == ASHIFTRT) + { + rtx const31_rtx = GEN_INT (31); + emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx))); + emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx))); + } + else + emit_insn (gen_movdi (out, const0_rtx)); + } + + /* Now handle valid shifts. */ + else if (INTVAL (amount) < 32) + { + /* Shifts by a constant less than 32. */ + rtx reverse_amount = GEN_INT (32 - INTVAL (amount)); + + emit_insn (SET (out_down, LSHIFT (code, in_down, amount))); + emit_insn (SET (out_down, + ORR (REV_LSHIFT (code, in_up, reverse_amount), + out_down))); + emit_insn (SET (out_up, SHIFT (code, in_up, amount))); + } + else + { + /* Shifts by a constant greater than 31. */ + rtx adj_amount = GEN_INT (INTVAL (amount) - 32); + + emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount))); + if (code == ASHIFTRT) + emit_insn (gen_ashrsi3 (out_up, in_up, + GEN_INT (31))); + else + emit_insn (SET (out_up, const0_rtx)); + } + } + else + { + /* We have a shift-by-register. */ + rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM); + + /* This alternative requires the scratch registers. */ + gcc_assert (scratch1 && REG_P (scratch1)); + gcc_assert (scratch2 && REG_P (scratch2)); + + /* We will need the values "amount-32" and "32-amount" later. 
+ Swapping them around now allows the later code to be more general. */ + switch (code) + { + case ASHIFT: + emit_insn (SUB_32 (scratch1, amount)); + emit_insn (RSB_32 (scratch2, amount)); + break; + case ASHIFTRT: + emit_insn (RSB_32 (scratch1, amount)); + /* Also set CC = amount > 32. */ + emit_insn (SUB_S_32 (scratch2, amount)); + break; + case LSHIFTRT: + emit_insn (RSB_32 (scratch1, amount)); + emit_insn (SUB_32 (scratch2, amount)); + break; + default: + gcc_unreachable (); + } + + /* Emit code like this: + + arithmetic-left: + out_down = in_down << amount; + out_down = (in_up << (amount - 32)) | out_down; + out_down = ((unsigned)in_up >> (32 - amount)) | out_down; + out_up = in_up << amount; + + arithmetic-right: + out_down = in_down >> amount; + out_down = (in_up << (32 - amount)) | out_down; + if (amount < 32) + out_down = ((signed)in_up >> (amount - 32)) | out_down; + out_up = in_up << amount; + + logical-right: + out_down = in_down >> amount; + out_down = (in_up << (32 - amount)) | out_down; + if (amount < 32) + out_down = ((unsigned)in_up >> (amount - 32)) | out_down; + out_up = in_up << amount; + + The ARM and Thumb2 variants are the same but implemented slightly + differently. If this were only called during expand we could just + use the Thumb2 case and let combine do the right thing, but this + can also be called from post-reload splitters. */ + + emit_insn (SET (out_down, LSHIFT (code, in_down, amount))); + + if (!TARGET_THUMB2) + { + /* Emit code for ARM mode. */ + emit_insn (SET (out_down, + ORR (SHIFT (ASHIFT, in_up, scratch1), out_down))); + if (code == ASHIFTRT) + { + rtx done_label = gen_label_rtx (); + emit_jump_insn (BRANCH (LT, done_label)); + emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2), + out_down))); + emit_label (done_label); + } + else + emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2), + out_down))); + } + else + { + /* Emit code for Thumb2 mode. + Thumb2 can't do shift and or in one insn. */ + emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1))); + emit_insn (gen_iorsi3 (out_down, out_down, scratch1)); + + if (code == ASHIFTRT) + { + rtx done_label = gen_label_rtx (); + emit_jump_insn (BRANCH (LT, done_label)); + emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2))); + emit_insn (SET (out_down, ORR (out_down, scratch2))); + emit_label (done_label); + } + else + { + emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2))); + emit_insn (gen_iorsi3 (out_down, out_down, scratch2)); + } + } + + emit_insn (SET (out_up, SHIFT (code, in_up, amount))); + } + + #undef SUB_32 + #undef RSB_32 + #undef SUB_S_32 + #undef SET + #undef SHIFT + #undef LSHIFT + #undef REV_LSHIFT + #undef ORR + #undef BRANCH +} + + +/* Returns true if a valid comparison operation and makes + the operands in a form that is valid. */ +bool +arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) +{ + enum rtx_code code = GET_CODE (*comparison); + int code_int; + enum machine_mode mode = (GET_MODE (*op1) == VOIDmode) + ? 
GET_MODE (*op2) : GET_MODE (*op1); + + gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode); + + if (code == UNEQ || code == LTGT) + return false; + + code_int = (int)code; + arm_canonicalize_comparison (&code_int, op1, op2, 0); + PUT_CODE (*comparison, (enum rtx_code)code_int); + + switch (mode) + { + case SImode: + if (!arm_add_operand (*op1, mode)) + *op1 = force_reg (mode, *op1); + if (!arm_add_operand (*op2, mode)) + *op2 = force_reg (mode, *op2); + return true; + + case DImode: + if (!cmpdi_operand (*op1, mode)) + *op1 = force_reg (mode, *op1); + if (!cmpdi_operand (*op2, mode)) + *op2 = force_reg (mode, *op2); + return true; + + case SFmode: + case DFmode: + if (!arm_float_compare_operand (*op1, mode)) + *op1 = force_reg (mode, *op1); + if (!arm_float_compare_operand (*op2, mode)) + *op2 = force_reg (mode, *op2); + return true; + default: + break; + } + + return false; + +} + +/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ + +static unsigned HOST_WIDE_INT +arm_asan_shadow_offset (void) +{ + return (unsigned HOST_WIDE_INT) 1 << 29; +} + +#include "gt-arm.h" diff --git a/gcc-4.9/gcc/config/arm/arm.h b/gcc-4.9/gcc/config/arm/arm.h new file mode 100644 index 000000000..7ca47a7ec --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm.h @@ -0,0 +1,2398 @@ +/* Definitions of target machine for GNU compiler, for ARM. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) + and Martin Simmons (@harleqn.co.uk). + More major hacks by Richard Earnshaw (rearnsha@arm.com) + Minor hacks by Nick Clifton (nickc@cygnus.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_ARM_H +#define GCC_ARM_H + +/* We can't use enum machine_mode inside a generator file because it + hasn't been created yet; we shouldn't be using any code that + needs the real definition though, so this ought to be safe. */ +#ifdef GENERATOR_FILE +#define MACHMODE int +#else +#include "insn-modes.h" +#define MACHMODE enum machine_mode +#endif + +#include "config/vxworks-dummy.h" + +/* The architecture define. */ +extern char arm_arch_name[]; + +/* Target CPU builtins. 
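+
+   For example (assuming an AAPCS-based target such as arm-linux-gnueabihf),
+   compiling with -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=hard would define,
+   among others, __ARM_ARCH to 7, __ARM_FEATURE_DSP, __ARM_NEON__ and
+   __ARM_NEON, and __ARM_PCS_VFP.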
*/ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + if (TARGET_DSP_MULTIPLY) \ + builtin_define ("__ARM_FEATURE_DSP"); \ + if (TARGET_ARM_QBIT) \ + builtin_define ("__ARM_FEATURE_QBIT"); \ + if (TARGET_ARM_SAT) \ + builtin_define ("__ARM_FEATURE_SAT"); \ + if (TARGET_CRYPTO) \ + builtin_define ("__ARM_FEATURE_CRYPTO"); \ + if (unaligned_access) \ + builtin_define ("__ARM_FEATURE_UNALIGNED"); \ + if (TARGET_CRC32) \ + builtin_define ("__ARM_FEATURE_CRC32"); \ + if (TARGET_32BIT) \ + builtin_define ("__ARM_32BIT_STATE"); \ + if (TARGET_ARM_FEATURE_LDREX) \ + builtin_define_with_int_value ( \ + "__ARM_FEATURE_LDREX", TARGET_ARM_FEATURE_LDREX); \ + if ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB) \ + || TARGET_ARM_ARCH_ISA_THUMB >=2) \ + builtin_define ("__ARM_FEATURE_CLZ"); \ + if (TARGET_INT_SIMD) \ + builtin_define ("__ARM_FEATURE_SIMD32"); \ + \ + builtin_define_with_int_value ( \ + "__ARM_SIZEOF_MINIMAL_ENUM", \ + flag_short_enums ? 1 : 4); \ + builtin_define_with_int_value ( \ + "__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE); \ + if (TARGET_ARM_ARCH_PROFILE) \ + builtin_define_with_int_value ( \ + "__ARM_ARCH_PROFILE", TARGET_ARM_ARCH_PROFILE); \ + \ + /* Define __arm__ even when in thumb mode, for \ + consistency with armcc. */ \ + builtin_define ("__arm__"); \ + if (TARGET_ARM_ARCH) \ + builtin_define_with_int_value ( \ + "__ARM_ARCH", TARGET_ARM_ARCH); \ + if (arm_arch_notm) \ + builtin_define ("__ARM_ARCH_ISA_ARM"); \ + builtin_define ("__APCS_32__"); \ + if (TARGET_THUMB) \ + builtin_define ("__thumb__"); \ + if (TARGET_THUMB2) \ + builtin_define ("__thumb2__"); \ + if (TARGET_ARM_ARCH_ISA_THUMB) \ + builtin_define_with_int_value ( \ + "__ARM_ARCH_ISA_THUMB", \ + TARGET_ARM_ARCH_ISA_THUMB); \ + \ + if (TARGET_BIG_END) \ + { \ + builtin_define ("__ARMEB__"); \ + builtin_define ("__ARM_BIG_ENDIAN"); \ + if (TARGET_THUMB) \ + builtin_define ("__THUMBEB__"); \ + if (TARGET_LITTLE_WORDS) \ + builtin_define ("__ARMWEL__"); \ + } \ + else \ + { \ + builtin_define ("__ARMEL__"); \ + if (TARGET_THUMB) \ + builtin_define ("__THUMBEL__"); \ + } \ + \ + if (TARGET_SOFT_FLOAT) \ + builtin_define ("__SOFTFP__"); \ + \ + if (TARGET_VFP) \ + builtin_define ("__VFP_FP__"); \ + \ + if (TARGET_ARM_FP) \ + builtin_define_with_int_value ( \ + "__ARM_FP", TARGET_ARM_FP); \ + if (arm_fp16_format == ARM_FP16_FORMAT_IEEE) \ + builtin_define ("__ARM_FP16_FORMAT_IEEE"); \ + if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) \ + builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE"); \ + if (TARGET_FMA) \ + builtin_define ("__ARM_FEATURE_FMA"); \ + \ + if (TARGET_NEON) \ + { \ + builtin_define ("__ARM_NEON__"); \ + builtin_define ("__ARM_NEON"); \ + } \ + if (TARGET_NEON_FP) \ + builtin_define_with_int_value ( \ + "__ARM_NEON_FP", TARGET_NEON_FP); \ + \ + /* Add a define for interworking. \ + Needed when building libgcc.a. 
*/ \ + if (arm_cpp_interwork) \ + builtin_define ("__THUMB_INTERWORK__"); \ + \ + builtin_assert ("cpu=arm"); \ + builtin_assert ("machine=arm"); \ + \ + builtin_define (arm_arch_name); \ + if (arm_arch_xscale) \ + builtin_define ("__XSCALE__"); \ + if (arm_arch_iwmmxt) \ + { \ + builtin_define ("__IWMMXT__"); \ + builtin_define ("__ARM_WMMX"); \ + } \ + if (arm_arch_iwmmxt2) \ + builtin_define ("__IWMMXT2__"); \ + if (TARGET_AAPCS_BASED) \ + { \ + if (arm_pcs_default == ARM_PCS_AAPCS_VFP) \ + builtin_define ("__ARM_PCS_VFP"); \ + else if (arm_pcs_default == ARM_PCS_AAPCS) \ + builtin_define ("__ARM_PCS"); \ + builtin_define ("__ARM_EABI__"); \ + } \ + if (TARGET_IDIV) \ + builtin_define ("__ARM_ARCH_EXT_IDIV__"); \ + } while (0) + +#include "config/arm/arm-opts.h" + +enum target_cpus +{ +#define ARM_CORE(NAME, INTERNAL_IDENT, IDENT, ARCH, FLAGS, COSTS) \ + TARGET_CPU_##INTERNAL_IDENT, +#include "arm-cores.def" +#undef ARM_CORE + TARGET_CPU_generic +}; + +/* The processor for which instructions should be scheduled. */ +extern enum processor_type arm_tune; + +typedef enum arm_cond_code +{ + ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC, + ARM_HI, ARM_LS, ARM_GE, ARM_LT, ARM_GT, ARM_LE, ARM_AL, ARM_NV +} +arm_cc; + +extern arm_cc arm_current_cc; + +#define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1)) + +/* The maximum number of instructions that is beneficial to + conditionally execute. */ +#undef MAX_CONDITIONAL_EXECUTE +#define MAX_CONDITIONAL_EXECUTE arm_max_conditional_execute () + +extern int arm_target_label; +extern int arm_ccfsm_state; +extern GTY(()) rtx arm_target_insn; +/* The label of the current constant pool. */ +extern rtx pool_vector_label; +/* Set to 1 when a return insn is output, this means that the epilogue + is not needed. */ +extern int return_used_this_function; +/* Callback to output language specific object attributes. */ +extern void (*arm_lang_output_object_attributes_hook)(void); + +/* Just in case configure has failed to define anything. */ +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT TARGET_CPU_generic +#endif + + +#undef CPP_SPEC +#define CPP_SPEC "%(subtarget_cpp_spec) \ +%{mfloat-abi=soft:%{mfloat-abi=hard: \ + %e-mfloat-abi=soft and -mfloat-abi=hard may not be used together}} \ +%{mbig-endian:%{mlittle-endian: \ + %e-mbig-endian and -mlittle-endian may not be used together}}" + +#ifndef CC1_SPEC +#define CC1_SPEC "" +#endif + +/* This macro defines names of additional specifications to put in the specs + that can be used in various specifications like CC1_SPEC. Its definition + is an initializer with a subgrouping for each command option. + + Each subgrouping contains a string constant, that defines the + specification name, and a string constant that used by the GCC driver + program. + + Do not define this macro if it does not need to do anything. */ +#define EXTRA_SPECS \ + { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \ + { "asm_cpu_spec", ASM_CPU_SPEC }, \ + SUBTARGET_EXTRA_SPECS + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "" +#endif + +/* Run-time Target Specification. */ +#define TARGET_SOFT_FLOAT (arm_float_abi == ARM_FLOAT_ABI_SOFT) +/* Use hardware floating point instructions. */ +#define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT) +/* Use hardware floating point calling convention. 
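+
+   Note the distinction from TARGET_HARD_FLOAT above: -mfloat-abi=softfp
+   selects VFP instructions with the soft-float calling convention
+   (TARGET_HARD_FLOAT but not TARGET_HARD_FLOAT_ABI), while -mfloat-abi=hard
+   additionally passes floating-point arguments and results in VFP
+   registers.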
*/ +#define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD) +#define TARGET_VFP (arm_fpu_desc->model == ARM_FP_MODEL_VFP) +#define TARGET_IWMMXT (arm_arch_iwmmxt) +#define TARGET_IWMMXT2 (arm_arch_iwmmxt2) +#define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT) +#define TARGET_REALLY_IWMMXT2 (TARGET_IWMMXT2 && TARGET_32BIT) +#define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT) +#define TARGET_ARM (! TARGET_THUMB) +#define TARGET_EITHER 1 /* (TARGET_ARM | TARGET_THUMB) */ +#define TARGET_BACKTRACE (leaf_function_p () \ + ? TARGET_TPCS_LEAF_FRAME \ + : TARGET_TPCS_FRAME) +#define TARGET_AAPCS_BASED \ + (arm_abi != ARM_ABI_APCS && arm_abi != ARM_ABI_ATPCS) + +#define TARGET_HARD_TP (target_thread_pointer == TP_CP15) +#define TARGET_SOFT_TP (target_thread_pointer == TP_SOFT) +#define TARGET_GNU2_TLS (target_tls_dialect == TLS_GNU2) + +/* Only 16-bit thumb code. */ +#define TARGET_THUMB1 (TARGET_THUMB && !arm_arch_thumb2) +/* Arm or Thumb-2 32-bit code. */ +#define TARGET_32BIT (TARGET_ARM || arm_arch_thumb2) +/* 32-bit Thumb-2 code. */ +#define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2) +/* Thumb-1 only. */ +#define TARGET_THUMB1_ONLY (TARGET_THUMB1 && !arm_arch_notm) + +#define TARGET_LDRD (arm_arch5e && ARM_DOUBLEWORD_ALIGN \ + && !TARGET_THUMB1) + +#define TARGET_CRC32 (arm_arch_crc) + +/* The following two macros concern the ability to execute coprocessor + instructions for VFPv3 or NEON. TARGET_VFP3/TARGET_VFPD32 are currently + only ever tested when we know we are generating for VFP hardware; we need + to be more careful with TARGET_NEON as noted below. */ + +/* FPU is has the full VFPv3/NEON register file of 32 D registers. */ +#define TARGET_VFPD32 (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_D32) + +/* FPU supports VFPv3 instructions. */ +#define TARGET_VFP3 (TARGET_VFP && arm_fpu_desc->rev >= 3) + +/* FPU only supports VFP single-precision instructions. */ +#define TARGET_VFP_SINGLE (TARGET_VFP && arm_fpu_desc->regs == VFP_REG_SINGLE) + +/* FPU supports VFP double-precision instructions. */ +#define TARGET_VFP_DOUBLE (TARGET_VFP && arm_fpu_desc->regs != VFP_REG_SINGLE) + +/* FPU supports half-precision floating-point with NEON element load/store. */ +#define TARGET_NEON_FP16 \ + (TARGET_VFP && arm_fpu_desc->neon && arm_fpu_desc->fp16) + +/* FPU supports VFP half-precision floating-point. */ +#define TARGET_FP16 (TARGET_VFP && arm_fpu_desc->fp16) + +/* FPU supports fused-multiply-add operations. */ +#define TARGET_FMA (TARGET_VFP && arm_fpu_desc->rev >= 4) + +/* FPU is ARMv8 compatible. */ +#define TARGET_FPU_ARMV8 (TARGET_VFP && arm_fpu_desc->rev >= 8) + +/* FPU supports Crypto extensions. */ +#define TARGET_CRYPTO (TARGET_VFP && arm_fpu_desc->crypto) + +/* FPU supports Neon instructions. The setting of this macro gets + revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT + and TARGET_HARD_FLOAT to ensure that NEON instructions are + available. */ +#define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ + && TARGET_VFP && arm_fpu_desc->neon) + +/* Q-bit is present. */ +#define TARGET_ARM_QBIT \ + (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7)) +/* Saturation operation, e.g. SSAT. */ +#define TARGET_ARM_SAT \ + (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7)) +/* "DSP" multiply instructions, eg. SMULxy. */ +#define TARGET_DSP_MULTIPLY \ + (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7em)) +/* Integer SIMD instructions, and extend-accumulate instructions. 
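+
+   That is, the 32-bit SIMD operations introduced in ARMv6 (for example
+   UADD8 and USUB16) together with the extend-and-accumulate forms such as
+   SXTAB and UXTAH; this is also the condition under which
+   __ARM_FEATURE_SIMD32 is defined above.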
*/ +#define TARGET_INT_SIMD \ + (TARGET_32BIT && arm_arch6 && (arm_arch_notm || arm_arch7em)) + +/* Should MOVW/MOVT be used in preference to a constant pool. */ +#define TARGET_USE_MOVT \ + (arm_arch_thumb2 \ + && (arm_disable_literal_pool \ + || (!optimize_size && !current_tune->prefer_constant_pool))) + +/* We could use unified syntax for arm mode, but for now we just use it + for Thumb-2. */ +#define TARGET_UNIFIED_ASM TARGET_THUMB2 + +/* Nonzero if this chip provides the DMB instruction. */ +#define TARGET_HAVE_DMB (arm_arch6m || arm_arch7) + +/* Nonzero if this chip implements a memory barrier via CP15. */ +#define TARGET_HAVE_DMB_MCR (arm_arch6 && ! TARGET_HAVE_DMB \ + && ! TARGET_THUMB1) + +/* Nonzero if this chip implements a memory barrier instruction. */ +#define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) + +/* Nonzero if this chip supports ldrex and strex */ +#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) + +/* Nonzero if this chip supports ldrex{bh} and strex{bh}. */ +#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) + +/* Nonzero if this chip supports ldrexd and strexd. */ +#define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) || arm_arch7) \ + && arm_arch_notm) + +/* Nonzero if this chip supports load-acquire and store-release. */ +#define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8) + +/* Nonzero if integer division instructions supported. */ +#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ + || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) + +/* Should NEON be used for 64-bits bitops. */ +#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits) + +/* True iff the full BPABI is being used. If TARGET_BPABI is true, + then TARGET_AAPCS_BASED must be true -- but the converse does not + hold. TARGET_BPABI implies the use of the BPABI runtime library, + etc., in addition to just the AAPCS calling conventions. */ +#ifndef TARGET_BPABI +#define TARGET_BPABI false +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-arch is ignored if -march or -mcpu are specified. + --with-cpu is ignored if -march or -mcpu are specified, and is overridden + by --with-arch. + --with-tune is ignored if -mtune or -mcpu are specified (but not affected + by -march). + --with-float is ignored if -mfloat-abi is specified. + --with-fpu is ignored if -mfpu is specified. + --with-abi is ignored if -mabi is specified. + --with-tls is ignored if -mtls-dialect is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \ + {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ + {"tune", "%{!mcpu=*:%{!mtune=*:-mtune=%(VALUE)}}" }, \ + {"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" }, \ + {"fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \ + {"abi", "%{!mabi=*:-mabi=%(VALUE)}"}, \ + {"mode", "%{!marm:%{!mthumb:-m%(VALUE)}}"}, \ + {"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"}, + +/* Which floating point model to use. */ +enum arm_fp_model +{ + ARM_FP_MODEL_UNKNOWN, + /* VFP floating point model. */ + ARM_FP_MODEL_VFP +}; + +enum vfp_reg_type +{ + VFP_NONE = 0, + VFP_REG_D16, + VFP_REG_D32, + VFP_REG_SINGLE +}; + +extern const struct arm_fpu_desc +{ + const char *name; + enum arm_fp_model model; + int rev; + enum vfp_reg_type regs; + int neon; + int fp16; + int crypto; +} *arm_fpu_desc; + +/* Which floating point hardware to schedule for. 
*/ +extern int arm_fpu_attr; + +#ifndef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT +#endif + +#define LARGEST_EXPONENT_IS_NORMAL(bits) \ + ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + +#ifndef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_APCS +#endif + +/* Map each of the micro-architecture variants to their corresponding + major architecture revision. */ + +enum base_architecture +{ + BASE_ARCH_0 = 0, + BASE_ARCH_2 = 2, + BASE_ARCH_3 = 3, + BASE_ARCH_3M = 3, + BASE_ARCH_4 = 4, + BASE_ARCH_4T = 4, + BASE_ARCH_5 = 5, + BASE_ARCH_5E = 5, + BASE_ARCH_5T = 5, + BASE_ARCH_5TE = 5, + BASE_ARCH_5TEJ = 5, + BASE_ARCH_6 = 6, + BASE_ARCH_6J = 6, + BASE_ARCH_6ZK = 6, + BASE_ARCH_6K = 6, + BASE_ARCH_6T2 = 6, + BASE_ARCH_6M = 6, + BASE_ARCH_6Z = 6, + BASE_ARCH_7 = 7, + BASE_ARCH_7A = 7, + BASE_ARCH_7R = 7, + BASE_ARCH_7M = 7, + BASE_ARCH_7EM = 7, + BASE_ARCH_8A = 8 +}; + +/* The major revision number of the ARM Architecture implemented by the target. */ +extern enum base_architecture arm_base_arch; + +/* Nonzero if this chip supports the ARM Architecture 3M extensions. */ +extern int arm_arch3m; + +/* Nonzero if this chip supports the ARM Architecture 4 extensions. */ +extern int arm_arch4; + +/* Nonzero if this chip supports the ARM Architecture 4T extensions. */ +extern int arm_arch4t; + +/* Nonzero if this chip supports the ARM Architecture 5 extensions. */ +extern int arm_arch5; + +/* Nonzero if this chip supports the ARM Architecture 5E extensions. */ +extern int arm_arch5e; + +/* Nonzero if this chip supports the ARM Architecture 6 extensions. */ +extern int arm_arch6; + +/* Nonzero if this chip supports the ARM Architecture 6k extensions. */ +extern int arm_arch6k; + +/* Nonzero if instructions present in ARMv6-M can be used. */ +extern int arm_arch6m; + +/* Nonzero if this chip supports the ARM Architecture 7 extensions. */ +extern int arm_arch7; + +/* Nonzero if instructions not present in the 'M' profile can be used. */ +extern int arm_arch_notm; + +/* Nonzero if instructions present in ARMv7E-M can be used. */ +extern int arm_arch7em; + +/* Nonzero if this chip supports the ARM Architecture 8 extensions. */ +extern int arm_arch8; + +/* Nonzero if this chip can benefit from load scheduling. */ +extern int arm_ld_sched; + +/* Nonzero if generating Thumb code, either Thumb-1 or Thumb-2. */ +extern int thumb_code; + +/* Nonzero if generating Thumb-1 code. */ +extern int thumb1_code; + +/* Nonzero if this chip is a StrongARM. */ +extern int arm_tune_strongarm; + +/* Nonzero if this chip supports Intel XScale with Wireless MMX technology. */ +extern int arm_arch_iwmmxt; + +/* Nonzero if this chip supports Intel Wireless MMX2 technology. */ +extern int arm_arch_iwmmxt2; + +/* Nonzero if this chip is an XScale. */ +extern int arm_arch_xscale; + +/* Nonzero if tuning for XScale. */ +extern int arm_tune_xscale; + +/* Nonzero if tuning for stores via the write buffer. */ +extern int arm_tune_wbuf; + +/* Nonzero if tuning for Cortex-A9. */ +extern int arm_tune_cortex_a9; + +/* Nonzero if we should define __THUMB_INTERWORK__ in the + preprocessor. + XXX This is a bit of a hack, it's intended to help work around + problems in GLD which doesn't understand that armv5t code is + interworking clean. */ +extern int arm_cpp_interwork; + +/* Nonzero if chip supports Thumb 2. */ +extern int arm_arch_thumb2; + +/* Nonzero if chip supports integer division instruction in ARM mode. 
*/ +extern int arm_arch_arm_hwdiv; + +/* Nonzero if chip supports integer division instruction in Thumb mode. */ +extern int arm_arch_thumb_hwdiv; + +/* Nonzero if we should use Neon to handle 64-bits operations rather + than core registers. */ +extern int prefer_neon_for_64bits; + +/* Nonzero if we shouldn't use literal pools. */ +#ifndef USED_FOR_TARGET +extern bool arm_disable_literal_pool; +#endif + +/* Nonzero if chip supports the ARMv8 CRC instructions. */ +extern int arm_arch_crc; + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +/* Nonzero if PIC code requires explicit qualifiers to generate + PLT and GOT relocs rather than the assembler doing so implicitly. + Subtargets can override these if required. */ +#ifndef NEED_GOT_RELOC +#define NEED_GOT_RELOC 0 +#endif +#ifndef NEED_PLT_RELOC +#define NEED_PLT_RELOC 0 +#endif + +#ifndef TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE +#define TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE 1 +#endif + +/* Nonzero if we need to refer to the GOT with a PC-relative + offset. In other words, generate + + .word _GLOBAL_OFFSET_TABLE_ - [. - (.Lxx + 8)] + + rather than + + .word _GLOBAL_OFFSET_TABLE_ - (.Lxx + 8) + + The default is true, which matches NetBSD. Subtargets can + override this if required. */ +#ifndef GOT_PCREL +#define GOT_PCREL 1 +#endif + +/* Target machine storage Layout. */ + + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ + +/* It is far faster to zero extend chars than to sign extend them */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + { \ + if (MODE == QImode) \ + UNSIGNEDP = 1; \ + else if (MODE == HImode) \ + UNSIGNEDP = 1; \ + (MODE) = SImode; \ + } + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + Most ARM processors are run in little endian mode, so that is the default. + If you want to have it run-time selectable, change the definition in a + cover file to be TARGET_BIG_ENDIAN. */ +#define BYTES_BIG_ENDIAN (TARGET_BIG_END != 0) + +/* Define this if most significant word of a multiword number is the lowest + numbered. + This is always false, even when in big-endian mode. */ +#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN && ! TARGET_LITTLE_WORDS) + +#define UNITS_PER_WORD 4 + +/* True if natural alignment is used for doubleword types. */ +#define ARM_DOUBLEWORD_ALIGN TARGET_AAPCS_BASED + +#define DOUBLEWORD_ALIGNMENT 64 + +#define PARM_BOUNDARY 32 + +#define STACK_BOUNDARY (ARM_DOUBLEWORD_ALIGN ? DOUBLEWORD_ALIGNMENT : 32) + +#define PREFERRED_STACK_BOUNDARY \ + (arm_abi == ARM_ABI_ATPCS ? 64 : STACK_BOUNDARY) + +#define FUNCTION_BOUNDARY ((TARGET_THUMB && optimize_size) ? 16 : 32) + +/* The lowest bit is used to indicate Thumb-mode functions, so the + vbit must go into the delta field of pointers to member + functions. */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +#define EMPTY_FIELD_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT (ARM_DOUBLEWORD_ALIGN ? 
DOUBLEWORD_ALIGNMENT : 32) + +#define MALLOC_ABI_ALIGNMENT BIGGEST_ALIGNMENT + +/* XXX Blah -- this macro is used directly by libobjc. Since it + supports no vector modes, cut out the complexity and fall back + on BIGGEST_FIELD_ALIGNMENT. */ +#ifdef IN_TARGET_LIBS +#define BIGGEST_FIELD_ALIGNMENT 64 +#endif + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT_FACTOR (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2) + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && !optimize_size \ + && (ALIGN) < BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR) \ + ? BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR : (ALIGN)) + +/* Align definitions of arrays, unions and structures so that + initializations and copies can be made more efficient. This is not + ABI-changing, so it only affects places where we can see the + definition. Increasing the alignment tends to introduce padding, + so don't do this when optimizing for size/conserving stack space. */ +#define ARM_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \ + (((COND) && ((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (EXP) == ARRAY_TYPE \ + || TREE_CODE (EXP) == UNION_TYPE \ + || TREE_CODE (EXP) == RECORD_TYPE)) ? BITS_PER_WORD : (ALIGN)) + +/* Align global data. */ +#define DATA_ALIGNMENT(EXP, ALIGN) \ + ARM_EXPAND_ALIGNMENT(!optimize_size, EXP, ALIGN) + +/* Similarly, make sure that objects on the stack are sensibly aligned. */ +#define LOCAL_ALIGNMENT(EXP, ALIGN) \ + ARM_EXPAND_ALIGNMENT(!flag_conserve_stack, EXP, ALIGN) + +/* Setting STRUCTURE_SIZE_BOUNDARY to 32 produces more efficient code, but the + value set in previous versions of this toolchain was 8, which produces more + compact structures. The command line option -mstructure_size_boundary= + can be used to change this value. For compatibility with the ARM SDK + however the value should be left at 32. ARM SDT Reference Manual (ARM DUI + 0020D) page 2-20 says "Structures are aligned on word boundaries". + The AAPCS specifies a value of 8. */ +#define STRUCTURE_SIZE_BOUNDARY arm_structure_size_boundary + +/* This is the value used to initialize arm_structure_size_boundary. If a + particular arm target wants to change the default value it should change + the definition of this macro, not STRUCTURE_SIZE_BOUNDARY. See netbsd.h + for an example of this. */ +#ifndef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 32 +#endif + +/* Nonzero if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* wchar_t is unsigned under the AAPCS. */ +#ifndef WCHAR_TYPE +#define WCHAR_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "int") + +#define WCHAR_TYPE_SIZE BITS_PER_WORD +#endif + +/* Sized for fixed-point types. */ + +#define SHORT_FRACT_TYPE_SIZE 8 +#define FRACT_TYPE_SIZE 16 +#define LONG_FRACT_TYPE_SIZE 32 +#define LONG_LONG_FRACT_TYPE_SIZE 64 + +#define SHORT_ACCUM_TYPE_SIZE 16 +#define ACCUM_TYPE_SIZE 32 +#define LONG_ACCUM_TYPE_SIZE 64 +#define LONG_LONG_ACCUM_TYPE_SIZE 64 + +#define MAX_FIXED_MODE_SIZE 64 + +#ifndef SIZE_TYPE +#define SIZE_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "long unsigned int") +#endif + +#ifndef PTRDIFF_TYPE +#define PTRDIFF_TYPE (TARGET_AAPCS_BASED ? "int" : "long int") +#endif + +/* AAPCS requires that structure alignment is affected by bitfields. */ +#ifndef PCC_BITFIELD_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS TARGET_AAPCS_BASED +#endif + + +/* Standard register usage. 
*/ + +/* Register allocation in ARM Procedure Call Standard + (S - saved over call). + + r0 * argument word/integer result + r1-r3 argument word + + r4-r8 S register variable + r9 S (rfp) register variable (real frame pointer) + + r10 F S (sl) stack limit (used by -mapcs-stack-check) + r11 F S (fp) argument pointer + r12 (ip) temp workspace + r13 F S (sp) lower end of current stack frame + r14 (lr) link address/workspace + r15 F (pc) program counter + + cc This is NOT a real register, but is used internally + to represent things that use or set the condition + codes. + sfp This isn't either. It is used during rtl generation + since the offset between the frame pointer and the + auto's isn't known until after register allocation. + afp Nor this, we only need this because of non-local + goto. Without it fp appears to be used and the + elimination code won't get rid of sfp. It tracks + fp exactly at all times. + + *: See TARGET_CONDITIONAL_REGISTER_USAGE */ + +/* s0-s15 VFP scratch (aka d0-d7). + s16-s31 S VFP variable (aka d8-d15). + vfpcc Not a real register. Represents the VFP condition + code flags. */ + +/* The stack backtrace structure is as follows: + fp points to here: | save code pointer | [fp] + | return link value | [fp, #-4] + | return sp value | [fp, #-8] + | return fp value | [fp, #-12] + [| saved r10 value |] + [| saved r9 value |] + [| saved r8 value |] + [| saved r7 value |] + [| saved r6 value |] + [| saved r5 value |] + [| saved r4 value |] + [| saved r3 value |] + [| saved r2 value |] + [| saved r1 value |] + [| saved r0 value |] + r0-r3 are not normally saved in a C function. */ + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ +#define FIXED_REGISTERS \ +{ \ + /* Core regs. */ \ + 0,0,0,0,0,0,0,0, \ + 0,0,0,0,0,1,0,1, \ + /* VFP regs. */ \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + /* IWMMXT regs. */ \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1, \ + /* Specials. */ \ + 1,1,1,1 \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. + The CC is not preserved over function calls on the ARM 6, so it is + easier to assume this for all. SFP is preserved, since FP is. */ +#define CALL_USED_REGISTERS \ +{ \ + /* Core regs. */ \ + 1,1,1,1,0,0,0,0, \ + 0,0,0,0,1,1,1,1, \ + /* VFP Regs. */ \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + /* IWMMXT regs. */ \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1,1,1,1,1, \ + 1,1,1,1, \ + /* Specials. 
*/ \ + 1,1,1,1 \ +} + +#ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE +#define SUBTARGET_CONDITIONAL_REGISTER_USAGE +#endif + +/* These are a couple of extensions to the formats accepted + by asm_fprintf: + %@ prints out ASM_COMMENT_START + %r prints out REGISTER_PREFIX reg_names[arg] */ +#define ASM_FPRINTF_EXTENSIONS(FILE, ARGS, P) \ + case '@': \ + fputs (ASM_COMMENT_START, FILE); \ + break; \ + \ + case 'r': \ + fputs (REGISTER_PREFIX, FILE); \ + fputs (reg_names [va_arg (ARGS, int)], FILE); \ + break; + +/* Round X up to the nearest word. */ +#define ROUND_UP_WORD(X) (((X) + 3) & ~3) + +/* Convert fron bytes to ints. */ +#define ARM_NUM_INTS(X) (((X) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* The number of (integer) registers required to hold a quantity of type MODE. + Also used for VFP registers. */ +#define ARM_NUM_REGS(MODE) \ + ARM_NUM_INTS (GET_MODE_SIZE (MODE)) + +/* The number of (integer) registers required to hold a quantity of TYPE MODE. */ +#define ARM_NUM_REGS2(MODE, TYPE) \ + ARM_NUM_INTS ((MODE) == BLKmode ? \ + int_size_in_bytes (TYPE) : GET_MODE_SIZE (MODE)) + +/* The number of (integer) argument register available. */ +#define NUM_ARG_REGS 4 + +/* And similarly for the VFP. */ +#define NUM_VFP_ARG_REGS 16 + +/* Return the register number of the N'th (integer) argument. */ +#define ARG_REGISTER(N) (N - 1) + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* The number of the last argument register. */ +#define LAST_ARG_REGNUM ARG_REGISTER (NUM_ARG_REGS) + +/* The numbers of the Thumb register ranges. */ +#define FIRST_LO_REGNUM 0 +#define LAST_LO_REGNUM 7 +#define FIRST_HI_REGNUM 8 +#define LAST_HI_REGNUM 11 + +/* Overridden by config/arm/bpabi.h. */ +#ifndef ARM_UNWIND_INFO +#define ARM_UNWIND_INFO 0 +#endif + +/* Use r0 and r1 to pass exception handling information. */ +#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? N : INVALID_REGNUM) + +/* The register that holds the return address in exception handlers. */ +#define ARM_EH_STACKADJ_REGNUM 2 +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM) + +#ifndef ARM_TARGET2_DWARF_FORMAT +#define ARM_TARGET2_DWARF_FORMAT DW_EH_PE_pcrel + +/* ttype entries (the only interesting data references used) + use TARGET2 relocations. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \ + (((code) == 0 && (data) == 1 && ARM_UNWIND_INFO) ? ARM_TARGET2_DWARF_FORMAT \ + : DW_EH_PE_absptr) +#endif + +/* The native (Norcroft) Pascal compiler for the ARM passes the static chain + as an invisible last argument (possible since varargs don't exist in + Pascal), so the following is not true. */ +#define STATIC_CHAIN_REGNUM 12 + +/* Define this to be where the real frame pointer is if it is not possible to + work out the offset between the frame pointer and the automatic variables + until after register allocation has taken place. FRAME_POINTER_REGNUM + should point to a special register that we will make sure is eliminated. + + For the Thumb we have another problem. The TPCS defines the frame pointer + as r11, and GCC believes that it is always possible to use the frame pointer + as base register for addressing purposes. (See comments in + find_reloads_address()). But - the Thumb does not allow high registers, + including r11, to be used as base address registers. Hence our problem. 
+ + The solution used here, and in the old thumb port is to use r7 instead of + r11 as the hard frame pointer and to have special code to generate + backtrace structures on the stack (if required to do so via a command line + option) using r11. This is the only 'user visible' use of r11 as a frame + pointer. */ +#define ARM_HARD_FRAME_POINTER_REGNUM 11 +#define THUMB_HARD_FRAME_POINTER_REGNUM 7 + +#define HARD_FRAME_POINTER_REGNUM \ + (TARGET_ARM \ + ? ARM_HARD_FRAME_POINTER_REGNUM \ + : THUMB_HARD_FRAME_POINTER_REGNUM) + +#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0 +#define HARD_FRAME_POINTER_IS_ARG_POINTER 0 + +#define FP_REGNUM HARD_FRAME_POINTER_REGNUM + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM SP_REGNUM + +#define FIRST_IWMMXT_REGNUM (LAST_HI_VFP_REGNUM + 1) +#define LAST_IWMMXT_REGNUM (FIRST_IWMMXT_REGNUM + 15) + +/* Need to sync with WCGR in iwmmxt.md. */ +#define FIRST_IWMMXT_GR_REGNUM (LAST_IWMMXT_REGNUM + 1) +#define LAST_IWMMXT_GR_REGNUM (FIRST_IWMMXT_GR_REGNUM + 3) + +#define IS_IWMMXT_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_IWMMXT_REGNUM) && ((REGNUM) <= LAST_IWMMXT_REGNUM)) +#define IS_IWMMXT_GR_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_IWMMXT_GR_REGNUM) && ((REGNUM) <= LAST_IWMMXT_GR_REGNUM)) + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM 102 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 103 + +#define FIRST_VFP_REGNUM 16 +#define D7_VFP_REGNUM (FIRST_VFP_REGNUM + 15) +#define LAST_VFP_REGNUM \ + (TARGET_VFPD32 ? LAST_HI_VFP_REGNUM : LAST_LO_VFP_REGNUM) + +#define IS_VFP_REGNUM(REGNUM) \ + (((REGNUM) >= FIRST_VFP_REGNUM) && ((REGNUM) <= LAST_VFP_REGNUM)) + +/* VFP registers are split into two types: those defined by VFP versions < 3 + have D registers overlaid on consecutive pairs of S registers. VFP version 3 + defines 16 new D registers (d16-d31) which, for simplicity and correctness + in various parts of the backend, we implement as "fake" single-precision + registers (which would be S32-S63, but cannot be used in that way). The + following macros define these ranges of registers. */ +#define LAST_LO_VFP_REGNUM (FIRST_VFP_REGNUM + 31) +#define FIRST_HI_VFP_REGNUM (LAST_LO_VFP_REGNUM + 1) +#define LAST_HI_VFP_REGNUM (FIRST_HI_VFP_REGNUM + 31) + +#define VFP_REGNO_OK_FOR_SINGLE(REGNUM) \ + ((REGNUM) <= LAST_LO_VFP_REGNUM) + +/* DFmode values are only valid in even register pairs. */ +#define VFP_REGNO_OK_FOR_DOUBLE(REGNUM) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 1) == 0) + +/* Neon Quad values must start at a multiple of four registers. */ +#define NEON_REGNO_OK_FOR_QUAD(REGNUM) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0) + +/* Neon structures of vectors must be in even register pairs and there + must be enough registers available. Because of various patterns + requiring quad registers, we require them to start at a multiple of + four. */ +#define NEON_REGNO_OK_FOR_NREGS(REGNUM, N) \ + ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0 \ + && (LAST_VFP_REGNUM - (REGNUM) >= 2 * (N) - 1)) + +/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP. */ +/* Intel Wireless MMX Technology registers add 16 + 4 more. */ +/* VFP (VFP3) adds 32 (64) + 1 VFPCC. */ +#define FIRST_PSEUDO_REGISTER 104 + +#define DBX_REGISTER_NUMBER(REGNO) arm_dbx_register_number (REGNO) + +/* Value should be nonzero if functions must have frame pointers. 
+ Zero means the frame pointer need not be set up (and parms may be accessed + via the stack pointer) in functions that seem suitable. + If we have to have a frame pointer we might as well make use of it. + APCS says that the frame pointer does not need to be pushed in leaf + functions, or simple tail call functions. */ + +#ifndef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED 0 +#endif + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. + + On the ARM core regs are UNITS_PER_WORD bits wide. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((TARGET_32BIT \ + && REGNO > PC_REGNUM \ + && REGNO != FRAME_POINTER_REGNUM \ + && REGNO != ARG_POINTER_REGNUM) \ + && !IS_VFP_REGNUM (REGNO) \ + ? 1 : ARM_NUM_REGS (MODE)) + +/* Return true if REGNO is suitable for holding a quantity of type MODE. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + arm_hard_regno_mode_ok ((REGNO), (MODE)) + +#define MODES_TIEABLE_P(MODE1, MODE2) arm_modes_tieable_p (MODE1, MODE2) + +#define VALID_IWMMXT_REG_MODE(MODE) \ + (arm_vector_mode_supported_p (MODE) || (MODE) == DImode) + +/* Modes valid for Neon D registers. */ +#define VALID_NEON_DREG_MODE(MODE) \ + ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ + || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode) + +/* Modes valid for Neon Q registers. */ +#define VALID_NEON_QREG_MODE(MODE) \ + ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \ + || (MODE) == V4SFmode || (MODE) == V2DImode) + +/* Structure modes valid for Neon registers. */ +#define VALID_NEON_STRUCT_MODE(MODE) \ + ((MODE) == TImode || (MODE) == EImode || (MODE) == OImode \ + || (MODE) == CImode || (MODE) == XImode) + +/* The register numbers in sequence, for passing to arm_gen_load_multiple. */ +extern int arm_regs_in_sequence[]; + +/* The order in which register should be allocated. It is good to use ip + since no saving is required (though calls clobber it) and it never contains + function parameters. It is quite good to use lr since other calls may + clobber it anyway. Allocate r0 through r3 in reverse order since r3 is + least likely to contain a function parameter; in addition results are + returned in r0. + For VFP/VFPv3, allocate D16-D31 first, then caller-saved registers (D0-D7), + then D8-D15. The reason for doing this is to attempt to reduce register + pressure when both single- and double-precision registers are used in a + function. */ + +#define VREG(X) (FIRST_VFP_REGNUM + (X)) +#define WREG(X) (FIRST_IWMMXT_REGNUM + (X)) +#define WGREG(X) (FIRST_IWMMXT_GR_REGNUM + (X)) + +#define REG_ALLOC_ORDER \ +{ \ + /* General registers. */ \ + 3, 2, 1, 0, 12, 14, 4, 5, \ + 6, 7, 8, 9, 10, 11, \ + /* High VFP registers. */ \ + VREG(32), VREG(33), VREG(34), VREG(35), \ + VREG(36), VREG(37), VREG(38), VREG(39), \ + VREG(40), VREG(41), VREG(42), VREG(43), \ + VREG(44), VREG(45), VREG(46), VREG(47), \ + VREG(48), VREG(49), VREG(50), VREG(51), \ + VREG(52), VREG(53), VREG(54), VREG(55), \ + VREG(56), VREG(57), VREG(58), VREG(59), \ + VREG(60), VREG(61), VREG(62), VREG(63), \ + /* VFP argument registers. */ \ + VREG(15), VREG(14), VREG(13), VREG(12), \ + VREG(11), VREG(10), VREG(9), VREG(8), \ + VREG(7), VREG(6), VREG(5), VREG(4), \ + VREG(3), VREG(2), VREG(1), VREG(0), \ + /* VFP call-saved registers. 
*/ \ + VREG(16), VREG(17), VREG(18), VREG(19), \ + VREG(20), VREG(21), VREG(22), VREG(23), \ + VREG(24), VREG(25), VREG(26), VREG(27), \ + VREG(28), VREG(29), VREG(30), VREG(31), \ + /* IWMMX registers. */ \ + WREG(0), WREG(1), WREG(2), WREG(3), \ + WREG(4), WREG(5), WREG(6), WREG(7), \ + WREG(8), WREG(9), WREG(10), WREG(11), \ + WREG(12), WREG(13), WREG(14), WREG(15), \ + WGREG(0), WGREG(1), WGREG(2), WGREG(3), \ + /* Registers not for general use. */ \ + CC_REGNUM, VFPCC_REGNUM, \ + FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, \ + SP_REGNUM, PC_REGNUM \ +} + +/* Use different register alloc ordering for Thumb. */ +#define ADJUST_REG_ALLOC_ORDER arm_order_regs_for_local_alloc () + +/* Tell IRA to use the order we define rather than messing it up with its + own cost calculations. */ +#define HONOR_REG_ALLOC_ORDER + +/* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. */ +#define HARD_REGNO_RENAME_OK(SRC, DST) \ + (! IS_INTERRUPT (cfun->machine->func_type) || \ + df_regs_ever_live_p (DST)) + +/* Register and constant classes. */ + +/* Register classes. */ +enum reg_class +{ + NO_REGS, + LO_REGS, + STACK_REG, + BASE_REGS, + HI_REGS, + CALLER_SAVE_REGS, + GENERAL_REGS, + CORE_REGS, + VFP_D0_D7_REGS, + VFP_LO_REGS, + VFP_HI_REGS, + VFP_REGS, + IWMMXT_REGS, + IWMMXT_GR_REGS, + CC_REG, + VFPCC_REG, + SFP_REG, + AFP_REG, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "LO_REGS", \ + "STACK_REG", \ + "BASE_REGS", \ + "HI_REGS", \ + "CALLER_SAVE_REGS", \ + "GENERAL_REGS", \ + "CORE_REGS", \ + "VFP_D0_D7_REGS", \ + "VFP_LO_REGS", \ + "VFP_HI_REGS", \ + "VFP_REGS", \ + "IWMMXT_REGS", \ + "IWMMXT_GR_REGS", \ + "CC_REG", \ + "VFPCC_REG", \ + "SFP_REG", \ + "AFP_REG", \ + "ALL_REGS" \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. */ +#define REG_CLASS_CONTENTS \ +{ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x000000FF, 0x00000000, 0x00000000, 0x00000000 }, /* LO_REGS */ \ + { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ + { 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \ + { 0x00005F00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \ + { 0x0000100F, 0x00000000, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \ + { 0x00005FFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \ + { 0x00007FFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \ + { 0xFFFF0000, 0x00000000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \ + { 0xFFFF0000, 0x0000FFFF, 0x00000000, 0x00000000 }, /* VFP_LO_REGS */ \ + { 0x00000000, 0xFFFF0000, 0x0000FFFF, 0x00000000 }, /* VFP_HI_REGS */ \ + { 0xFFFF0000, 0xFFFFFFFF, 0x0000FFFF, 0x00000000 }, /* VFP_REGS */ \ + { 0x00000000, 0x00000000, 0xFFFF0000, 0x00000000 }, /* IWMMXT_REGS */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x0000000F }, /* IWMMXT_GR_REGS */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000010 }, /* CC_REG */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000020 }, /* VFPCC_REG */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000040 }, /* SFP_REG */ \ + { 0x00000000, 0x00000000, 0x00000000, 0x00000080 }, /* AFP_REG */ \ + { 0xFFFF7FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F } /* ALL_REGS */ \ +} + +/* Any of the VFP register classes. 
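+
+   In the REG_CLASS_CONTENTS table above, each 32-bit word covers 32
+   consecutive hard register numbers, so with FIRST_VFP_REGNUM == 16 the
+   VFP_D0_D7_REGS mask { 0xFFFF0000, 0, 0, 0 } selects hard registers
+   16-31, i.e. s0-s15 (overlaid on d0-d7).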
*/ +#define IS_VFP_CLASS(X) \ + ((X) == VFP_D0_D7_REGS || (X) == VFP_LO_REGS \ + || (X) == VFP_HI_REGS || (X) == VFP_REGS) + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ +#define REGNO_REG_CLASS(REGNO) arm_regno_class (REGNO) + +/* In VFPv1, VFP registers could only be accessed in the mode they + were set, so subregs would be invalid there. However, we don't + support VFPv1 at the moment, and the restriction was lifted in + VFPv2. + In big-endian mode, modes greater than word size (i.e. DFmode) are stored in + VFP registers in little-endian order. We can't describe that accurately to + GCC, so avoid taking subregs of such values. + The only exception is going from a 128-bit to a 64-bit type. In that case + the data layout happens to be consistent for big-endian, so we explicitly allow + that case. */ +#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ + (TARGET_VFP && TARGET_BIG_END \ + && !(GET_MODE_SIZE (FROM) == 16 && GET_MODE_SIZE (TO) == 8) \ + && (GET_MODE_SIZE (FROM) > UNITS_PER_WORD \ + || GET_MODE_SIZE (TO) > UNITS_PER_WORD) \ + && reg_classes_intersect_p (VFP_REGS, (CLASS))) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS (TARGET_THUMB1 ? LO_REGS : GENERAL_REGS) +#define BASE_REG_CLASS (TARGET_THUMB1 ? LO_REGS : CORE_REGS) + +/* For the Thumb the high registers cannot be used as base registers + when addressing quantities in QI or HI mode; if we don't know the + mode, then we must be conservative. */ +#define MODE_BASE_REG_CLASS(MODE) \ + (arm_lra_flag \ + ? (TARGET_32BIT ? CORE_REGS \ + : GET_MODE_SIZE (MODE) >= 4 ? BASE_REGS \ + : LO_REGS) \ + : ((TARGET_ARM || (TARGET_THUMB2 && !optimize_size)) ? CORE_REGS \ + : ((MODE) == SImode) ? BASE_REGS \ + : LO_REGS)) + +/* For Thumb we can not support SP+reg addressing, so we return LO_REGS + instead of BASE_REGS. */ +#define MODE_BASE_REG_REG_CLASS(MODE) BASE_REG_CLASS + +/* When this hook returns true for MODE, the compiler allows + registers explicitly used in the rtl to be used as spill registers + but prevents the compiler from extending the lifetime of these + registers. */ +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \ + arm_small_register_classes_for_mode_p + +/* Must leave BASE_REGS reloads alone */ +#define THUMB_SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ + (lra_in_progress ? NO_REGS \ + : ((CLASS) != LO_REGS && (CLASS) != BASE_REGS \ + ? ((true_regnum (X) == -1 ? LO_REGS \ + : (true_regnum (X) + HARD_REGNO_NREGS (0, MODE) > 8) ? LO_REGS \ + : NO_REGS)) \ + : NO_REGS)) + +#define THUMB_SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ + (lra_in_progress ? NO_REGS \ + : (CLASS) != LO_REGS && (CLASS) != BASE_REGS \ + ? ((true_regnum (X) == -1 ? LO_REGS \ + : (true_regnum (X) + HARD_REGNO_NREGS (0, MODE) > 8) ? LO_REGS \ + : NO_REGS)) \ + : NO_REGS) + +/* Return the register class of a scratch register needed to copy IN into + or out of a register in CLASS in MODE. If it can be done directly, + NO_REGS is returned. */ +#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ + /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \ + ((TARGET_VFP && TARGET_HARD_FLOAT \ + && IS_VFP_CLASS (CLASS)) \ + ? coproc_secondary_reload_class (MODE, X, FALSE) \ + : (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) \ + ? coproc_secondary_reload_class (MODE, X, TRUE) \ + : TARGET_32BIT \ + ? (((MODE) == HImode && ! 
arm_arch4 && true_regnum (X) == -1) \ + ? GENERAL_REGS : NO_REGS) \ + : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X)) + +/* If we need to load shorts byte-at-a-time, then we need a scratch. */ +#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ + /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \ + ((TARGET_VFP && TARGET_HARD_FLOAT \ + && IS_VFP_CLASS (CLASS)) \ + ? coproc_secondary_reload_class (MODE, X, FALSE) : \ + (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) ? \ + coproc_secondary_reload_class (MODE, X, TRUE) : \ + (TARGET_32BIT ? \ + (((CLASS) == IWMMXT_REGS || (CLASS) == IWMMXT_GR_REGS) \ + && CONSTANT_P (X)) \ + ? GENERAL_REGS : \ + (((MODE) == HImode && ! arm_arch4 \ + && (MEM_P (X) \ + || ((REG_P (X) || GET_CODE (X) == SUBREG) \ + && true_regnum (X) == -1))) \ + ? GENERAL_REGS : NO_REGS) \ + : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X))) + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. + + For the ARM, we wish to handle large displacements off a base + register by splitting the addend across a MOV and the mem insn. + This can cut the number of reloads needed. */ +#define ARM_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND, WIN) \ + do \ + { \ + if (arm_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND)) \ + goto WIN; \ + } \ + while (0) + +/* XXX If an HImode FP+large_offset address is converted to an HImode + SP+large_offset address, then reload won't know how to fix it. It sees + only that SP isn't valid for HImode, and so reloads the SP into an index + register, but the resulting address is still invalid because the offset + is too big. We fix it here instead by reloading the entire address. */ +/* We could probably achieve better results by defining PROMOTE_MODE to help + cope with the variances between the Thumb's signed and unsigned byte and + halfword load instructions. */ +/* ??? This should be safe for thumb2, but we may be able to do better. */ +#define THUMB_LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ +do { \ + rtx new_x = thumb_legitimize_reload_address (&X, MODE, OPNUM, TYPE, IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ +} while (0) + +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \ + if (TARGET_ARM) \ + ARM_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN); \ + else \ + THUMB_LEGITIMIZE_RELOAD_ADDRESS (X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. + ARM regs are UNITS_PER_WORD bits. + FIXME: Is this true for iWMMX? */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (ARM_NUM_REGS (MODE)) + +/* If defined, gives a class of registers that cannot be used as the + operand of a SUBREG that changes the mode of the object illegally. */ + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* The amount of scratch space needed by _interwork_{r7,r11}_call_via_rN(). 
+ When present, it is one word in size, and sits at the top of the frame, + between the soft frame pointer and either r7 or r11. + + We only need _interwork_rM_call_via_rN() for -mcaller-super-interworking, + and only then if some outgoing arguments are passed on the stack. It would + be tempting to also check whether the stack arguments are passed by indirect + calls, but there seems to be no reason in principle why a post-reload pass + couldn't convert a direct call into an indirect one. */ +#define CALLER_INTERWORKING_SLOT_SIZE \ + (TARGET_CALLER_INTERWORKING \ + && crtl->outgoing_args_size != 0 \ + ? UNITS_PER_WORD : 0) + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. */ +/* The push insns do not do this rounding implicitly. + So don't define this. */ +/* #define PUSH_ROUNDING(NPUSHED) ROUND_UP_WORD (NPUSHED) */ + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset of first parameter from the argument pointer register value. */ +#define FIRST_PARM_OFFSET(FNDECL) (TARGET_ARM ? 4 : 0) + +/* Amount of memory needed for an untyped call to save all possible return + registers. */ +#define APPLY_RESULT_SIZE arm_apply_result_size() + +/* Define DEFAULT_PCC_STRUCT_RETURN to 1 if all structure and union return + values must be in memory. On the ARM, they need only do so if larger + than a word, or if they contain elements offset from zero in the struct. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* These bits describe the different types of function supported + by the ARM backend. They are exclusive. i.e. a function cannot be both a + normal function and an interworked function, for example. Knowing the + type of a function is important for determining its prologue and + epilogue sequences. + Note value 7 is currently unassigned. Also note that the interrupt + function types all have bit 2 set, so that they can be tested for easily. + Note that 0 is deliberately chosen for ARM_FT_UNKNOWN so that when the + machine_function structure is initialized (to zero) func_type will + default to unknown. This will force the first use of arm_current_func_type + to call arm_compute_func_type. */ +#define ARM_FT_UNKNOWN 0 /* Type has not yet been determined. */ +#define ARM_FT_NORMAL 1 /* Your normal, straightforward function. */ +#define ARM_FT_INTERWORKED 2 /* A function that supports interworking. */ +#define ARM_FT_ISR 4 /* An interrupt service routine. */ +#define ARM_FT_FIQ 5 /* A fast interrupt service routine. */ +#define ARM_FT_EXCEPTION 6 /* An ARM exception handler (subcase of ISR). */ + +#define ARM_FT_TYPE_MASK ((1 << 3) - 1) + +/* In addition functions can have several type modifiers, + outlined by these bit masks: */ +#define ARM_FT_INTERRUPT (1 << 2) /* Note overlap with FT_ISR and above. */ +#define ARM_FT_NAKED (1 << 3) /* No prologue or epilogue. */ +#define ARM_FT_VOLATILE (1 << 4) /* Does not return. */ +#define ARM_FT_NESTED (1 << 5) /* Embedded inside another func. */ +#define ARM_FT_STACKALIGN (1 << 6) /* Called with misaligned stack. */ + +/* Some macros to test these flags. 
*/ +#define ARM_FUNC_TYPE(t) (t & ARM_FT_TYPE_MASK) +#define IS_INTERRUPT(t) (t & ARM_FT_INTERRUPT) +#define IS_VOLATILE(t) (t & ARM_FT_VOLATILE) +#define IS_NAKED(t) (t & ARM_FT_NAKED) +#define IS_NESTED(t) (t & ARM_FT_NESTED) +#define IS_STACKALIGN(t) (t & ARM_FT_STACKALIGN) + + +/* Structure used to hold the function stack frame layout. Offsets are + relative to the stack pointer on function entry. Positive offsets are + in the direction of stack growth. + Only soft_frame is used in thumb mode. */ + +typedef struct GTY(()) arm_stack_offsets +{ + int saved_args; /* ARG_POINTER_REGNUM. */ + int frame; /* ARM_HARD_FRAME_POINTER_REGNUM. */ + int saved_regs; + int soft_frame; /* FRAME_POINTER_REGNUM. */ + int locals_base; /* THUMB_HARD_FRAME_POINTER_REGNUM. */ + int outgoing_args; /* STACK_POINTER_REGNUM. */ + unsigned int saved_regs_mask; +} +arm_stack_offsets; + +#ifndef GENERATOR_FILE +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +typedef struct GTY(()) machine_function +{ + /* Additional stack adjustment in __builtin_eh_throw. */ + rtx eh_epilogue_sp_ofs; + /* Records if LR has to be saved for far jumps. */ + int far_jump_used; + /* Records if ARG_POINTER was ever live. */ + int arg_pointer_live; + /* Records if the save of LR has been eliminated. */ + int lr_save_eliminated; + /* The size of the stack frame. Only valid after reload. */ + arm_stack_offsets stack_offsets; + /* Records the type of the current function. */ + unsigned long func_type; + /* Record if the function has a variable argument list. */ + int uses_anonymous_args; + /* Records if sibcalls are blocked because an argument + register is needed to preserve stack alignment. */ + int sibcall_blocked; + /* The PIC register for this function. This might be a pseudo. */ + rtx pic_reg; + /* Labels for per-function Thumb call-via stubs. One per potential calling + register. We can never call via LR or PC. We can call via SP if a + trampoline happens to be on the top of the stack. */ + rtx call_via[14]; + /* Set to 1 when a return insn is output, this means that the epilogue + is not needed. */ + int return_used_this_function; + /* When outputting Thumb-1 code, record the last insn that provides + information about condition codes, and the comparison operands. */ + rtx thumb1_cc_insn; + rtx thumb1_cc_op0; + rtx thumb1_cc_op1; + /* Also record the CC mode that is supported. */ + enum machine_mode thumb1_cc_mode; +} +machine_function; +#endif + +/* As in the machine_function, a global set of call-via labels, for code + that is in text_section. */ +extern GTY(()) rtx thumb_call_via_label[14]; + +/* The number of potential ways of assigning to a co-processor. */ +#define ARM_NUM_COPROC_SLOTS 1 + +/* Enumeration of procedure calling standard variants. We don't really + support all of these yet. */ +enum arm_pcs +{ + ARM_PCS_AAPCS, /* Base standard AAPCS. */ + ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */ + ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */ + /* This must be the last AAPCS variant. */ + ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */ + ARM_PCS_ATPCS, /* ATPCS. */ + ARM_PCS_APCS, /* APCS (legacy Linux etc). */ + ARM_PCS_UNKNOWN +}; + +/* Default procedure calling standard of current compilation unit. */ +extern enum arm_pcs arm_pcs_default; + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. 
*/ +typedef struct +{ + /* This is the number of registers of arguments scanned so far. */ + int nregs; + /* This is the number of iWMMXt register arguments scanned so far. */ + int iwmmxt_nregs; + int named_count; + int nargs; + /* Which procedure call variant to use for this call. */ + enum arm_pcs pcs_variant; + + /* AAPCS related state tracking. */ + int aapcs_arg_processed; /* No need to lay out this argument again. */ + int aapcs_cprc_slot; /* Index of co-processor rules to handle + this argument, or -1 if using core + registers. */ + int aapcs_ncrn; + int aapcs_next_ncrn; + rtx aapcs_reg; /* Register assigned to this argument. */ + int aapcs_partial; /* How many bytes are passed in regs (if + split between core regs and stack. + Zero otherwise. */ + int aapcs_cprc_failed[ARM_NUM_COPROC_SLOTS]; + int can_split; /* Argument can be split between core regs + and the stack. */ + /* Private data for tracking VFP register allocation */ + unsigned aapcs_vfp_regs_free; + unsigned aapcs_vfp_reg_alloc; + int aapcs_vfp_rcount; + MACHMODE aapcs_vfp_rmode; +} CUMULATIVE_ARGS; + +#define FUNCTION_ARG_PADDING(MODE, TYPE) \ + (arm_pad_arg_upward (MODE, TYPE) ? upward : downward) + +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (arm_pad_reg_upward (MODE, TYPE, FIRST) ? upward : downward) + +/* For AAPCS, padding should never be below the argument. For other ABIs, + * mimic the default. */ +#define PAD_VARARGS_DOWN \ + ((TARGET_AAPCS_BASED) ? 0 : BYTES_BIG_ENDIAN) + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. + On the ARM, the offset starts at 0. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + arm_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL)) + +/* 1 if N is a possible register number for function argument passing. + On the ARM, r0-r3 are used to pass args. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + (IN_RANGE ((REGNO), 0, 3) \ + || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \ + && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \ + || (TARGET_IWMMXT_ABI \ + && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9))) + + +/* If your target environment doesn't prefix user functions with an + underscore, you may wish to re-define this to prevent any conflicts. */ +#ifndef ARM_MCOUNT_NAME +#define ARM_MCOUNT_NAME "*mcount" +#endif + +/* Call the function profiler with a given profile label. The Acorn + compiler puts this BEFORE the prolog but gcc puts it afterwards. + On the ARM the full profile code will look like: + .data + LP1 + .word 0 + .text + mov ip, lr + bl mcount + .word LP1 + + profile_function() in final.c outputs the .data section, FUNCTION_PROFILER + will output the .text section. + + The ``mov ip,lr'' seems like a good idea to stick with cc convention. + ``prof'' doesn't seem to mind about this! + + Note - this version of the code is designed to work in both ARM and + Thumb modes. 
*/ +#ifndef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + char temp[20]; \ + rtx sym; \ + \ + asm_fprintf (STREAM, "\tmov\t%r, %r\n\tbl\t", \ + IP_REGNUM, LR_REGNUM); \ + assemble_name (STREAM, ARM_MCOUNT_NAME); \ + fputc ('\n', STREAM); \ + ASM_GENERATE_INTERNAL_LABEL (temp, "LP", LABELNO); \ + sym = gen_rtx_SYMBOL_REF (Pmode, temp); \ + assemble_aligned_integer (UNITS_PER_WORD, sym); \ +} +#endif + +#ifdef THUMB_FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM, LABELNO) \ + if (TARGET_ARM) \ + ARM_FUNCTION_PROFILER (STREAM, LABELNO) \ + else \ + THUMB_FUNCTION_PROFILER (STREAM, LABELNO) +#else +#define FUNCTION_PROFILER(STREAM, LABELNO) \ + ARM_FUNCTION_PROFILER (STREAM, LABELNO) +#endif + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. + + On the ARM, the function epilogue recovers the stack pointer from the + frame. */ +#define EXIT_IGNORE_STACK 1 + +#define EPILOGUE_USES(REGNO) (epilogue_completed && (REGNO) == LR_REGNUM) + +/* Determine if the epilogue should be output as RTL. + You should override this if you define FUNCTION_EXTRA_EPILOGUE. */ +#define USE_RETURN_INSN(ISCOND) \ + (TARGET_32BIT ? use_return_insn (ISCOND, NULL) : 0) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + We have two registers that can be eliminated on the ARM. First, the + arg pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the pseudo frame pointer register can always + be eliminated; it is replaced with either the stack or the real frame + pointer. Note we have to use {ARM|THUMB}_HARD_FRAME_POINTER_REGNUM + because the definition of HARD_FRAME_POINTER_REGNUM is not a constant. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, ARM_HARD_FRAME_POINTER_REGNUM },\ + { ARG_POINTER_REGNUM, THUMB_HARD_FRAME_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, ARM_HARD_FRAME_POINTER_REGNUM },\ + { FRAME_POINTER_REGNUM, THUMB_HARD_FRAME_POINTER_REGNUM }} + +/* Define the offset between two registers, one to be eliminated, and the + other its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + if (TARGET_ARM) \ + (OFFSET) = arm_compute_initial_elimination_offset (FROM, TO); \ + else \ + (OFFSET) = thumb_compute_initial_elimination_offset (FROM, TO) + +/* Special case handling of the location of arguments passed on the stack. */ +#define DEBUGGER_ARG_OFFSET(value, addr) value ? value : arm_debugger_arg_offset (value, addr) + +/* Initialize data used by insn expanders. This is called from insn_emit, + once for every function before code is generated. */ +#define INIT_EXPANDERS arm_init_expanders () + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE (TARGET_32BIT ? 16 : 20) + +/* Alignment required for a trampoline in bits. */ +#define TRAMPOLINE_ALIGNMENT 32 + +/* Addressing modes, and classification of registers for them. 
*/ +#define HAVE_POST_INCREMENT 1 +#define HAVE_PRE_INCREMENT TARGET_32BIT +#define HAVE_POST_DECREMENT TARGET_32BIT +#define HAVE_PRE_DECREMENT TARGET_32BIT +#define HAVE_PRE_MODIFY_DISP TARGET_32BIT +#define HAVE_POST_MODIFY_DISP TARGET_32BIT +#define HAVE_PRE_MODIFY_REG TARGET_32BIT +#define HAVE_POST_MODIFY_REG TARGET_32BIT + +enum arm_auto_incmodes + { + ARM_POST_INC, + ARM_PRE_INC, + ARM_POST_DEC, + ARM_PRE_DEC + }; + +#define ARM_AUTOINC_VALID_FOR_MODE_P(mode, code) \ + (TARGET_32BIT && arm_autoinc_modes_ok_p (mode, code)) +#define USE_LOAD_POST_INCREMENT(mode) \ + ARM_AUTOINC_VALID_FOR_MODE_P(mode, ARM_POST_INC) +#define USE_LOAD_PRE_INCREMENT(mode) \ + ARM_AUTOINC_VALID_FOR_MODE_P(mode, ARM_PRE_INC) +#define USE_LOAD_POST_DECREMENT(mode) \ + ARM_AUTOINC_VALID_FOR_MODE_P(mode, ARM_POST_DEC) +#define USE_LOAD_PRE_DECREMENT(mode) \ + ARM_AUTOINC_VALID_FOR_MODE_P(mode, ARM_PRE_DEC) + +#define USE_STORE_PRE_DECREMENT(mode) USE_LOAD_PRE_DECREMENT(mode) +#define USE_STORE_PRE_INCREMENT(mode) USE_LOAD_PRE_INCREMENT(mode) +#define USE_STORE_POST_DECREMENT(mode) USE_LOAD_POST_DECREMENT(mode) +#define USE_STORE_POST_INCREMENT(mode) USE_LOAD_POST_INCREMENT(mode) + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ +#define TEST_REGNO(R, TEST, VALUE) \ + ((R TEST VALUE) || ((unsigned) reg_renumber[R] TEST VALUE)) + +/* Don't allow the pc to be used. */ +#define ARM_REGNO_OK_FOR_BASE_P(REGNO) \ + (TEST_REGNO (REGNO, <, PC_REGNUM) \ + || TEST_REGNO (REGNO, ==, FRAME_POINTER_REGNUM) \ + || TEST_REGNO (REGNO, ==, ARG_POINTER_REGNUM)) + +#define THUMB1_REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + (TEST_REGNO (REGNO, <=, LAST_LO_REGNUM) \ + || (GET_MODE_SIZE (MODE) >= 4 \ + && TEST_REGNO (REGNO, ==, STACK_POINTER_REGNUM))) + +#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + (TARGET_THUMB1 \ + ? THUMB1_REGNO_MODE_OK_FOR_BASE_P (REGNO, MODE) \ + : ARM_REGNO_OK_FOR_BASE_P (REGNO)) + +/* Nonzero if X can be the base register in a reg+reg addressing mode. + For Thumb, we can not use SP + reg, so reject SP. */ +#define REGNO_MODE_OK_FOR_REG_BASE_P(X, MODE) \ + REGNO_MODE_OK_FOR_BASE_P (X, QImode) + +/* For ARM code, we don't care about the mode, but for Thumb, the index + must be suitable for use in a QImode load. */ +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + (REGNO_MODE_OK_FOR_BASE_P (REGNO, QImode) \ + && !TEST_REGNO (REGNO, ==, STACK_POINTER_REGNUM)) + +/* Maximum number of registers that can appear in a valid memory address. + Shifts in addresses can't be by a register. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* Recognize any constant value that is a valid address. */ +/* XXX We can address any constant, eventually... */ +/* ??? Should the TARGET_ARM here also apply to thumb2? */ +#define CONSTANT_ADDRESS_P(X) \ + (GET_CODE (X) == SYMBOL_REF \ + && (CONSTANT_POOL_ADDRESS_P (X) \ + || (TARGET_ARM && optimize > 0 && SYMBOL_REF_FLAG (X)))) + +/* True if SYMBOL + OFFSET constants must refer to something within + SYMBOL's section. */ +#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 0 + +/* Nonzero if all target requires all absolute relocations be R_ARM_ABS32. 
*/ +#ifndef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 0 +#endif + +#ifndef SUBTARGET_NAME_ENCODING_LENGTHS +#define SUBTARGET_NAME_ENCODING_LENGTHS +#endif + +/* This is a C fragment for the inside of a switch statement. + Each case label should return the number of characters to + be stripped from the start of a function's name, if that + name starts with the indicated character. */ +#define ARM_NAME_ENCODING_LENGTHS \ + case '*': return 1; \ + SUBTARGET_NAME_ENCODING_LENGTHS + +/* This is how to output a reference to a user-level label named NAME. + `assemble_name' uses this. */ +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(FILE, NAME) \ + arm_asm_output_labelref (FILE, NAME) + +/* Output IT instructions for conditionally executed Thumb-2 instructions. */ +#define ASM_OUTPUT_OPCODE(STREAM, PTR) \ + if (TARGET_THUMB2) \ + thumb2_asm_output_opcode (STREAM); + +/* The EABI specifies that constructors should go in .init_array. + Other targets use .ctors for compatibility. */ +#ifndef ARM_EABI_CTORS_SECTION_OP +#define ARM_EABI_CTORS_SECTION_OP \ + "\t.section\t.init_array,\"aw\",%init_array" +#endif +#ifndef ARM_EABI_DTORS_SECTION_OP +#define ARM_EABI_DTORS_SECTION_OP \ + "\t.section\t.fini_array,\"aw\",%fini_array" +#endif +#define ARM_CTORS_SECTION_OP \ + "\t.section\t.ctors,\"aw\",%progbits" +#define ARM_DTORS_SECTION_OP \ + "\t.section\t.dtors,\"aw\",%progbits" + +/* Define CTORS_SECTION_ASM_OP. */ +#undef CTORS_SECTION_ASM_OP +#undef DTORS_SECTION_ASM_OP +#ifndef IN_LIBGCC2 +# define CTORS_SECTION_ASM_OP \ + (TARGET_AAPCS_BASED ? ARM_EABI_CTORS_SECTION_OP : ARM_CTORS_SECTION_OP) +# define DTORS_SECTION_ASM_OP \ + (TARGET_AAPCS_BASED ? ARM_EABI_DTORS_SECTION_OP : ARM_DTORS_SECTION_OP) +#else /* !defined (IN_LIBGCC2) */ +/* In libgcc, CTORS_SECTION_ASM_OP must be a compile-time constant, + so we cannot use the definition above. */ +# ifdef __ARM_EABI__ +/* The .ctors section is not part of the EABI, so we do not define + CTORS_SECTION_ASM_OP when in libgcc; that prevents crtstuff + from trying to use it. We do define it when doing normal + compilation, as .init_array can be used instead of .ctors. */ +/* There is no need to emit begin or end markers when using + init_array; the dynamic linker will compute the size of the + array itself based on special symbols created by the static + linker. However, we do need to arrange to set up + exception-handling here. */ +# define CTOR_LIST_BEGIN asm (ARM_EABI_CTORS_SECTION_OP) +# define CTOR_LIST_END /* empty */ +# define DTOR_LIST_BEGIN asm (ARM_EABI_DTORS_SECTION_OP) +# define DTOR_LIST_END /* empty */ +# else /* !defined (__ARM_EABI__) */ +# define CTORS_SECTION_ASM_OP ARM_CTORS_SECTION_OP +# define DTORS_SECTION_ASM_OP ARM_DTORS_SECTION_OP +# endif /* !defined (__ARM_EABI__) */ +#endif /* !defined (IN_LIBCC2) */ + +/* True if the operating system can merge entities with vague linkage + (e.g., symbols in COMDAT group) during dynamic linking. */ +#ifndef TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P +#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P true +#endif + +#define ARM_OUTPUT_FN_UNWIND(F, PROLOGUE) arm_output_fn_unwind (F, PROLOGUE) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. 
+ Thumb-2 has the same restrictions as arm. */ +#ifndef REG_OK_STRICT + +#define ARM_REG_OK_FOR_BASE_P(X) \ + (REGNO (X) <= LAST_ARM_REGNUM \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || REGNO (X) == FRAME_POINTER_REGNUM \ + || REGNO (X) == ARG_POINTER_REGNUM) + +#define ARM_REG_OK_FOR_INDEX_P(X) \ + ((REGNO (X) <= LAST_ARM_REGNUM \ + && REGNO (X) != STACK_POINTER_REGNUM) \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || REGNO (X) == FRAME_POINTER_REGNUM \ + || REGNO (X) == ARG_POINTER_REGNUM) + +#define THUMB1_REG_MODE_OK_FOR_BASE_P(X, MODE) \ + (REGNO (X) <= LAST_LO_REGNUM \ + || REGNO (X) >= FIRST_PSEUDO_REGISTER \ + || (GET_MODE_SIZE (MODE) >= 4 \ + && (REGNO (X) == STACK_POINTER_REGNUM \ + || (X) == hard_frame_pointer_rtx \ + || (X) == arg_pointer_rtx))) + +#define REG_STRICT_P 0 + +#else /* REG_OK_STRICT */ + +#define ARM_REG_OK_FOR_BASE_P(X) \ + ARM_REGNO_OK_FOR_BASE_P (REGNO (X)) + +#define ARM_REG_OK_FOR_INDEX_P(X) \ + ARM_REGNO_OK_FOR_INDEX_P (REGNO (X)) + +#define THUMB1_REG_MODE_OK_FOR_BASE_P(X, MODE) \ + THUMB1_REGNO_MODE_OK_FOR_BASE_P (REGNO (X), MODE) + +#define REG_STRICT_P 1 + +#endif /* REG_OK_STRICT */ + +/* Now define some helpers in terms of the above. */ + +#define REG_MODE_OK_FOR_BASE_P(X, MODE) \ + (TARGET_THUMB1 \ + ? THUMB1_REG_MODE_OK_FOR_BASE_P (X, MODE) \ + : ARM_REG_OK_FOR_BASE_P (X)) + +/* For 16-bit Thumb, a valid index register is anything that can be used in + a byte load instruction. */ +#define THUMB1_REG_OK_FOR_INDEX_P(X) \ + THUMB1_REG_MODE_OK_FOR_BASE_P (X, QImode) + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. On the Thumb, the stack pointer + is not suitable. */ +#define REG_OK_FOR_INDEX_P(X) \ + (TARGET_THUMB1 \ + ? THUMB1_REG_OK_FOR_INDEX_P (X) \ + : ARM_REG_OK_FOR_INDEX_P (X)) + +/* Nonzero if X can be the base register in a reg+reg addressing mode. + For Thumb, we can not use SP + reg, so reject SP. */ +#define REG_MODE_OK_FOR_REG_BASE_P(X, MODE) \ + REG_OK_FOR_INDEX_P (X) + +#define ARM_BASE_REGISTER_RTX_P(X) \ + (REG_P (X) && ARM_REG_OK_FOR_BASE_P (X)) + +#define ARM_INDEX_REGISTER_RTX_P(X) \ + (REG_P (X) && ARM_REG_OK_FOR_INDEX_P (X)) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE Pmode + +#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \ + || (TARGET_THUMB1 \ + && (optimize_size || flag_pic))) + +#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \ + (TARGET_THUMB1 \ + ? (min >= 0 && max < 512 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \ + : min >= -256 && max < 256 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, QImode) \ + : min >= 0 && max < 8192 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, HImode) \ + : min >= -4096 && max < 4096 \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ + : SImode) \ + : ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode \ + : (max >= 0x200) ? HImode \ + : QImode)) + +/* signed 'char' is most compatible, but RISC OS wants it unsigned. + unsigned is probably best, but may break some code. */ +#ifndef DEFAULT_SIGNED_CHAR +#define DEFAULT_SIGNED_CHAR 0 +#endif + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 4 + +#undef MOVE_RATIO +#define MOVE_RATIO(speed) (arm_tune_xscale ? 4 : 2) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. 
*/ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) \ + (TARGET_THUMB ? ZERO_EXTEND : \ + ((arm_arch4 || (MODE) == QImode) ? ZERO_EXTEND \ + : ((BYTES_BIG_ENDIAN && (MODE) == HImode) ? SIGN_EXTEND : UNKNOWN))) + +/* Nonzero if access to memory by bytes is slow and undesirable. */ +#define SLOW_BYTE_ACCESS 0 + +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 + +/* Immediate shift counts are truncated by the output routines (or was it + the assembler?). Shift counts in a register are truncated by ARM. Note + that the native compiler puts too large (> 32) immediate shift counts + into a register and shifts by the register, letting the ARM decide what + to do instead of doing that itself. */ +/* This is all wrong. Defining SHIFT_COUNT_TRUNCATED tells combine that + code like (X << (Y % 32)) for register X, Y is equivalent to (X << Y). + On the arm, Y in a register is used modulo 256 for the shift. Only for + rotates is modulo 32 used. */ +/* #define SHIFT_COUNT_TRUNCATED 1 */ + +/* All integers have the same format so truncation is easy. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Calling from registers is a massive pain. */ +#define NO_FUNCTION_CSE 1 + +/* The machine modes of pointers and functions */ +#define Pmode SImode +#define FUNCTION_MODE Pmode + +#define ARM_FRAME_RTX(X) \ + ( (X) == frame_pointer_rtx || (X) == stack_pointer_rtx \ + || (X) == arg_pointer_rtx) + +/* Try to generate sequences that don't involve branches, we can then use + conditional instructions. */ +#define BRANCH_COST(speed_p, predictable_p) \ + (current_tune->branch_cost (speed_p, predictable_p)) + +/* False if short circuit operation is preferred. */ +#define LOGICAL_OP_NON_SHORT_CIRCUIT \ + ((optimize_size) \ + ? (TARGET_THUMB ? false : true) \ + : (current_tune->logical_op_non_short_circuit[TARGET_ARM])) + + +/* Position Independent Code. */ +/* We decide which register to use based on the compilation options and + the assembler in use; this is more general than the APCS restriction of + using sb (r9) all the time. */ +extern unsigned arm_pic_register; + +/* The register number of the register used to address a table of static + data addresses in memory. */ +#define PIC_OFFSET_TABLE_REGNUM arm_pic_register + +/* We can't directly access anything that contains a symbol, + nor can we indirect via the constant pool. One exception is + UNSPEC_TLS, which is always PIC. */ +#define LEGITIMATE_PIC_OPERAND_P(X) \ + (!(symbol_mentioned_p (X) \ + || label_mentioned_p (X) \ + || (GET_CODE (X) == SYMBOL_REF \ + && CONSTANT_POOL_ADDRESS_P (X) \ + && (symbol_mentioned_p (get_pool_constant (X)) \ + || label_mentioned_p (get_pool_constant (X))))) \ + || tls_mentioned_p (X)) + +/* We need to know when we are making a constant pool; this determines + whether data needs to be in the GOT or can be referenced via a GOT + offset. */ +extern int making_const_table; + +/* Handle pragmas for compatibility with Intel's compilers. */ +/* Also abuse this to register additional C specific EABI attributes. 
*/ +#define REGISTER_TARGET_PRAGMAS() do { \ + c_register_pragma (0, "long_calls", arm_pr_long_calls); \ + c_register_pragma (0, "no_long_calls", arm_pr_no_long_calls); \ + c_register_pragma (0, "long_calls_off", arm_pr_long_calls_off); \ + arm_lang_object_attributes_init(); \ +} while (0) + +/* Condition code information. */ +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ + +#define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y) + +#define REVERSIBLE_CC_MODE(MODE) 1 + +#define REVERSE_CONDITION(CODE,MODE) \ + (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ + ? reverse_condition_maybe_unordered (code) \ + : reverse_condition (code)) + +/* The arm5 clz instruction returns 32. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) + +#define CC_STATUS_INIT \ + do { cfun->machine->thumb1_cc_insn = NULL_RTX; } while (0) + +#undef ASM_APP_OFF +#define ASM_APP_OFF (TARGET_THUMB1 ? "\t.code\t16\n" : \ + TARGET_THUMB2 ? "\t.thumb\n" : "") + +/* Output a push or a pop instruction (only used when profiling). + We can't push STATIC_CHAIN_REGNUM (r12) directly with Thumb-1. We know + that ASM_OUTPUT_REG_PUSH will be matched with ASM_OUTPUT_REG_POP, and + that r7 isn't used by the function profiler, so we can use it as a + scratch reg. WARNING: This isn't safe in the general case! It may be + sensitive to future changes in final.c:profile_function. */ +#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM,"\tstmfd\t%r!,{%r}\n", \ + STACK_POINTER_REGNUM, REGNO); \ + else if (TARGET_THUMB1 \ + && (REGNO) == STATIC_CHAIN_REGNUM) \ + { \ + asm_fprintf (STREAM, "\tpush\t{r7}\n"); \ + asm_fprintf (STREAM, "\tmov\tr7, %r\n", REGNO);\ + asm_fprintf (STREAM, "\tpush\t{r7}\n"); \ + } \ + else \ + asm_fprintf (STREAM, "\tpush {%r}\n", REGNO); \ + } while (0) + + +/* See comment for ASM_OUTPUT_REG_PUSH concerning Thumb-1 issue. */ +#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \ + do \ + { \ + if (TARGET_ARM) \ + asm_fprintf (STREAM, "\tldmfd\t%r!,{%r}\n", \ + STACK_POINTER_REGNUM, REGNO); \ + else if (TARGET_THUMB1 \ + && (REGNO) == STATIC_CHAIN_REGNUM) \ + { \ + asm_fprintf (STREAM, "\tpop\t{r7}\n"); \ + asm_fprintf (STREAM, "\tmov\t%r, r7\n", REGNO);\ + asm_fprintf (STREAM, "\tpop\t{r7}\n"); \ + } \ + else \ + asm_fprintf (STREAM, "\tpop {%r}\n", REGNO); \ + } while (0) + +#define ADDR_VEC_ALIGN(JUMPTABLE) \ + ((TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) ? 2 : 0) + +/* Alignment for case labels comes from ADDR_VEC_ALIGN; avoid the + default alignment from elfos.h. */ +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) /* Empty. */ + +/* Make sure subsequent insns are aligned after a TBB. 
*/ +#define ASM_OUTPUT_CASE_END(FILE, NUM, JUMPTABLE) \ + do \ + { \ + if (GET_MODE (PATTERN (JUMPTABLE)) == QImode) \ + ASM_OUTPUT_ALIGN (FILE, 1); \ + } \ + while (0) + +#define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ + do \ + { \ + if (TARGET_THUMB) \ + { \ + if (is_called_in_ARM_mode (DECL) \ + || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY \ + && cfun->is_thunk)) \ + fprintf (STREAM, "\t.code 32\n") ; \ + else if (TARGET_THUMB1) \ + fprintf (STREAM, "\t.code\t16\n\t.thumb_func\n") ; \ + else \ + fprintf (STREAM, "\t.thumb\n\t.thumb_func\n") ; \ + } \ + if (TARGET_POKE_FUNCTION_NAME) \ + arm_poke_function_name (STREAM, (const char *) NAME); \ + } \ + while (0) + +/* For aliases of functions we use .thumb_set instead. */ +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL1, DECL2) \ + do \ + { \ + const char *const LABEL1 = XSTR (XEXP (DECL_RTL (decl), 0), 0); \ + const char *const LABEL2 = IDENTIFIER_POINTER (DECL2); \ + \ + if (TARGET_THUMB && TREE_CODE (DECL1) == FUNCTION_DECL) \ + { \ + fprintf (FILE, "\t.thumb_set "); \ + assemble_name (FILE, LABEL1); \ + fprintf (FILE, ","); \ + assemble_name (FILE, LABEL2); \ + fprintf (FILE, "\n"); \ + } \ + else \ + ASM_OUTPUT_DEF (FILE, LABEL1, LABEL2); \ + } \ + while (0) + +#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN +/* To support -falign-* switches we need to use .p2align so + that alignment directives in code sections will be padded + with no-op instructions, rather than zeroes. */ +#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \ + if ((LOG) != 0) \ + { \ + if ((MAX_SKIP) == 0) \ + fprintf ((FILE), "\t.p2align %d\n", (int) (LOG)); \ + else \ + fprintf ((FILE), "\t.p2align %d,,%d\n", \ + (int) (LOG), (int) (MAX_SKIP)); \ + } +#endif + +/* Add two bytes to the length of conditionally executed Thumb-2 + instructions for the IT instruction. */ +#define ADJUST_INSN_LENGTH(insn, length) \ + if (TARGET_THUMB2 && GET_CODE (PATTERN (insn)) == COND_EXEC) \ + length += 2; + +/* Only perform branch elimination (by making instructions conditional) if + we're optimizing. For Thumb-2 check if any IT instructions need + outputting. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + if (TARGET_ARM && optimize) \ + arm_final_prescan_insn (INSN); \ + else if (TARGET_THUMB2) \ + thumb2_final_prescan_insn (INSN); \ + else if (TARGET_THUMB1) \ + thumb1_final_prescan_insn (INSN) + +#define ARM_SIGN_EXTEND(x) ((HOST_WIDE_INT) \ + (HOST_BITS_PER_WIDE_INT <= 32 ? (unsigned HOST_WIDE_INT) (x) \ + : ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0xffffffff) |\ + ((((unsigned HOST_WIDE_INT)(x)) & (unsigned HOST_WIDE_INT) 0x80000000) \ + ? ((~ (unsigned HOST_WIDE_INT) 0) \ + & ~ (unsigned HOST_WIDE_INT) 0xffffffff) \ + : 0)))) + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. */ + +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + arm_return_addr (COUNT, FRAME) + +/* Mask of the bits in the PC that contain the real return address + when running in 26-bit mode. */ +#define RETURN_ADDR_MASK26 (0x03fffffc) + +/* Pick up the return address upon entry to a procedure. Used for + dwarf2 unwind information. This also enables the table driven + mechanism. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (LR_REGNUM) + +/* Used to mask out junk bits from the return address, such as + processor state, interrupt status, condition codes and the like. 
*/ +#define MASK_RETURN_ADDR \ + /* If we are generating code for an ARM2/ARM3 machine or for an ARM6 \ + in 26 bit mode, the condition codes must be masked out of the \ + return address. This does not apply to ARM6 and later processors \ + when running in 32 bit mode. */ \ + ((arm_arch4 || TARGET_THUMB) \ + ? (gen_int_mode ((unsigned long)0xffffffff, Pmode)) \ + : arm_gen_return_addr_mask ()) + + +/* Do not emit .note.GNU-stack by default. */ +#ifndef NEED_INDICATE_EXEC_STACK +#define NEED_INDICATE_EXEC_STACK 0 +#endif + +#define TARGET_ARM_ARCH \ + (arm_base_arch) \ + +#define TARGET_ARM_V6M (!arm_arch_notm && !arm_arch_thumb2) +#define TARGET_ARM_V7M (!arm_arch_notm && arm_arch_thumb2) + +/* The highest Thumb instruction set version supported by the chip. */ +#define TARGET_ARM_ARCH_ISA_THUMB \ + (arm_arch_thumb2 ? 2 \ + : ((TARGET_ARM_ARCH >= 5 || arm_arch4t) ? 1 : 0)) + +/* Expands to an upper-case char of the target's architectural + profile. */ +#define TARGET_ARM_ARCH_PROFILE \ + (!arm_arch_notm \ + ? 'M' \ + : (arm_arch7 \ + ? (strlen (arm_arch_name) >=3 \ + ? (arm_arch_name[strlen (arm_arch_name) - 3]) \ + : 0) \ + : 0)) + +/* Bit-field indicating what size LDREX/STREX loads/stores are available. + Bit 0 for bytes, up to bit 3 for double-words. */ +#define TARGET_ARM_FEATURE_LDREX \ + ((TARGET_HAVE_LDREX ? 4 : 0) \ + | (TARGET_HAVE_LDREXBH ? 3 : 0) \ + | (TARGET_HAVE_LDREXD ? 8 : 0)) + +/* Set as a bit mask indicating the available widths of hardware floating + point types. Where bit 1 indicates 16-bit support, bit 2 indicates + 32-bit support, bit 3 indicates 64-bit support. */ +#define TARGET_ARM_FP \ + (TARGET_VFP_SINGLE ? 4 \ + : (TARGET_VFP_DOUBLE ? (TARGET_FP16 ? 14 : 12) : 0)) + + +/* Set as a bit mask indicating the available widths of floating point + types for hardware NEON floating point. This is the same as + TARGET_ARM_FP without the 64-bit bit set. */ +#ifdef TARGET_NEON +#define TARGET_NEON_FP \ + (TARGET_ARM_FP & (0xff ^ 0x08)) +#endif + +/* The maximum number of parallel loads or stores we support in an ldm/stm + instruction. */ +#define MAX_LDM_STM_OPS 4 + +#define BIG_LITTLE_SPEC \ + " %{mcpu=*:-mcpu=%:rewrite_mcpu(%{mcpu=*:%*})}" + +extern const char *arm_rewrite_mcpu (int argc, const char **argv); +#define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ + { "rewrite_mcpu", arm_rewrite_mcpu }, + +#define ASM_CPU_SPEC \ + " %{mcpu=generic-*:-march=%*;" \ + " :%{march=*:-march=%*}}" \ + BIG_LITTLE_SPEC + +/* -mcpu=native handling only makes sense with compiler running on + an ARM chip. */ +#if defined(__arm__) +extern const char *host_detect_local_cpu (int argc, const char **argv); +# define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, \ + BIG_LITTLE_CPU_SPEC_FUNCTIONS + +# define MCPU_MTUNE_NATIVE_SPECS \ + " %{march=native:%. + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;; Beware of splitting Thumb1 patterns that output multiple +;; assembly instructions, in particular instruction such as SBC and +;; ADC which consume flags. For example, in the pattern thumb_subdi3 +;; below, the output SUB implicitly sets the flags (assembled to SUBS) +;; and then the Carry flag is used by SBC to compute the correct +;; result. If we split thumb_subdi3 pattern into two separate RTL +;; insns (using define_insn_and_split), the scheduler might place +;; other RTL insns between SUB and SBC, possibly modifying the Carry +;; flag used by SBC. 
This might happen because most Thumb1 patterns +;; for flag-setting instructions do not have explicit RTL for setting +;; or clobbering the flags. Instead, they have the attribute "conds" +;; with value "set" or "clob". However, this attribute is not used to +;; identify dependencies and therefore the scheduler might reorder +;; these instructions. Currently, this problem cannot happen because +;; there are no separate Thumb1 patterns for individual instructions +;; that consume flags (except conditional execution, which is treated +;; differently). In particular, there is no Thumb1 armv6-m pattern for +;; sbc or adc. + + +;;--------------------------------------------------------------------------- +;; Constants + +;; Register numbers -- All machine registers should be defined here +(define_constants + [(R0_REGNUM 0) ; First CORE register + (R1_REGNUM 1) ; Second CORE register + (IP_REGNUM 12) ; Scratch register + (SP_REGNUM 13) ; Stack pointer + (LR_REGNUM 14) ; Return address register + (PC_REGNUM 15) ; Program counter + (LAST_ARM_REGNUM 15) ; + (CC_REGNUM 100) ; Condition code pseudo register + (VFPCC_REGNUM 101) ; VFP Condition code pseudo register + ] +) +;; 3rd operand to select_dominance_cc_mode +(define_constants + [(DOM_CC_X_AND_Y 0) + (DOM_CC_NX_OR_Y 1) + (DOM_CC_X_OR_Y 2) + ] +) +;; conditional compare combination +(define_constants + [(CMP_CMP 0) + (CMN_CMP 1) + (CMP_CMN 2) + (CMN_CMN 3) + (NUM_OF_COND_CMP 4) + ] +) + + +;;--------------------------------------------------------------------------- +;; Attributes + +;; Processor type. This is created automatically from arm-cores.def. +(include "arm-tune.md") + +;; Instruction classification types +(include "types.md") + +; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when +; generating ARM code. This is used to control the length of some insn +; patterns that share the same RTL in both ARM and Thumb code. +(define_attr "is_thumb" "no,yes" (const (symbol_ref "thumb_code"))) + +; IS_ARCH6 is set to 'yes' when we are generating code for ARMv6. +(define_attr "is_arch6" "no,yes" (const (symbol_ref "arm_arch6"))) + +; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code. +(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code"))) + +; We use this attribute to disable alternatives that can produce 32-bit +; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks +; that contain 32-bit instructions. +(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes")) + +; This attribute is used to disable a predicated alternative when we have +; arm_restrict_it. +(define_attr "predicable_short_it" "no,yes" (const_string "yes")) + +;; Operand number of an input operand that is shifted. Zero if the +;; given instruction does not shift one of its input operands. +(define_attr "shift" "" (const_int 0)) + +; Floating Point Unit. If we only have floating point emulation, then there +; is no point in scheduling the floating point insns. (Well, for best +; performance we should try and group them together). +(define_attr "fpu" "none,vfp" + (const (symbol_ref "arm_fpu_attr"))) + +(define_attr "predicated" "yes,no" (const_string "no")) + +; LENGTH of an instruction (in bytes) +(define_attr "length" "" + (const_int 4)) + +; The architecture which supports the instruction (or alternative). +; This can be "a" for ARM, "t" for either of the Thumbs, "32" for +; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. 
"v6" +; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without +; arm_arch6. This attribute is used to compute attribute "enabled", +; use type "any" to enable an alternative in all cases. +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2" + (const_string "any")) + +(define_attr "arch_enabled" "no,yes" + (cond [(eq_attr "arch" "any") + (const_string "yes") + + (and (eq_attr "arch" "a") + (match_test "TARGET_ARM")) + (const_string "yes") + + (and (eq_attr "arch" "t") + (match_test "TARGET_THUMB")) + (const_string "yes") + + (and (eq_attr "arch" "t1") + (match_test "TARGET_THUMB1")) + (const_string "yes") + + (and (eq_attr "arch" "t2") + (match_test "TARGET_THUMB2")) + (const_string "yes") + + (and (eq_attr "arch" "32") + (match_test "TARGET_32BIT")) + (const_string "yes") + + (and (eq_attr "arch" "v6") + (match_test "TARGET_32BIT && arm_arch6")) + (const_string "yes") + + (and (eq_attr "arch" "nov6") + (match_test "TARGET_32BIT && !arm_arch6")) + (const_string "yes") + + (and (eq_attr "arch" "avoid_neon_for_64bits") + (match_test "TARGET_NEON") + (not (match_test "TARGET_PREFER_NEON_64BITS"))) + (const_string "yes") + + (and (eq_attr "arch" "neon_for_64bits") + (match_test "TARGET_NEON") + (match_test "TARGET_PREFER_NEON_64BITS")) + (const_string "yes") + + (and (eq_attr "arch" "iwmmxt2") + (match_test "TARGET_REALLY_IWMMXT2")) + (const_string "yes")] + + (const_string "no"))) + +(define_attr "opt" "any,speed,size" + (const_string "any")) + +(define_attr "opt_enabled" "no,yes" + (cond [(eq_attr "opt" "any") + (const_string "yes") + + (and (eq_attr "opt" "speed") + (match_test "optimize_function_for_speed_p (cfun)")) + (const_string "yes") + + (and (eq_attr "opt" "size") + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "yes")] + (const_string "no"))) + +(define_attr "use_literal_pool" "no,yes" + (cond [(and (eq_attr "type" "f_loads,f_loadd") + (match_test "CONSTANT_P (operands[1])")) + (const_string "yes")] + (const_string "no"))) + +; Allows an insn to disable certain alternatives for reasons other than +; arch support. +(define_attr "insn_enabled" "no,yes" + (const_string "yes")) + +; Enable all alternatives that are both arch_enabled and insn_enabled. + (define_attr "enabled" "no,yes" + (cond [(eq_attr "insn_enabled" "no") + (const_string "no") + + (and (eq_attr "predicable_short_it" "no") + (and (eq_attr "predicated" "yes") + (match_test "arm_restrict_it"))) + (const_string "no") + + (and (eq_attr "enabled_for_depr_it" "no") + (match_test "arm_restrict_it")) + (const_string "no") + + (and (eq_attr "use_literal_pool" "yes") + (match_test "arm_disable_literal_pool")) + (const_string "no") + + (eq_attr "arch_enabled" "no") + (const_string "no") + + (eq_attr "opt_enabled" "no") + (const_string "no")] + (const_string "yes"))) + +; POOL_RANGE is how far away from a constant pool entry that this insn +; can be placed. If the distance is zero, then this insn will never +; reference the pool. +; Note that for Thumb constant pools the PC value is rounded down to the +; nearest multiple of four. Therefore, THUMB2_POOL_RANGE (and POOL_RANGE for +; Thumb insns) should be set to - 2. +; NEG_POOL_RANGE is nonzero for insns that can reference a constant pool entry +; before its address. It is set to - (8 + ). 
+(define_attr "arm_pool_range" "" (const_int 0)) +(define_attr "thumb2_pool_range" "" (const_int 0)) +(define_attr "arm_neg_pool_range" "" (const_int 0)) +(define_attr "thumb2_neg_pool_range" "" (const_int 0)) + +(define_attr "pool_range" "" + (cond [(eq_attr "is_thumb" "yes") (attr "thumb2_pool_range")] + (attr "arm_pool_range"))) +(define_attr "neg_pool_range" "" + (cond [(eq_attr "is_thumb" "yes") (attr "thumb2_neg_pool_range")] + (attr "arm_neg_pool_range"))) + +; An assembler sequence may clobber the condition codes without us knowing. +; If such an insn references the pool, then we have no way of knowing how, +; so use the most conservative value for pool_range. +(define_asm_attributes + [(set_attr "conds" "clob") + (set_attr "length" "4") + (set_attr "pool_range" "250")]) + +; Load scheduling, set from the arm_ld_sched variable +; initialized by arm_option_override() +(define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched"))) + +; YES if the "type" attribute assigned to the insn denotes an +; Advanced SIMD instruction, NO otherwise. +(define_attr "is_neon_type" "yes,no" + (if_then_else (eq_attr "type" + "neon_add, neon_add_q, neon_add_widen, neon_add_long,\ + neon_qadd, neon_qadd_q, neon_add_halve, neon_add_halve_q,\ + neon_add_halve_narrow_q,\ + neon_sub, neon_sub_q, neon_sub_widen, neon_sub_long, neon_qsub,\ + neon_qsub_q, neon_sub_halve, neon_sub_halve_q,\ + neon_sub_halve_narrow_q,\ + neon_abs, neon_abs_q, neon_neg, neon_neg_q, neon_qneg,\ + neon_qneg_q, neon_qabs, neon_qabs_q, neon_abd, neon_abd_q,\ + neon_abd_long, neon_minmax, neon_minmax_q, neon_compare,\ + neon_compare_q, neon_compare_zero, neon_compare_zero_q,\ + neon_arith_acc, neon_arith_acc_q, neon_reduc_add,\ + neon_reduc_add_q, neon_reduc_add_long, neon_reduc_add_acc,\ + neon_reduc_add_acc_q, neon_reduc_minmax, neon_reduc_minmax_q,\ + neon_logic, neon_logic_q, neon_tst, neon_tst_q,\ + neon_shift_imm, neon_shift_imm_q, neon_shift_imm_narrow_q,\ + neon_shift_imm_long, neon_shift_reg, neon_shift_reg_q,\ + neon_shift_acc, neon_shift_acc_q, neon_sat_shift_imm,\ + neon_sat_shift_imm_q, neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg, neon_sat_shift_reg_q,\ + neon_ins, neon_ins_q, neon_move, neon_move_q, neon_move_narrow_q,\ + neon_permute, neon_permute_q, neon_zip, neon_zip_q, neon_tbl1,\ + neon_tbl1_q, neon_tbl2, neon_tbl2_q, neon_tbl3, neon_tbl3_q,\ + neon_tbl4, neon_tbl4_q, neon_bsl, neon_bsl_q, neon_cls,\ + neon_cls_q, neon_cnt, neon_cnt_q, neon_dup, neon_dup_q,\ + neon_ext, neon_ext_q, neon_rbit, neon_rbit_q,\ + neon_rev, neon_rev_q, neon_mul_b, neon_mul_b_q, neon_mul_h,\ + neon_mul_h_q, neon_mul_s, neon_mul_s_q, neon_mul_b_long,\ + neon_mul_h_long, neon_mul_s_long, neon_mul_d_long, neon_mul_h_scalar,\ + neon_mul_h_scalar_q, neon_mul_s_scalar, neon_mul_s_scalar_q,\ + neon_mul_h_scalar_long, neon_mul_s_scalar_long, neon_sat_mul_b,\ + neon_sat_mul_b_q, neon_sat_mul_h, neon_sat_mul_h_q,\ + neon_sat_mul_s, neon_sat_mul_s_q, neon_sat_mul_b_long,\ + neon_sat_mul_h_long, neon_sat_mul_s_long, neon_sat_mul_h_scalar,\ + neon_sat_mul_h_scalar_q, neon_sat_mul_s_scalar,\ + neon_sat_mul_s_scalar_q, neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long, neon_mla_b, neon_mla_b_q, neon_mla_h,\ + neon_mla_h_q, neon_mla_s, neon_mla_s_q, neon_mla_b_long,\ + neon_mla_h_long, neon_mla_s_long, neon_mla_h_scalar,\ + neon_mla_h_scalar_q, neon_mla_s_scalar, neon_mla_s_scalar_q,\ + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ + 
neon_sat_mla_s_scalar_long,\ + neon_to_gp, neon_to_gp_q, neon_from_gp, neon_from_gp_q,\ + neon_ldr, neon_load1_1reg, neon_load1_1reg_q, neon_load1_2reg,\ + neon_load1_2reg_q, neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q, neon_load1_all_lanes,\ + neon_load1_all_lanes_q, neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load2_2reg, neon_load2_2reg_q, neon_load2_4reg,\ + neon_load2_4reg_q, neon_load2_all_lanes, neon_load2_all_lanes_q,\ + neon_load2_one_lane, neon_load2_one_lane_q,\ + neon_load3_3reg, neon_load3_3reg_q, neon_load3_all_lanes,\ + neon_load3_all_lanes_q, neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_4reg, neon_load4_4reg_q, neon_load4_all_lanes,\ + neon_load4_all_lanes_q, neon_load4_one_lane, neon_load4_one_lane_q,\ + neon_str, neon_store1_1reg, neon_store1_1reg_q, neon_store1_2reg,\ + neon_store1_2reg_q, neon_store1_3reg, neon_store1_3reg_q,\ + neon_store1_4reg, neon_store1_4reg_q, neon_store1_one_lane,\ + neon_store1_one_lane_q, neon_store2_2reg, neon_store2_2reg_q,\ + neon_store2_4reg, neon_store2_4reg_q, neon_store2_one_lane,\ + neon_store2_one_lane_q, neon_store3_3reg, neon_store3_3reg_q,\ + neon_store3_one_lane, neon_store3_one_lane_q, neon_store4_4reg,\ + neon_store4_4reg_q, neon_store4_one_lane, neon_store4_one_lane_q,\ + neon_fp_abd_s, neon_fp_abd_s_q, neon_fp_abd_d, neon_fp_abd_d_q,\ + neon_fp_addsub_s, neon_fp_addsub_s_q, neon_fp_addsub_d,\ + neon_fp_addsub_d_q, neon_fp_compare_s, neon_fp_compare_s_q,\ + neon_fp_compare_d, neon_fp_compare_d_q, neon_fp_minmax_s,\ + neon_fp_minmax_s_q, neon_fp_minmax_d, neon_fp_minmax_d_q,\ + neon_fp_reduc_add_s, neon_fp_reduc_add_s_q, neon_fp_reduc_add_d,\ + neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s, + neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\ + neon_fp_reduc_minmax_d_q,\ + neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\ + neon_fp_cvt_widen_h, neon_fp_cvt_widen_s, neon_fp_to_int_s,\ + neon_fp_to_int_s_q, neon_int_to_fp_s, neon_int_to_fp_s_q,\ + neon_fp_round_s, neon_fp_round_s_q, neon_fp_recpe_s,\ + neon_fp_recpe_s_q,\ + neon_fp_recpe_d, neon_fp_recpe_d_q, neon_fp_recps_s,\ + neon_fp_recps_s_q, neon_fp_recps_d, neon_fp_recps_d_q,\ + neon_fp_recpx_s, neon_fp_recpx_s_q, neon_fp_recpx_d,\ + neon_fp_recpx_d_q, neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\ + neon_fp_rsqrte_d, neon_fp_rsqrte_d_q, neon_fp_rsqrts_s,\ + neon_fp_rsqrts_s_q, neon_fp_rsqrts_d, neon_fp_rsqrts_d_q,\ + neon_fp_mul_s, neon_fp_mul_s_q, neon_fp_mul_s_scalar,\ + neon_fp_mul_s_scalar_q, neon_fp_mul_d, neon_fp_mul_d_q,\ + neon_fp_mul_d_scalar_q, neon_fp_mla_s, neon_fp_mla_s_q,\ + neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q, neon_fp_mla_d,\ + neon_fp_mla_d_q, neon_fp_mla_d_scalar_q, neon_fp_sqrt_s,\ + neon_fp_sqrt_s_q, neon_fp_sqrt_d, neon_fp_sqrt_d_q,\ + neon_fp_div_s, neon_fp_div_s_q, neon_fp_div_d, neon_fp_div_d_q, crypto_aes,\ + crypto_sha1_xor, crypto_sha1_fast, crypto_sha1_slow, crypto_sha256_fast,\ + crypto_sha256_slow") + (const_string "yes") + (const_string "no"))) + +; condition codes: this one is used by final_prescan_insn to speed up +; conditionalizing instructions. It saves having to scan the rtl to see if +; it uses or alters the condition codes. +; +; USE means that the condition codes are used by the insn in the process of +; outputting code, this means (at present) that we can't use the insn in +; inlined branches +; +; SET means that the purpose of the insn is to set the condition codes in a +; well defined manner. 
+; +; CLOB means that the condition codes are altered in an undefined manner, if +; they are altered at all +; +; UNCONDITIONAL means the instruction can not be conditionally executed and +; that the instruction does not use or alter the condition codes. +; +; NOCOND means that the instruction does not use or alter the condition +; codes but can be converted into a conditionally executed instruction. + +(define_attr "conds" "use,set,clob,unconditional,nocond" + (if_then_else + (ior (eq_attr "is_thumb1" "yes") + (eq_attr "type" "call")) + (const_string "clob") + (if_then_else (eq_attr "is_neon_type" "no") + (const_string "nocond") + (const_string "unconditional")))) + +; Predicable means that the insn can be conditionally executed based on +; an automatically added predicate (additional patterns are generated by +; gen...). We default to 'no' because no Thumb patterns match this rule +; and not all ARM patterns do. +(define_attr "predicable" "no,yes" (const_string "no")) + +; Only model the write buffer for ARM6 and ARM7. Earlier processors don't +; have one. Later ones, such as StrongARM, have write-back caches, so don't +; suffer blockages enough to warrant modelling this (and it can adversely +; affect the schedule). +(define_attr "model_wbuf" "no,yes" (const (symbol_ref "arm_tune_wbuf"))) + +; WRITE_CONFLICT implies that a read following an unrelated write is likely +; to stall the processor. Used with model_wbuf above. +(define_attr "write_conflict" "no,yes" + (if_then_else (eq_attr "type" + "block,call,load1") + (const_string "yes") + (const_string "no"))) + +; Classify the insns into those that take one cycle and those that take more +; than one on the main cpu execution unit. +(define_attr "core_cycles" "single,multi" + (if_then_else (eq_attr "type" + "adc_imm, adc_reg, adcs_imm, adcs_reg, adr, alu_ext, alu_imm, alu_reg,\ + alu_shift_imm, alu_shift_reg, alus_ext, alus_imm, alus_reg,\ + alus_shift_imm, alus_shift_reg, bfm, csel, rev, logic_imm, logic_reg,\ + logic_shift_imm, logic_shift_reg, logics_imm, logics_reg,\ + logics_shift_imm, logics_shift_reg, extend, shift_imm, float, fcsel,\ + wmmx_wor, wmmx_wxor, wmmx_wand, wmmx_wandn, wmmx_wmov, wmmx_tmcrr,\ + wmmx_tmrrc, wmmx_wldr, wmmx_wstr, wmmx_tmcr, wmmx_tmrc, wmmx_wadd,\ + wmmx_wsub, wmmx_wmul, wmmx_wmac, wmmx_wavg2, wmmx_tinsr, wmmx_textrm,\ + wmmx_wshufh, wmmx_wcmpeq, wmmx_wcmpgt, wmmx_wmax, wmmx_wmin, wmmx_wpack,\ + wmmx_wunpckih, wmmx_wunpckil, wmmx_wunpckeh, wmmx_wunpckel, wmmx_wror,\ + wmmx_wsra, wmmx_wsrl, wmmx_wsll, wmmx_wmadd, wmmx_tmia, wmmx_tmiaph,\ + wmmx_tmiaxy, wmmx_tbcst, wmmx_tmovmsk, wmmx_wacc, wmmx_waligni,\ + wmmx_walignr, wmmx_tandc, wmmx_textrc, wmmx_torc, wmmx_torvsc, wmmx_wsad,\ + wmmx_wabs, wmmx_wabsdiff, wmmx_waddsubhx, wmmx_wsubaddhx, wmmx_wavg4,\ + wmmx_wmulw, wmmx_wqmulm, wmmx_wqmulwm, wmmx_waddbhus, wmmx_wqmiaxy,\ + wmmx_wmiaxy, wmmx_wmiawxy, wmmx_wmerge") + (const_string "single") + (const_string "multi"))) + +;; FAR_JUMP is "yes" if a BL instruction is used to generate a branch to a +;; distant label. Only applicable to Thumb code. +(define_attr "far_jump" "yes,no" (const_string "no")) + + +;; The number of machine instructions this pattern expands to. +;; Used for Thumb-2 conditional execution.
+(define_attr "ce_count" "" (const_int 1)) + +;;--------------------------------------------------------------------------- +;; Unspecs + +(include "unspecs.md") + +;;--------------------------------------------------------------------------- +;; Mode iterators + +(include "iterators.md") + +;;--------------------------------------------------------------------------- +;; Predicates + +(include "predicates.md") +(include "constraints.md") + +;;--------------------------------------------------------------------------- +;; Pipeline descriptions + +(define_attr "tune_cortexr4" "yes,no" + (const (if_then_else + (eq_attr "tune" "cortexr4,cortexr4f,cortexr5") + (const_string "yes") + (const_string "no")))) + +;; True if the generic scheduling description should be used. + +(define_attr "generic_sched" "yes,no" + (const (if_then_else + (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa12,cortexa15,cortexa53,cortexm4,marvell_pj4") + (eq_attr "tune_cortexr4" "yes")) + (const_string "no") + (const_string "yes")))) + +(define_attr "generic_vfp" "yes,no" + (const (if_then_else + (and (eq_attr "fpu" "vfp") + (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexa53,cortexm4,marvell_pj4") + (eq_attr "tune_cortexr4" "no")) + (const_string "yes") + (const_string "no")))) + +(include "marvell-f-iwmmxt.md") +(include "arm-generic.md") +(include "arm926ejs.md") +(include "arm1020e.md") +(include "arm1026ejs.md") +(include "arm1136jfs.md") +(include "fa526.md") +(include "fa606te.md") +(include "fa626te.md") +(include "fmp626.md") +(include "fa726te.md") +(include "cortex-a5.md") +(include "cortex-a7.md") +(include "cortex-a8.md") +(include "cortex-a9.md") +(include "cortex-a15.md") +(include "cortex-a53.md") +(include "cortex-r4.md") +(include "cortex-r4f.md") +(include "cortex-m4.md") +(include "cortex-m4-fpu.md") +(include "vfp11.md") +(include "marvell-pj4.md") + + +;;--------------------------------------------------------------------------- +;; Insn patterns +;; +;; Addition insns. + +;; Note: For DImode insns, there is normally no reason why operands should +;; not be in the same register, what we don't want is for something being +;; written to partially overlap something that is an input. + +(define_expand "adddi3" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "arm_adddi_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + " + if (TARGET_THUMB1) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + if (!REG_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + } + " +) + +(define_insn "*thumb1_adddi3" + [(set (match_operand:DI 0 "register_operand" "=l") + (plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM)) + ] + "TARGET_THUMB1" + "add\\t%Q0, %Q0, %Q2\;adc\\t%R0, %R0, %R2" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*arm_adddi3" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r") + (match_operand:DI 2 "arm_adddi_operand" "r, 0, r, Dd, Dd"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !TARGET_NEON" + "#" + "TARGET_32BIT && reload_completed + && ! 
(TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0])))" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart_mode (SImode, DImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*adddi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (plus:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (ashiftrt:SI (match_dup 2) + (const_int 31)) + (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*adddi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (plus:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(parallel [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (plus:SI (plus:SI (match_dup 4) (const_int 0)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (plus:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT && CONST_INT_P (operands[2])) + { + arm_split_constant (PLUS, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + " +) + +; If there is a scratch available, this will be faster than synthesizing the +; addition. 
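+; (The peephole below applies when neither the constant nor its negation is a +; valid ADD/SUB immediate but its bitwise complement is, so the constant can +; be loaded into the scratch register with a single MVN and then added with +; an ordinary register-register ADD.)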
+(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (plus:SI (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_32BIT && + !(const_ok_for_arm (INTVAL (operands[2])) + || const_ok_for_arm (-INTVAL (operands[2]))) + && const_ok_for_arm (~INTVAL (operands[2]))" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))] + "" +) + +;; The r/r/k alternative is required when reloading the address +;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will +;; put the duplicated register first, and not try the commutative version. +(define_insn_and_split "*arm_addsi3" + [(set (match_operand:SI 0 "s_register_operand" "=rk,l,l ,l ,r ,k ,r,r ,k ,r ,k,k,r ,k ,r") + (plus:SI (match_operand:SI 1 "s_register_operand" "%0 ,l,0 ,l ,rk,k ,r,rk,k ,rk,k,r,rk,k ,rk") + (match_operand:SI 2 "reg_or_int_operand" "rk ,l,Py,Pd,rI,rI,k,Pj,Pj,L ,L,L,PJ,PJ,?n")))] + "TARGET_32BIT" + "@ + add%?\\t%0, %0, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %1, %2 + add%?\\t%0, %2, %1 + addw%?\\t%0, %1, %2 + addw%?\\t%0, %1, %2 + sub%?\\t%0, %1, #%n2 + sub%?\\t%0, %1, #%n2 + sub%?\\t%0, %1, #%n2 + subw%?\\t%0, %1, #%n2 + subw%?\\t%0, %1, #%n2 + #" + "TARGET_32BIT + && CONST_INT_P (operands[2]) + && !const_ok_for_op (INTVAL (operands[2]), PLUS) + && (reload_completed || !arm_eliminable_register (operands[1]))" + [(clobber (const_int 0))] + " + arm_split_constant (PLUS, SImode, curr_insn, + INTVAL (operands[2]), operands[0], + operands[1], 0); + DONE; + " + [(set_attr "length" "2,4,4,4,4,4,4,4,4,4,4,4,4,4,16") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "arch" "t2,t2,t2,t2,*,*,*,t2,t2,*,*,a,t2,t2,*") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_imm") + (const_string "alu_reg"))) + ] +) + +(define_insn_and_split "*thumb1_addsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,k,l,l,l") + (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,k,k,0,l,k") + (match_operand:SI 2 "nonmemory_operand" "I,J,lL,*hk,*rk,M,O,Pa,Pb,Pc")))] + "TARGET_THUMB1" + "* + static const char * const asms[] = + { + \"add\\t%0, %0, %2\", + \"sub\\t%0, %0, #%n2\", + \"add\\t%0, %1, %2\", + \"add\\t%0, %0, %2\", + \"add\\t%0, %0, %2\", + \"add\\t%0, %1, %2\", + \"add\\t%0, %1, %2\", + \"#\", + \"#\", + \"#\" + }; + if ((which_alternative == 2 || which_alternative == 6) + && CONST_INT_P (operands[2]) + && INTVAL (operands[2]) < 0) + return \"sub\\t%0, %1, #%n2\"; + return asms[which_alternative]; + " + "&& reload_completed && CONST_INT_P (operands[2]) + && ((operands[1] != stack_pointer_rtx + && (INTVAL (operands[2]) > 255 || INTVAL (operands[2]) < -255)) + || (operands[1] == stack_pointer_rtx + && INTVAL (operands[2]) > 1020))" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 3)))] + { + HOST_WIDE_INT offset = INTVAL (operands[2]); + if (operands[1] == stack_pointer_rtx) + offset -= 1020; + else + { + if (offset > 255) + offset = 255; + else if (offset < -255) + offset = -255; + } + operands[3] = GEN_INT (offset); + operands[2] = GEN_INT (INTVAL (operands[2]) - offset); + } + [(set_attr "length" "2,2,2,2,2,2,2,4,4,4") + (set_attr "type" "alus_imm,alus_imm,alus_reg,alus_reg,alus_reg, + 
alus_reg,alus_reg,multiple,multiple,multiple")] +) + +;; Reloading and elimination of the frame pointer can +;; sometimes cause this optimization to be missed. +(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (set (match_dup 0) + (plus:SI (match_dup 0) (reg:SI SP_REGNUM)))] + "TARGET_THUMB1 + && (unsigned HOST_WIDE_INT) (INTVAL (operands[1])) < 1024 + && (INTVAL (operands[1]) & 3) == 0" + [(set (match_dup 0) (plus:SI (reg:SI SP_REGNUM) (match_dup 1)))] + "" +) + +(define_insn "addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "r, r,r") + (match_operand:SI 2 "arm_add_operand" "I,L,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_ARM" + "@ + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2 + add%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*addsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 0 "s_register_operand" "r, r, r") + (match_operand:SI 1 "arm_add_operand" "I,L, r")) + (const_int 0)))] + "TARGET_ARM" + "@ + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "type" "alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*compare_negsi_si" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (neg:SI (match_operand:SI 0 "s_register_operand" "l,r")) + (match_operand:SI 1 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "cmn%?\\t%1, %0" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*") + (set_attr "length" "2,4") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "alus_reg")] +) + +;; This is the canonicalization of addsi3_compare0_for_combiner when the +;; addend is a constant. +(define_insn "cmpsi2_addneg" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_addimm_operand" "L,I"))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (plus:SI (match_dup 1) + (match_operand:SI 3 "arm_addimm_operand" "I,L")))] + "TARGET_32BIT && INTVAL (operands[2]) == -INTVAL (operands[3])" + "@ + add%.\\t%0, %1, %3 + sub%.\\t%0, %1, #%n3" + [(set_attr "conds" "set") + (set_attr "type" "alus_reg")] +) + +;; Convert the sequence +;; sub rd, rn, #1 +;; cmn rd, #1 (equivalent to cmp rd, #-1) +;; bne dest +;; into +;; subs rd, rn, #1 +;; bcs dest ((unsigned)rn >= 1) +;; similarly for the beq variant using bcc. +;; This is a common looping idiom (while (n--)) +(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (plus:SI (match_operand:SI 1 "arm_general_register_operand" "") + (const_int -1))) + (set (match_operand 2 "cc_register" "") + (compare (match_dup 0) (const_int -1))) + (set (pc) + (if_then_else (match_operator 3 "equality_operator" + [(match_dup 2) (const_int 0)]) + (match_operand 4 "" "") + (match_operand 5 "" "")))] + "TARGET_32BIT && peep2_reg_dead_p (3, operands[2])" + [(parallel[ + (set (match_dup 2) + (compare:CC + (match_dup 1) (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 1) (const_int -1)))]) + (set (pc) + (if_then_else (match_op_dup 3 [(match_dup 2) (const_int 0)]) + (match_dup 4) + (match_dup 5)))] + "operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); + operands[3] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE + ? 
GEU : LTU), + VOIDmode, + operands[2], const0_rtx);" +) + +;; The next four insns work because they compare the result with one of +;; the operands, and we know that the use of the condition code is +;; either GEU or LTU, so we can use the carry flag from the addition +;; instead of doing the compare a second time. +(define_insn "*addsi3_compare_op1" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "arm_add_operand" "I,L,r")) + (match_dup 1))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2 + add%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*addsi3_compare_op2" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "arm_add_operand" "I,L,r")) + (match_dup 2))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + add%.\\t%0, %1, %2 + add%.\\t%0, %1, %2 + sub%.\\t%0, %1, #%n2" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*compare_addsi2_op0" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 0 "s_register_operand" "l,l,r,r,r") + (match_operand:SI 1 "arm_add_operand" "Pv,l,I,L,r")) + (match_dup 0)))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1 + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*,*,*") + (set_attr "predicable_short_it" "yes,yes,no,no,no") + (set_attr "length" "2,2,4,4,4") + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_imm,alus_reg")] +) + +(define_insn "*compare_addsi2_op1" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:SI (match_operand:SI 0 "s_register_operand" "l,l,r,r,r") + (match_operand:SI 1 "arm_add_operand" "Pv,l,I,L,r")) + (match_dup 1)))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1 + cmn%?\\t%0, %1 + cmp%?\\t%0, #%n1 + cmn%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*,*,*") + (set_attr "predicable_short_it" "yes,yes,no,no,no") + (set_attr "length" "2,2,4,4,4") + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_imm,alus_reg")] + ) + +(define_insn "*addsi3_carryin_" + [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%l,r,r") + (match_operand:SI 2 "arm_not_operand" "0,rI,K")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "@ + adc%?\\t%0, %1, %2 + adc%?\\t%0, %1, %2 + sbc%?\\t%0, %1, #%B2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "type" "adc_reg,adc_reg,adc_imm")] +) + +(define_insn "*addsi3_carryin_alt2_" + [(set (match_operand:SI 0 "s_register_operand" "=l,r,r") + (plus:SI (plus:SI (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)) + (match_operand:SI 1 "s_register_operand" "%l,r,r")) + (match_operand:SI 2 "arm_rhs_operand" "l,rI,K")))] + "TARGET_32BIT" + "@ + adc%?\\t%0, %1, %2 + adc%?\\t%0, %1, %2 + sbc%?\\t%0, %1, #%B2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,*,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,no,no") + (set_attr "type" 
"adc_reg,adc_reg,adc_imm")] +) + +(define_insn "*addsi3_carryin_shift_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (plus:SI + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")]) + (match_operand:SI 1 "s_register_operand" "r")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "adc%?\\t%0, %1, %3%S2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] +) + +(define_insn "*addsi3_carryin_clobercc_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r") + (match_operand:SI 2 "arm_rhs_operand" "rI")) + (LTUGEU:SI (reg: CC_REGNUM) (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "adc%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "adcs_reg")] +) + +(define_insn "*subsi3_carryin" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I") + (match_operand:SI 2 "s_register_operand" "r,r")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "@ + sbc%?\\t%0, %1, %2 + rsc%?\\t%0, %2, %1" + [(set_attr "conds" "use") + (set_attr "arch" "*,a") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "adc_reg,adc_imm")] +) + +(define_insn "*subsi3_carryin_const" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (plus:SI (match_operand:SI 1 "reg_or_int_operand" "r") + (match_operand:SI 2 "arm_not_operand" "K")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbc\\t%0, %1, #%B2" + [(set_attr "conds" "use") + (set_attr "type" "adc_imm")] +) + +(define_insn "*subsi3_carryin_compare" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r"))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI (match_dup 1) + (match_dup 2)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbcs\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "adcs_reg")] +) + +(define_insn "*subsi3_carryin_compare_const" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "reg_or_int_operand" "r") + (match_operand:SI 2 "arm_not_operand" "K"))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (plus:SI (match_dup 1) + (match_dup 2)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbcs\\t%0, %1, #%B2" + [(set_attr "conds" "set") + (set_attr "type" "adcs_imm")] +) + +(define_insn "*subsi3_carryin_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")])) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_32BIT" + "sbc%?\\t%0, %1, %3%S2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] +) + +(define_insn "*rsbsi3_carryin_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI (minus:SI + (match_operator:SI 2 
"shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "reg_or_int_operand" "rM")]) + (match_operand:SI 1 "s_register_operand" "r")) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + "TARGET_ARM" + "rsc%?\\t%0, %1, %3%S2" + [(set_attr "conds" "use") + (set_attr "predicable" "yes") + (set (attr "type") (if_then_else (match_operand 4 "const_int_operand" "") + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] +) + +; transform ((x << y) - 1) to ~(~(x-1) << y) Where X is a constant. +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (plus:SI (ashift:SI (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "s_register_operand" "")) + (const_int -1))) + (clobber (match_operand:SI 3 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 0) (not:SI (ashift:SI (match_dup 3) (match_dup 2))))] + " + operands[1] = GEN_INT (~(INTVAL (operands[1]) - 1)); +") + +(define_expand "addsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (plus:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "adddf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (plus:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + +(define_expand "subdi3" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (minus:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + " + if (TARGET_THUMB1) + { + if (!REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + if (!REG_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + } + " +) + +(define_insn_and_split "*arm_subdi3" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r,0") + (match_operand:DI 2 "s_register_operand" "r,0,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !TARGET_NEON" + "#" ; "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb_subdi3" + [(set (match_operand:DI 0 "register_operand" "=l") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB1" + "sub\\t%Q0, %Q0, %Q2\;sbc\\t%R0, %R0, %R2" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_di_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r") + (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC 
CC_REGNUM))] + "TARGET_32BIT" + "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (plus:SI (match_dup 4) (match_dup 5)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = GEN_INT (~0); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_di_sesidi" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "0,r") + (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" ; "subs\\t%Q0, %Q1, %2\;sbc\\t%R0, %R1, %2, asr #31" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) + (ashiftrt:SI (match_dup 2) + (const_int 31))) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, #0" + ; is equivalent to: + ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 1))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))]) + (set (match_dup 3) (minus:SI (minus:SI (const_int 0) (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (minus:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" ; "rsbs\\t%Q0, %Q1, %2\;rsc\\t%R0, %R1, %2, asr #31" + ; is equivalent to: + ; "subs\\t%Q0, %2, %Q1\;rsc\\t%R0, %R1, %2, asr #31" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 1))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1)))]) + (set (match_dup 3) (minus:SI (minus:SI + (ashiftrt:SI (match_dup 2) + (const_int 31)) + (match_dup 4)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, 
operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*subdi_zesidi_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (minus:DI (zero_extend:DI + (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" ; "subs\\t%Q0, %1, %2\;sbc\\t%R0, %1, %1" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 3) (minus:SI (minus:SI (match_dup 1) (match_dup 1)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (minus:SI (match_operand:SI 1 "reg_or_int_operand" "") + (match_operand:SI 2 "s_register_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[1])) + { + if (TARGET_32BIT) + { + arm_split_constant (MINUS, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], + operands[2], optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + operands[1] = force_reg (SImode, operands[1]); + } + " +) + +(define_insn "thumb1_subsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (minus:SI (match_operand:SI 1 "register_operand" "l") + (match_operand:SI 2 "reg_or_int_operand" "lPd")))] + "TARGET_THUMB1" + "sub\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "conds" "set") + (set_attr "type" "alus_reg")] +) + +; ??? 
Check Thumb-2 split length +(define_insn_and_split "*arm_subsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=l,l ,l ,l ,r ,r,r,rk,r") + (minus:SI (match_operand:SI 1 "reg_or_int_operand" "l ,0 ,l ,Pz,rI,r,r,k ,?n") + (match_operand:SI 2 "reg_or_int_operand" "l ,Py,Pd,l ,r ,I,r,r ,r")))] + "TARGET_32BIT" + "@ + sub%?\\t%0, %1, %2 + sub%?\\t%0, %2 + sub%?\\t%0, %1, %2 + rsb%?\\t%0, %2, %1 + rsb%?\\t%0, %2, %1 + sub%?\\t%0, %1, %2 + sub%?\\t%0, %1, %2 + sub%?\\t%0, %1, %2 + #" + "&& (CONST_INT_P (operands[1]) + && !const_ok_for_arm (INTVAL (operands[1])))" + [(clobber (const_int 0))] + " + arm_split_constant (MINUS, SImode, curr_insn, + INTVAL (operands[1]), operands[0], operands[2], 0); + DONE; + " + [(set_attr "length" "4,4,4,4,4,4,4,4,16") + (set_attr "arch" "t2,t2,t2,t2,*,*,*,*,*") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no") + (set_attr "type" "alu_reg,alu_reg,alu_reg,alu_reg,alu_imm,alu_imm,alu_reg,alu_reg,multiple")] +) + +(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (minus:SI (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "arm_general_register_operand" "")))] + "TARGET_32BIT + && !const_ok_for_arm (INTVAL (operands[1])) + && const_ok_for_arm (~INTVAL (operands[1]))" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 0) (minus:SI (match_dup 3) (match_dup 2)))] + "" +) + +(define_insn "*subsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I") + (match_operand:SI 2 "arm_rhs_operand" "I,r,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + sub%.\\t%0, %1, %2 + sub%.\\t%0, %1, %2 + rsb%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_reg,alus_reg")] +) + +(define_insn "subsi3_compare" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "arm_rhs_operand" "r,r,I") + (match_operand:SI 2 "arm_rhs_operand" "I,r,r"))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + sub%.\\t%0, %1, %2 + sub%.\\t%0, %1, %2 + rsb%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_reg,alus_reg")] +) + +(define_expand "subsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (minus:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "subdf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (minus:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + + +;; Multiplication insns + +(define_expand "mulhi3" + [(set (match_operand:HI 0 "s_register_operand" "") + (mult:HI (match_operand:HI 1 "s_register_operand" "") + (match_operand:HI 2 "s_register_operand" "")))] + "TARGET_DSP_MULTIPLY" + " + { + rtx result = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3 (result, operands[1], operands[2])); + emit_move_insn (operands[0], gen_lowpart (HImode, result)); + DONE; + }" +) + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (mult:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +;; Use `&' and then `0' to prevent the operands 0 and 1 
being the same +(define_insn "*arm_mulsi3" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")))] + "TARGET_32BIT && !arm_arch6" + "mul%?\\t%0, %2, %1" + [(set_attr "type" "mul") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_mulsi3_v6" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (mult:SI (match_operand:SI 1 "s_register_operand" "0,l,r") + (match_operand:SI 2 "s_register_operand" "l,0,r")))] + "TARGET_32BIT && arm_arch6" + "mul%?\\t%0, %1, %2" + [(set_attr "type" "mul") + (set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*") + (set_attr "length" "4") + (set_attr "predicable_short_it" "yes,yes,no")] +) + +; Unfortunately with the Thumb the '&'/'0' trick can fail when operands +; 1 and 2 are the same, because reload will make operand 0 match +; operand 1 without realizing that this conflicts with operand 2. We fix +; this by adding another alternative to match this case, and then `reload' +; it ourselves. This alternative must come first. +(define_insn "*thumb_mulsi3" + [(set (match_operand:SI 0 "register_operand" "=&l,&l,&l") + (mult:SI (match_operand:SI 1 "register_operand" "%l,*h,0") + (match_operand:SI 2 "register_operand" "l,l,l")))] + "TARGET_THUMB1 && !arm_arch6" + "* + if (which_alternative < 2) + return \"mov\\t%0, %1\;mul\\t%0, %2\"; + else + return \"mul\\t%0, %2\"; + " + [(set_attr "length" "4,4,2") + (set_attr "type" "muls")] +) + +(define_insn "*thumb_mulsi3_v6" + [(set (match_operand:SI 0 "register_operand" "=l,l,l") + (mult:SI (match_operand:SI 1 "register_operand" "0,l,0") + (match_operand:SI 2 "register_operand" "l,0,0")))] + "TARGET_THUMB1 && arm_arch6" + "@ + mul\\t%0, %2 + mul\\t%0, %1 + mul\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "muls")] +) + +(define_insn "*mulsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (mult:SI (match_dup 2) (match_dup 1)))] + "TARGET_ARM && !arm_arch6" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "muls")] +) + +(define_insn "*mulsi3_compare0_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (match_dup 2) (match_dup 1)))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "muls")] +) + +(define_insn "*mulsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 1 "s_register_operand" "%0,r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=&r,&r"))] + "TARGET_ARM && !arm_arch6" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "muls")] +) + +(define_insn "*mulsi_compare0_scratch_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mul%.\\t%0, %2, %1" + [(set_attr "conds" "set") + (set_attr "type" "muls")] +) + +;; Unnamed templates to match MLA instruction.
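+;; For example, *mulsi3addsi below matches (plus:SI (mult:SI op2 op1) op3) +;; and emits a single MLA whose destination receives the product of +;; operands 2 and 1 plus the value of operand 3.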
+ +(define_insn "*mulsi3addsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") + (plus:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "r,r,0,0")))] + "TARGET_32BIT && !arm_arch6" + "mla%?\\t%0, %2, %1, %3" + [(set_attr "type" "mla") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsi3addsi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_32BIT && arm_arch6" + "mla%?\\t%0, %2, %1, %3" + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "*mulsi3addsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "r,r,0,0")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r") + (plus:SI (mult:SI (match_dup 2) (match_dup 1)) + (match_dup 3)))] + "TARGET_ARM && arm_arch6" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "type" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (match_dup 2) (match_dup 1)) + (match_dup 3)))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "type" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 1 "s_register_operand" "%0,r,0,r")) + (match_operand:SI 3 "s_register_operand" "?r,r,0,0")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))] + "TARGET_ARM && !arm_arch6" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "type" "mlas")] +) + +(define_insn "*mulsi3addsi_compare0_scratch_v6" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (mult:SI + (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r")) + (match_operand:SI 3 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_ARM && arm_arch6 && optimize_size" + "mla%.\\t%0, %2, %1, %3" + [(set_attr "conds" "set") + (set_attr "type" "mlas")] +) + +(define_insn "*mulsi3subsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (minus:SI + (match_operand:SI 3 "s_register_operand" "r") + (mult:SI (match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch_thumb2" + "mls%?\\t%0, %2, %1, %3" + [(set_attr "type" "mla") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "maddsidi4" + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (match_operand:DI 3 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch3m" + 
"") + +(define_insn "*mulsidi3adddi" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "%r")) + (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "type" "smlal") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsidi3adddi_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch6" + "smlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "type" "smlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +;; 32x32->64 widening multiply. +;; As with mulsi3, the only difference between the v3-5 and v6+ +;; versions of these patterns is the requirement that the output not +;; overlap the inputs, but that still means we have to have a named +;; expander and two different starred insns. + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*mulsidi3_nov6" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smull%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smull") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulsidi3_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch6" + "smull%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "umulsidi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*umulsidi3_nov6" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umull%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "umull") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsidi3_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))] + "TARGET_32BIT && arm_arch6" + "umull%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "umull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "umaddsidi4" + [(set (match_operand:DI 0 "s_register_operand" "") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (match_operand:DI 3 "s_register_operand" "")))] + 
"TARGET_32BIT && arm_arch3m" + "") + +(define_insn "*umulsidi3adddi" + [(set (match_operand:DI 0 "s_register_operand" "=&r") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "%r")) + (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "type" "umlal") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsidi3adddi_v6" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r"))) + (match_operand:DI 1 "s_register_operand" "0")))] + "TARGET_32BIT && arm_arch6" + "umlal%?\\t%Q0, %R0, %3, %2" + [(set_attr "type" "umlal") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "smulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 ""))])] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*smulsi3_highpart_nov6" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r,r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=&r,&r"))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "smull%?\\t%3, %0, %2, %1" + [(set_attr "type" "smull") + (set_attr "predicable" "yes")] +) + +(define_insn "*smulsi3_highpart_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=r"))] + "TARGET_32BIT && arm_arch6" + "smull%?\\t%3, %0, %2, %1" + [(set_attr "type" "smull") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "umulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 ""))])] + "TARGET_32BIT && arm_arch3m" + "" +) + +(define_insn "*umulsi3_highpart_nov6" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%0,r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r,r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=&r,&r"))] + "TARGET_32BIT && arm_arch3m && !arm_arch6" + "umull%?\\t%3, %0, %2, %1" + [(set_attr "type" "umull") + (set_attr "predicable" "yes")] +) + +(define_insn "*umulsi3_highpart_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=r"))] + "TARGET_32BIT && arm_arch6" + "umull%?\\t%3, %0, %2, %1" + [(set_attr "type" "umull") + (set_attr 
"predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "%r")) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))))] + "TARGET_DSP_MULTIPLY" + "smulbb%?\\t%0, %1, %2" + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes")] +) + +(define_insn "*mulhisi3tb" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))))] + "TARGET_DSP_MULTIPLY" + "smultb%?\\t%0, %1, %2" + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "*mulhisi3bt" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "r")) + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16))))] + "TARGET_DSP_MULTIPLY" + "smulbt%?\\t%0, %1, %2" + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "*mulhisi3tt" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16))))] + "TARGET_DSP_MULTIPLY" + "smultt%?\\t%0, %1, %2" + [(set_attr "type" "smulxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "maddhisi4" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (sign_extend:SI + (match_operand:HI 1 "s_register_operand" "r")) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_DSP_MULTIPLY" + "smlabb%?\\t%0, %1, %2, %3" + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +;; Note: there is no maddhisi4ibt because this one is canonical form +(define_insn "*maddhisi4tb" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_DSP_MULTIPLY" + "smlatb%?\\t%0, %1, %2, %3" + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "*maddhisi4tt" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (mult:SI (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16))) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_DSP_MULTIPLY" + "smlatt%?\\t%0, %1, %2, %3" + [(set_attr "type" "smlaxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_insn "maddhidi4" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + (match_operand:HI 1 "s_register_operand" "r")) + (sign_extend:DI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:DI 3 "s_register_operand" "0")))] + "TARGET_DSP_MULTIPLY" + "smlalbb%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +;; Note: there 
is no maddhidi4ibt because this one is canonical form +(define_insn "*maddhidi4tb" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16))) + (sign_extend:DI + (match_operand:HI 2 "s_register_operand" "r"))) + (match_operand:DI 3 "s_register_operand" "0")))] + "TARGET_DSP_MULTIPLY" + "smlaltb%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*maddhidi4tt" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (plus:DI + (mult:DI (sign_extend:DI + (ashiftrt:SI + (match_operand:SI 1 "s_register_operand" "r") + (const_int 16))) + (sign_extend:DI + (ashiftrt:SI + (match_operand:SI 2 "s_register_operand" "r") + (const_int 16)))) + (match_operand:DI 3 "s_register_operand" "0")))] + "TARGET_DSP_MULTIPLY" + "smlaltt%?\\t%Q0, %R0, %1, %2" + [(set_attr "type" "smlalxy") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_expand "mulsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (mult:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "muldf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (mult:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + +;; Division insns + +(define_expand "divsf3" + [(set (match_operand:SF 0 "s_register_operand" "") + (div:SF (match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "") + +(define_expand "divdf3" + [(set (match_operand:DF 0 "s_register_operand" "") + (div:DF (match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "") + +;; Boolean and,ior,xor insns + +;; Split up double word logical operations + +;; Split up simple DImode logical operations. Simply perform the logical +;; operation on the upper and lower halves of the registers. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (match_operator:DI 6 "logical_binary_operator" + [(match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "s_register_operand" "")]))] + "TARGET_32BIT && reload_completed + && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0]))) + && ! 
IS_IWMMXT_REGNUM (REGNO (operands[0]))" + [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)])) + (set (match_dup 3) (match_op_dup:SI 6 [(match_dup 4) (match_dup 5)]))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" +) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (match_operator:DI 6 "logical_binary_operator" + [(sign_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")]))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)])) + (set (match_dup 3) (match_op_dup:SI 6 + [(ashiftrt:SI (match_dup 2) (const_int 31)) + (match_dup 4)]))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" +) + +;; The zero extend of operand 2 means we can just copy the high part of +;; operand1 into operand0. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (ior:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")))] + "TARGET_32BIT && operands[0] != operands[1] && reload_completed" + [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" +) + +;; The zero extend of operand 2 means we can just copy the high part of +;; operand1 into operand0. 
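+;; As an illustration only (not part of the original sources): in C terms,
+;; "d = x ^ (uint64_t) (uint32_t) y" splits into
+;;   d_lo = x_lo ^ y;   /* the real XOR happens in the low word */
+;;   d_hi = x_hi;       /* the high word of the zero-extended y is 0, so copy */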
+(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (xor:DI + (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")) + (match_operand:DI 1 "s_register_operand" "")))] + "TARGET_32BIT && operands[0] != operands[1] && reload_completed" + [(set (match_dup 0) (xor:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" +) + +(define_expand "anddi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (and:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "neon_inv_logic_op2" "")))] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*anddi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vand\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vand", &operands[2], + DImode, 1, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: /* fall through */ + return "#"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (AND, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (AND, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "type" "neon_logic,neon_logic,multiple,multiple,\ + multiple,multiple,neon_logic,neon_logic") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*, + avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "length" "*,*,8,8,8,8,*,*") + ] +) + +(define_insn_and_split "*anddi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + ; The zero extend of operand 2 clears the high word of the output + ; operand. 
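+  ; Illustration only (not part of the original sources): in C terms,
+  ; "d = x & (uint64_t) (uint32_t) y" becomes
+  ;   d_lo = x_lo & y;  d_hi = 0;
+  ; which is what the split below emits.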
+ [(set (match_dup 0) (and:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) (const_int 0))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*anddi_sesdi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "andsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (and:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (CONST_INT_P (operands[2])) + { + if (INTVAL (operands[2]) == 255 && arm_arch6) + { + operands[1] = convert_to_mode (QImode, operands[1], 1); + emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0], + operands[1])); + } + else + arm_split_constant (AND, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], + operands[1], + optimize && can_create_pseudo_p ()); + + DONE; + } + } + else /* TARGET_THUMB1 */ + { + if (!CONST_INT_P (operands[2])) + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + else + { + int i; + + if (((unsigned HOST_WIDE_INT) ~INTVAL (operands[2])) < 256) + { + operands[2] = force_reg (SImode, + GEN_INT (~INTVAL (operands[2]))); + + emit_insn (gen_thumb1_bicsi3 (operands[0], operands[2], operands[1])); + + DONE; + } + + for (i = 9; i <= 31; i++) + { + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (operands[2])) + { + emit_insn (gen_extzv (operands[0], operands[1], GEN_INT (i), + const0_rtx)); + DONE; + } + else if ((((HOST_WIDE_INT) 1) << i) - 1 + == ~INTVAL (operands[2])) + { + rtx shift = GEN_INT (i); + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_lshrsi3 (reg, operands[1], shift)); + emit_insn (gen_ashlsi3 (operands[0], reg, shift)); + + DONE; + } + } + + operands[2] = force_reg (SImode, operands[2]); + } + } + " +) + +; ??? 
Check split length for Thumb-2 +(define_insn_and_split "*arm_andsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r") + (and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))] + "TARGET_32BIT" + "@ + and%?\\t%0, %1, %2 + and%?\\t%0, %1, %2 + bic%?\\t%0, %1, #%B2 + and%?\\t%0, %1, %2 + #" + "TARGET_32BIT + && CONST_INT_P (operands[2]) + && !(const_ok_for_arm (INTVAL (operands[2])) + || const_ok_for_arm (~INTVAL (operands[2])))" + [(clobber (const_int 0))] + " + arm_split_constant (AND, SImode, curr_insn, + INTVAL (operands[2]), operands[0], operands[1], 0); + DONE; + " + [(set_attr "length" "4,4,4,4,16") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no,yes,no,no,no") + (set_attr "type" "logic_imm,logic_imm,logic_reg,logic_reg,logic_imm")] +) + +(define_insn "*thumb1_andsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (and:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "and\\t%0, %2" + [(set_attr "length" "2") + (set_attr "type" "logic_imm") + (set_attr "conds" "set")]) + +(define_insn "*andsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "arm_not_operand" "I,K,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (and:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "@ + and%.\\t%0, %1, %2 + bic%.\\t%0, %1, #%B2 + and%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_imm,logics_reg")] +) + +(define_insn "*andsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (match_operand:SI 0 "s_register_operand" "r,r,r") + (match_operand:SI 1 "arm_not_operand" "I,K,r")) + (const_int 0))) + (clobber (match_scratch:SI 2 "=X,r,X"))] + "TARGET_32BIT" + "@ + tst%?\\t%0, %1 + bic%.\\t%2, %0, #%B1 + tst%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_imm,logics_reg")] +) + +(define_insn "*zeroextractsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 0 "s_register_operand" "r") + (match_operand 1 "const_int_operand" "n") + (match_operand 2 "const_int_operand" "n")) + (const_int 0)))] + "TARGET_32BIT + && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32 + && INTVAL (operands[1]) > 0 + && INTVAL (operands[1]) + (INTVAL (operands[2]) & 1) <= 8 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32)" + "* + operands[1] = GEN_INT (((1 << INTVAL (operands[1])) - 1) + << INTVAL (operands[2])); + output_asm_insn (\"tst%?\\t%0, %1\", operands); + return \"\"; + " + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logics_imm")] +) + +(define_insn_and_split "*ne_zeroextractsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ne:SI (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n")) + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)" + "#" + "TARGET_32BIT + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && 
INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32)" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (const_int 1)))] + " + operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1) + << INTVAL (operands[3])); + " + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 12) + (const_int 8))) + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*ne_zeroextractsi_shifted" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ne:SI (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (const_int 0)) + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "TARGET_ARM" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (const_int 1)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*ite_ne_zeroextractsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (ne (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (match_operand:SI 3 "const_int_operand" "n")) + (const_int 0)) + (match_operand:SI 4 "arm_not_operand" "rIK") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32) + && !reg_overlap_mentioned_p (operands[0], operands[4])" + "#" + "TARGET_ARM + && (INTVAL (operands[3]) >= 0 && INTVAL (operands[3]) < 32 + && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) + (INTVAL (operands[3]) & 1) <= 8 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32) + && !reg_overlap_mentioned_p (operands[0], operands[4])" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (match_dup 4)))] + " + operands[2] = GEN_INT (((1 << INTVAL (operands[2])) - 1) + << INTVAL (operands[3])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*ite_ne_zeroextractsi_shifted" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (ne (zero_extract:SI + (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "n") + (const_int 0)) + (const_int 0)) + (match_operand:SI 3 "arm_not_operand" "rIK") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && !reg_overlap_mentioned_p (operands[0], operands[3])" + "#" + "TARGET_ARM && !reg_overlap_mentioned_p (operands[0], operands[3])" + [(parallel [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set 
(match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SI (eq (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (match_dup 0) (match_dup 3)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + " + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extract:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" ""))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_THUMB1" + [(set (match_dup 4) (ashift:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (lshiftrt:SI (match_dup 4) (match_dup 3)))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[2]); + + operands[2] = GEN_INT (32 - temp - INTVAL (operands[3])); + operands[3] = GEN_INT (32 - temp); + }" +) + +;; ??? Use Thumb-2 has bitfield insert/extract instructions. +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operand:SI 5 "s_register_operand" "")])) + (clobber (match_operand:SI 6 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 6) (ashift:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI (match_dup 6) (match_dup 4)) + (match_dup 5)]))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[3]); + + operands[3] = GEN_INT (32 - temp - INTVAL (operands[4])); + operands[4] = GEN_INT (32 - temp); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_THUMB1" + [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 3)))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[2]); + + operands[2] = GEN_INT (32 - temp - INTVAL (operands[3])); + operands[3] = GEN_INT (32 - temp); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operand:SI 5 "s_register_operand" "")])) + (clobber (match_operand:SI 6 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 6) (ashift:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 6) (match_dup 4)) + (match_dup 5)]))] + "{ + HOST_WIDE_INT temp = INTVAL (operands[3]); + + operands[3] = GEN_INT (32 - temp - INTVAL (operands[4])); + operands[4] = GEN_INT (32 - temp); + }" +) + +;;; ??? This pattern is bogus. If operand3 has bits outside the range +;;; represented by the bitfield, then this will produce incorrect results. +;;; Somewhere, the value needs to be truncated. On targets like the m68k, +;;; which have a real bit-field insert instruction, the truncation happens +;;; in the bit-field insert instruction itself. Since arm does not have a +;;; bit-field insert instruction, we would have to emit code here to truncate +;;; the value before we insert. This loses some of the advantage of having +;;; this insv pattern, so this pattern needs to be reevalutated. 
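+;;; For illustration only (not part of the original sources), the truncation
+;;; discussed above would amount to masking the value before shifting it into
+;;; place, e.g. in C:
+;;;   mask = ((HOST_WIDE_INT) 1 << width) - 1;
+;;;   word = (word & ~(mask << start)) | ((value & mask) << start);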
+ +(define_expand "insv" + [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "general_operand" "") + (match_operand 2 "general_operand" "")) + (match_operand 3 "reg_or_int_operand" ""))] + "TARGET_ARM || arm_arch_thumb2" + " + { + int start_bit = INTVAL (operands[2]); + int width = INTVAL (operands[1]); + HOST_WIDE_INT mask = (((HOST_WIDE_INT)1) << width) - 1; + rtx target, subtarget; + + if (arm_arch_thumb2) + { + if (unaligned_access && MEM_P (operands[0]) + && s_register_operand (operands[3], GET_MODE (operands[3])) + && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0) + { + rtx base_addr; + + if (BYTES_BIG_ENDIAN) + start_bit = GET_MODE_BITSIZE (GET_MODE (operands[3])) - width + - start_bit; + + if (width == 32) + { + base_addr = adjust_address (operands[0], SImode, + start_bit / BITS_PER_UNIT); + emit_insn (gen_unaligned_storesi (base_addr, operands[3])); + } + else + { + rtx tmp = gen_reg_rtx (HImode); + + base_addr = adjust_address (operands[0], HImode, + start_bit / BITS_PER_UNIT); + emit_move_insn (tmp, gen_lowpart (HImode, operands[3])); + emit_insn (gen_unaligned_storehi (base_addr, tmp)); + } + DONE; + } + else if (s_register_operand (operands[0], GET_MODE (operands[0]))) + { + bool use_bfi = TRUE; + + if (CONST_INT_P (operands[3])) + { + HOST_WIDE_INT val = INTVAL (operands[3]) & mask; + + if (val == 0) + { + emit_insn (gen_insv_zero (operands[0], operands[1], + operands[2])); + DONE; + } + + /* See if the set can be done with a single orr instruction. */ + if (val == mask && const_ok_for_arm (val << start_bit)) + use_bfi = FALSE; + } + + if (use_bfi) + { + if (!REG_P (operands[3])) + operands[3] = force_reg (SImode, operands[3]); + + emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + } + else + FAIL; + } + + if (!s_register_operand (operands[0], GET_MODE (operands[0]))) + FAIL; + + target = copy_rtx (operands[0]); + /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical + subreg as the final target. */ + if (GET_CODE (target) == SUBREG) + { + subtarget = gen_reg_rtx (SImode); + if (GET_MODE_SIZE (GET_MODE (SUBREG_REG (target))) + < GET_MODE_SIZE (SImode)) + target = SUBREG_REG (target); + } + else + subtarget = target; + + if (CONST_INT_P (operands[3])) + { + /* Since we are inserting a known constant, we may be able to + reduce the number of bits that we have to clear so that + the mask becomes simple. */ + /* ??? This code does not check to see if the new mask is actually + simpler. It may not be. */ + rtx op1 = gen_reg_rtx (SImode); + /* ??? Truncate operand3 to fit in the bitfield. See comment before + start of this pattern. */ + HOST_WIDE_INT op3_value = mask & INTVAL (operands[3]); + HOST_WIDE_INT mask2 = ((mask & ~op3_value) << start_bit); + + emit_insn (gen_andsi3 (op1, operands[0], + gen_int_mode (~mask2, SImode))); + emit_insn (gen_iorsi3 (subtarget, op1, + gen_int_mode (op3_value << start_bit, SImode))); + } + else if (start_bit == 0 + && !(const_ok_for_arm (mask) + || const_ok_for_arm (~mask))) + { + /* A Trick, since we are setting the bottom bits in the word, + we can shift operand[3] up, operand[0] down, OR them together + and rotate the result back again. This takes 3 insns, and + the third might be mergeable into another op. */ + /* The shift up copes with the possibility that operand[3] is + wider than the bitfield. 
*/ + rtx op0 = gen_reg_rtx (SImode); + rtx op1 = gen_reg_rtx (SImode); + + emit_insn (gen_ashlsi3 (op0, operands[3], GEN_INT (32 - width))); + emit_insn (gen_lshrsi3 (op1, operands[0], operands[1])); + emit_insn (gen_iorsi3 (op1, op1, op0)); + emit_insn (gen_rotlsi3 (subtarget, op1, operands[1])); + } + else if ((width + start_bit == 32) + && !(const_ok_for_arm (mask) + || const_ok_for_arm (~mask))) + { + /* Similar trick, but slightly less efficient. */ + + rtx op0 = gen_reg_rtx (SImode); + rtx op1 = gen_reg_rtx (SImode); + + emit_insn (gen_ashlsi3 (op0, operands[3], GEN_INT (32 - width))); + emit_insn (gen_ashlsi3 (op1, operands[0], operands[1])); + emit_insn (gen_lshrsi3 (op1, op1, operands[1])); + emit_insn (gen_iorsi3 (subtarget, op1, op0)); + } + else + { + rtx op0 = gen_int_mode (mask, SImode); + rtx op1 = gen_reg_rtx (SImode); + rtx op2 = gen_reg_rtx (SImode); + + if (!(const_ok_for_arm (mask) || const_ok_for_arm (~mask))) + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp, op0)); + op0 = tmp; + } + + /* Mask out any bits in operand[3] that are not needed. */ + emit_insn (gen_andsi3 (op1, operands[3], op0)); + + if (CONST_INT_P (op0) + && (const_ok_for_arm (mask << start_bit) + || const_ok_for_arm (~(mask << start_bit)))) + { + op0 = gen_int_mode (~(mask << start_bit), SImode); + emit_insn (gen_andsi3 (op2, operands[0], op0)); + } + else + { + if (CONST_INT_P (op0)) + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp, op0)); + op0 = tmp; + } + + if (start_bit != 0) + emit_insn (gen_ashlsi3 (op0, op0, operands[2])); + + emit_insn (gen_andsi_notsi_si (op2, operands[0], op0)); + } + + if (start_bit != 0) + emit_insn (gen_ashlsi3 (op1, op1, operands[2])); + + emit_insn (gen_iorsi3 (subtarget, op1, op2)); + } + + if (subtarget != target) + { + /* If TARGET is still a SUBREG, then it must be wider than a word, + so we must be careful only to set the subword we were asked to. */ + if (GET_CODE (target) == SUBREG) + emit_move_insn (target, subtarget); + else + emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget)); + } + + DONE; + }" +) + +(define_insn "insv_zero" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (match_operand:SI 1 "const_int_operand" "M") + (match_operand:SI 2 "const_int_operand" "M")) + (const_int 0))] + "arm_arch_thumb2" + "bfc%?\t%0, %2, %1" + [(set_attr "length" "4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] +) + +(define_insn "insv_t2" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (match_operand:SI 1 "const_int_operand" "M") + (match_operand:SI 2 "const_int_operand" "M")) + (match_operand:SI 3 "s_register_operand" "r"))] + "arm_arch_thumb2" + "bfi%?\t%0, %3, %2, %1" + [(set_attr "length" "4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] +) + +; constants for op 2 will never be given to these patterns. +(define_insn_and_split "*anddi_notdi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r")) + (match_operand:DI 2 "s_register_operand" "r,0")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed + && ! (TARGET_NEON && IS_VFP_REGNUM (REGNO (operands[0]))) + && ! 
IS_IWMMXT_REGNUM (REGNO (operands[0]))" + [(set (match_dup 0) (and:SI (not:SI (match_dup 1)) (match_dup 2))) + (set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*anddi_notzesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r"))) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + bic%?\\t%Q0, %Q1, %2 + #" + ; (not (zero_extend ...)) allows us to just copy the high word from + ; operand1 to operand0. + "TARGET_32BIT + && reload_completed + && operands[0] != operands[1]" + [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (match_dup 4))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*anddi_notsesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (and:DI (not:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r"))) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (and:SI (not:SI + (ashiftrt:SI (match_dup 2) (const_int 31))) + (match_dup 4)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +(define_insn "andsi_notsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT" + "bic%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] +) + +(define_insn "thumb1_bicsi3" + [(set (match_operand:SI 0 "register_operand" "=l") + (and:SI (not:SI (match_operand:SI 1 "register_operand" "l")) + (match_operand:SI 2 "register_operand" "0")))] + "TARGET_THUMB1" + "bic\\t%0, %1" + [(set_attr "length" "2") + (set_attr "conds" "set") + (set_attr "type" "logics_reg")] +) + +(define_insn "andsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rM")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_ARM" + "bic%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "shift" "2") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "logic_shift_imm") + (const_string 
"logic_shift_reg")))] +) + +(define_insn "*andsi_notsi_si_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_dup 2)) (match_dup 1)))] + "TARGET_32BIT" + "bic%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_shift_reg")] +) + +(define_insn "*andsi_notsi_si_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "bic%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_shift_reg")] +) + +(define_expand "iordi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ior:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "neon_logic_op2" "")))] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*iordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0") + (match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))] + "TARGET_32BIT && !TARGET_IWMMXT" + { + switch (which_alternative) + { + case 0: /* fall through */ + case 6: return "vorr\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 7: return neon_output_logic_immediate ("vorr", &operands[2], + DImode, 0, VALID_NEON_QREG_MODE (DImode)); + case 2: + case 3: + case 4: + case 5: + return "#"; + default: gcc_unreachable (); + } + } + "TARGET_32BIT && !TARGET_IWMMXT && reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (IOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (IOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "type" "neon_logic,neon_logic,multiple,multiple,multiple,\ + multiple,neon_logic,neon_logic") + (set_attr "length" "*,*,8,8,8,8,*,*") + (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")] +) + +(define_insn "*iordi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + orr%?\\t%Q0, %Q1, %2 + #" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg,multiple")] +) + +(define_insn "*iordi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] +) + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[2])) + { + if (TARGET_32BIT) + { + 
arm_split_constant (IOR, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + } + " +) + +(define_insn_and_split "*iorsi3_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r") + (ior:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))] + "TARGET_32BIT" + "@ + orr%?\\t%0, %1, %2 + orr%?\\t%0, %1, %2 + orn%?\\t%0, %1, #%B2 + orr%?\\t%0, %1, %2 + #" + "TARGET_32BIT + && CONST_INT_P (operands[2]) + && !(const_ok_for_arm (INTVAL (operands[2])) + || (TARGET_THUMB2 && const_ok_for_arm (~INTVAL (operands[2]))))" + [(clobber (const_int 0))] +{ + arm_split_constant (IOR, SImode, curr_insn, + INTVAL (operands[2]), operands[0], operands[1], 0); + DONE; +} + [(set_attr "length" "4,4,4,4,16") + (set_attr "arch" "32,t2,t2,32,32") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no,yes,no,no,no") + (set_attr "type" "logic_imm,logic_reg,logic_imm,logic_reg,logic_reg")] +) + +(define_insn "*thumb1_iorsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (ior:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "orr\\t%0, %2" + [(set_attr "length" "2") + (set_attr "conds" "set") + (set_attr "type" "logics_reg")]) + +(define_peephole2 + [(match_scratch:SI 3 "r") + (set (match_operand:SI 0 "arm_general_register_operand" "") + (ior:SI (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_ARM + && !const_ok_for_arm (INTVAL (operands[2])) + && const_ok_for_arm (~INTVAL (operands[2]))" + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (ior:SI (match_dup 1) (match_dup 3)))] + "" +) + +(define_insn "*iorsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI (match_operand:SI 1 "s_register_operand" "%r,r") + (match_operand:SI 2 "arm_rhs_operand" "I,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "orr%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_reg")] +) + +(define_insn "*iorsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI (match_operand:SI 1 "s_register_operand" "%r,r") + (match_operand:SI 2 "arm_rhs_operand" "I,r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "orr%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_reg")] +) + +(define_expand "xordi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (xor:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "arm_xordi_operand" "")))] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*xordi3_insn" + [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,&r,&r,?w") + (xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w") + (match_operand:DI 2 "arm_xordi_operand" "w ,r ,r ,Dg,Dg,w")))] + "TARGET_32BIT && !TARGET_IWMMXT" +{ + switch (which_alternative) + { + case 1: + case 2: + case 3: + case 4: /* fall through */ + return "#"; + case 0: /* fall through */ + case 5: return "veor\t%P0, %P1, %P2"; + default: gcc_unreachable (); + } +} + "TARGET_32BIT && !TARGET_IWMMXT && 
reload_completed + && !(IS_VFP_REGNUM (REGNO (operands[0])))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] + " + { + operands[3] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_highpart (SImode, operands[0]); + + operands[4] = simplify_gen_binary (XOR, SImode, + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2])); + operands[6] = simplify_gen_binary (XOR, SImode, + gen_highpart (SImode, operands[1]), + gen_highpart_mode (SImode, DImode, operands[2])); + + }" + [(set_attr "length" "*,8,8,8,8,*") + (set_attr "type" "neon_logic,multiple,multiple,multiple,multiple,neon_logic") + (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")] +) + +(define_insn "*xordi_zesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (xor:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_32BIT" + "@ + eor%?\\t%Q0, %Q1, %2 + #" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] +) + +(define_insn "*xordi_sesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (xor:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_32BIT" + "#" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "type" "multiple")] +) + +(define_expand "xorsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (xor:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_EITHER" + "if (CONST_INT_P (operands[2])) + { + if (TARGET_32BIT) + { + arm_split_constant (XOR, SImode, NULL_RTX, + INTVAL (operands[2]), operands[0], operands[1], + optimize && can_create_pseudo_p ()); + DONE; + } + else /* TARGET_THUMB1 */ + { + rtx tmp = force_reg (SImode, operands[2]); + if (rtx_equal_p (operands[0], operands[1])) + operands[2] = tmp; + else + { + operands[2] = operands[1]; + operands[1] = tmp; + } + } + }" +) + +(define_insn_and_split "*arm_xorsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r") + (xor:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r") + (match_operand:SI 2 "reg_or_int_operand" "I,l,r,?n")))] + "TARGET_32BIT" + "@ + eor%?\\t%0, %1, %2 + eor%?\\t%0, %1, %2 + eor%?\\t%0, %1, %2 + #" + "TARGET_32BIT + && CONST_INT_P (operands[2]) + && !const_ok_for_arm (INTVAL (operands[2]))" + [(clobber (const_int 0))] +{ + arm_split_constant (XOR, SImode, curr_insn, + INTVAL (operands[2]), operands[0], operands[1], 0); + DONE; +} + [(set_attr "length" "4,4,4,16") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no,yes,no,no") + (set_attr "type" "logic_imm,logic_reg,logic_reg,multiple")] +) + +(define_insn "*thumb1_xorsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=l") + (xor:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "eor\\t%0, %2" + [(set_attr "length" "2") + (set_attr "conds" "set") + (set_attr "type" "logics_reg")] +) + +(define_insn "*xorsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (xor:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_rhs_operand" "I,r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (xor:SI (match_dup 1) (match_dup 2)))] + "TARGET_32BIT" + "eor%.\\t%0, %1, %2" + [(set_attr "conds" "set") + (set_attr "type" 
"logics_imm,logics_reg")] +) + +(define_insn "*xorsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (xor:SI (match_operand:SI 0 "s_register_operand" "r,r") + (match_operand:SI 1 "arm_rhs_operand" "I,r")) + (const_int 0)))] + "TARGET_32BIT" + "teq%?\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "logics_imm,logics_reg")] +) + +; By splitting (IOR (AND (NOT A) (NOT B)) C) as D = AND (IOR A B) (NOT C), +; (NOT D) we can sometimes merge the final NOT into one of the following +; insns. + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (and:SI (not:SI (match_operand:SI 1 "s_register_operand" "")) + (not:SI (match_operand:SI 2 "arm_rhs_operand" ""))) + (match_operand:SI 3 "arm_rhs_operand" ""))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 4) (and:SI (ior:SI (match_dup 1) (match_dup 2)) + (not:SI (match_dup 3)))) + (set (match_dup 0) (not:SI (match_dup 4)))] + "" +) + +(define_insn_and_split "*andsi_iorsi3_notsi" + [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r") + (and:SI (ior:SI (match_operand:SI 1 "s_register_operand" "%0,r,r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")) + (not:SI (match_operand:SI 3 "arm_rhs_operand" "rI,rI,rI"))))] + "TARGET_32BIT" + "#" ; "orr%?\\t%0, %1, %2\;bic%?\\t%0, %0, %3" + "&& reload_completed" + [(set (match_dup 0) (ior:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (and:SI (not:SI (match_dup 3)) (match_dup 0)))] + "" + [(set_attr "length" "8") + (set_attr "ce_count" "2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +; ??? Are these four splitters still beneficial when the Thumb-2 bitfield +; insns are available? +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operator:SI 9 "logical_binary_operator" + [(lshiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")])])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(match_operator:SI 9 "logical_binary_operator" + [(lshiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")]) + (zero_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" ""))])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(lshiftrt:SI 
(match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (match_operator:SI 9 "logical_binary_operator" + [(ashiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")])])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "logical_binary_operator" + [(match_operator:SI 9 "logical_binary_operator" + [(ashiftrt:SI (match_operand:SI 5 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (match_operand:SI 7 "s_register_operand" "")]) + (sign_extract:SI (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "const_int_operand" "") + (match_operand:SI 4 "const_int_operand" ""))])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT + && GET_CODE (operands[1]) == GET_CODE (operands[9]) + && INTVAL (operands[3]) == 32 - INTVAL (operands[6])" + [(set (match_dup 8) + (match_op_dup 1 + [(ashift:SI (match_dup 2) (match_dup 4)) + (match_dup 5)])) + (set (match_dup 0) + (match_op_dup 1 + [(ashiftrt:SI (match_dup 8) (match_dup 6)) + (match_dup 7)]))] + " + operands[4] = GEN_INT (32 - (INTVAL (operands[3]) + INTVAL (operands[4]))); +") + + +;; Minimum and maximum insns + +(define_expand "smaxsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (smax:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + " + if (operands[2] == const0_rtx || operands[2] == constm1_rtx) + { + /* No need for a clobber of the condition code register here. 
*/ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_SMAX (SImode, operands[1], + operands[2]))); + DONE; + } +") + +(define_insn "*smax_0" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smax:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "bic%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] +) + +(define_insn "*smax_m1" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smax:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int -1)))] + "TARGET_32BIT" + "orr%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] +) + +(define_insn_and_split "*arm_smax_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (smax:SI (match_operand:SI 1 "s_register_operand" "%0,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + ; cmp\\t%1, %2\;movlt\\t%0, %2 + ; cmp\\t%1, %2\;movge\\t%0, %1\;movlt\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (ge:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_expand "sminsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (smin:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + " + if (operands[2] == const0_rtx) + { + /* No need for a clobber of the condition code register here. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_SMIN (SImode, operands[1], + operands[2]))); + DONE; + } +") + +(define_insn "*smin_0" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (smin:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "and%?\\t%0, %1, %1, asr #31" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_shift_reg")] +) + +(define_insn_and_split "*arm_smin_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (smin:SI (match_operand:SI 1 "s_register_operand" "%0,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + ; cmp\\t%1, %2\;movge\\t%0, %2 + ; cmp\\t%1, %2\;movlt\\t%0, %1\;movge\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (lt:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple,multiple")] +) + +(define_expand "umaxsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (umax:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*arm_umaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (umax:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + ; cmp\\t%1, %2\;movcc\\t%0, %2 + ; cmp\\t%1, %2\;movcs\\t%0, %1 + ; cmp\\t%1, %2\;movcs\\t%0, %1\;movcc\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + 
(compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12") + (set_attr "type" "store1")] +) + +(define_expand "uminsi3" + [(parallel [ + (set (match_operand:SI 0 "s_register_operand" "") + (umin:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_32BIT" + "" +) + +(define_insn_and_split "*arm_uminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (umin:SI (match_operand:SI 1 "s_register_operand" "0,r,?r") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + ; cmp\\t%1, %2\;movcs\\t%0, %2 + ; cmp\\t%1, %2\;movcc\\t%0, %1 + ; cmp\\t%1, %2\;movcc\\t%0, %1\;movcs\\t%0, %2" + "TARGET_ARM" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (if_then_else:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12") + (set_attr "type" "store1")] +) + +(define_insn "*store_minmaxsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (match_operator:SI 3 "minmax_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && optimize_function_for_size_p (cfun)" + "* + operands[3] = gen_rtx_fmt_ee (minmax_code (operands[3]), SImode, + operands[1], operands[2]); + output_asm_insn (\"cmp\\t%1, %2\", operands); + if (TARGET_THUMB2) + output_asm_insn (\"ite\t%d3\", operands); + output_asm_insn (\"str%d3\\t%1, %0\", operands); + output_asm_insn (\"str%D3\\t%2, %0\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12))) + (set_attr "type" "store1")] +) + +; Reject the frame pointer in operand[1], since reloading this after +; it has been eliminated can cause carnage. +(define_insn "*minmax_arithsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 4 "shiftable_operator" + [(match_operator:SI 5 "minmax_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !arm_eliminable_register (operands[1]) && !arm_restrict_it" + "* + { + enum rtx_code code = GET_CODE (operands[4]); + bool need_else; + + if (which_alternative != 0 || operands[3] != const0_rtx + || (code != PLUS && code != IOR && code != XOR)) + need_else = true; + else + need_else = false; + + operands[5] = gen_rtx_fmt_ee (minmax_code (operands[5]), SImode, + operands[2], operands[3]); + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (TARGET_THUMB2) + { + if (need_else) + output_asm_insn (\"ite\\t%d5\", operands); + else + output_asm_insn (\"it\\t%d5\", operands); + } + output_asm_insn (\"%i4%d5\\t%0, %1, %2\", operands); + if (need_else) + output_asm_insn (\"%i4%D5\\t%0, %1, %3\", operands); + return \"\"; + }" + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12))) + (set_attr "type" "multiple")] +) + +; Reject the frame pointer in operand[1], since reloading this after +; it has been eliminated can cause carnage. 
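+; Illustration only (not part of the original sources): in ARM state,
+; "r0 = r1 - smax (r2, r3)" can be split into something like
+;     cmp   r2, r3
+;     subge r0, r1, r2
+;     sublt r0, r1, r3
+; (Thumb-2 additionally wraps the conditional subtracts in an IT block.)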
+(define_insn_and_split "*minmax_arithsi_non_canon" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") + (minus:SI + (match_operand:SI 1 "s_register_operand" "0,?Ts") + (match_operator:SI 4 "minmax_operator" + [(match_operand:SI 2 "s_register_operand" "Ts,Ts") + (match_operand:SI 3 "arm_rhs_operand" "TsI,TsI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && !arm_eliminable_register (operands[1]) + && !(arm_restrict_it && CONST_INT_P (operands[3]))" + "#" + "TARGET_32BIT && !arm_eliminable_register (operands[1]) && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 3))) + + (cond_exec (match_op_dup 4 [(reg:CC CC_REGNUM) (const_int 0)]) + (set (match_dup 0) + (minus:SI (match_dup 1) + (match_dup 2)))) + (cond_exec (match_op_dup 5 [(reg:CC CC_REGNUM) (const_int 0)]) + (set (match_dup 0) + (match_dup 6)))] + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = minmax_code (operands[4]); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, + operands[2], operands[3]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, SImode, operands[2], operands[3]); + if (CONST_INT_P (operands[3])) + operands[6] = plus_constant (SImode, operands[1], -INTVAL (operands[3])); + else + operands[6] = gen_rtx_MINUS (SImode, operands[1], operands[3]); + } + [(set_attr "conds" "clob") + (set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 14) + (const_int 12))) + (set_attr "type" "multiple")] +) + +(define_code_iterator SAT [smin smax]) +(define_code_iterator SATrev [smin smax]) +(define_code_attr SATlo [(smin "1") (smax "2")]) +(define_code_attr SAThi [(smin "2") (smax "1")]) + +(define_insn "*satsi_" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (SAT:SI (SATrev:SI (match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 1 "const_int_operand" "i")) + (match_operand:SI 2 "const_int_operand" "i")))] + "TARGET_32BIT && arm_arch6 && != + && arm_sat_operator_match (operands[], operands[], NULL, NULL)" +{ + int mask; + bool signed_sat; + if (!arm_sat_operator_match (operands[], operands[], + &mask, &signed_sat)) + gcc_unreachable (); + + operands[1] = GEN_INT (mask); + if (signed_sat) + return "ssat%?\t%0, %1, %3"; + else + return "usat%?\t%0, %1, %3"; +} + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alus_imm")] +) + +(define_insn "*satsi__shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (SAT:SI (SATrev:SI (match_operator:SI 3 "sat_shift_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "const_int_operand" "i")]) + (match_operand:SI 1 "const_int_operand" "i")) + (match_operand:SI 2 "const_int_operand" "i")))] + "TARGET_32BIT && arm_arch6 && != + && arm_sat_operator_match (operands[], operands[], NULL, NULL)" +{ + int mask; + bool signed_sat; + if (!arm_sat_operator_match (operands[], operands[], + &mask, &signed_sat)) + gcc_unreachable (); + + operands[1] = GEN_INT (mask); + if (signed_sat) + return "ssat%?\t%0, %1, %4%S3"; + else + return "usat%?\t%0, %1, %4%S3"; +} + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "3") + (set_attr "type" "logic_shift_reg")]) + +;; Shift and rotation insns + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ashift:DI (match_operand:DI 1 "s_register_operand" 
"") + (match_operand:SI 2 "general_operand" "")))] + "TARGET_32BIT" + " + if (TARGET_NEON) + { + /* Delay the decision whether to use NEON or core-regs until + register allocation. */ + emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2])); + DONE; + } + else + { + /* Only the NEON case can handle in-memory shift counts. */ + if (!reg_or_int_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + } + + if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) + ; /* No special preparation statements; expand pattern as above. */ + else + { + rtx scratch1, scratch2; + + if (CONST_INT_P (operands[2]) + && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); + DONE; + } + + /* Ideally we should use iwmmxt here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. */ + + /* If we're optimizing for size, we prefer the libgcc calls. */ + if (optimize_function_for_size_p (cfun)) + FAIL; + + /* Expand operation using core-registers. + 'FAIL' would achieve the same thing, but this is a bit smarter. */ + scratch1 = gen_reg_rtx (SImode); + scratch2 = gen_reg_rtx (SImode); + arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], + operands[2], scratch1, scratch2); + DONE; + } + " +) + +(define_insn "arm_ashldi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (ashift:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%Q0, %Q1, asl #1\;adc\\t%R0, %R1, %R1" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ashift:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[2]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } + " +) + +(define_insn "*thumb1_ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashift:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "lsl\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "type" "shift_imm,shift_reg") + (set_attr "conds" "set")]) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (TARGET_NEON) + { + /* Delay the decision whether to use NEON or core-regs until + register allocation. */ + emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2])); + DONE; + } + + if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) + ; /* No special preparation statements; expand pattern as above. */ + else + { + rtx scratch1, scratch2; + + if (CONST_INT_P (operands[2]) + && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1])); + DONE; + } + + /* Ideally we should use iwmmxt here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. */ + + /* If we're optimizing for size, we prefer the libgcc calls. 
*/ + if (optimize_function_for_size_p (cfun)) + FAIL; + + /* Expand operation using core-registers. + 'FAIL' would achieve the same thing, but this is a bit smarter. */ + scratch1 = gen_reg_rtx (SImode); + scratch2 = gen_reg_rtx (SImode); + arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1], + operands[2], scratch1, scratch2); + DONE; + } + " +) + +(define_insn "arm_ashrdi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%R0, %R1, asr #1\;mov\\t%Q0, %Q1, rrx" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (ashiftrt:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[2]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + operands[2] = GEN_INT (31); + " +) + +(define_insn "*thumb1_ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "asr\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "type" "shift_imm,shift_reg") + (set_attr "conds" "set")]) + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "s_register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (TARGET_NEON) + { + /* Delay the decision whether to use NEON or core-regs until + register allocation. */ + emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2])); + DONE; + } + + if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) + ; /* No special preparation statements; expand pattern as above. */ + else + { + rtx scratch1, scratch2; + + if (CONST_INT_P (operands[2]) + && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) + { + emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1])); + DONE; + } + + /* Ideally we should use iwmmxt here if we could know that operands[1] + ends up already living in an iwmmxt register. Otherwise it's + cheaper to have the alternate code being generated than moving + values to iwmmxt regs and back. */ + + /* If we're optimizing for size, we prefer the libgcc calls. */ + if (optimize_function_for_size_p (cfun)) + FAIL; + + /* Expand operation using core-registers. + 'FAIL' would achieve the same thing, but this is a bit smarter. 
*/ + scratch1 = gen_reg_rtx (SImode); + scratch2 = gen_reg_rtx (SImode); + arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1], + operands[2], scratch1, scratch2); + DONE; + } + " +) + +(define_insn "arm_lshrdi3_1bit" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" "0,r") + (const_int 1))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "movs\\t%R0, %R1, lsr #1\;mov\\t%Q0, %Q1, rrx" + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (lshiftrt:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (CONST_INT_P (operands[2]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } + " +) + +(define_insn "*thumb1_lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,0") + (match_operand:SI 2 "nonmemory_operand" "N,l")))] + "TARGET_THUMB1" + "lsr\\t%0, %1, %2" + [(set_attr "length" "2") + (set_attr "type" "shift_imm,shift_reg") + (set_attr "conds" "set")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "reg_or_int_operand" "")))] + "TARGET_32BIT" + " + if (CONST_INT_P (operands[2])) + operands[2] = GEN_INT ((32 - INTVAL (operands[2])) % 32); + else + { + rtx reg = gen_reg_rtx (SImode); + emit_insn (gen_subsi3 (reg, GEN_INT (32), operands[2])); + operands[2] = reg; + } + " +) + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "s_register_operand" "") + (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" "")))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (CONST_INT_P (operands[2]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + operands[2] = GEN_INT (INTVAL (operands[2]) % 32); + } + else /* TARGET_THUMB1 */ + { + if (CONST_INT_P (operands [2])) + operands [2] = force_reg (SImode, operands[2]); + } + " +) + +(define_insn "*thumb1_rotrsi3" + [(set (match_operand:SI 0 "register_operand" "=l") + (rotatert:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "register_operand" "l")))] + "TARGET_THUMB1" + "ror\\t%0, %0, %2" + [(set_attr "type" "shift_reg") + (set_attr "length" "2")] +) + +(define_insn "*arm_shiftsi3" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "0,l,r,r") + (match_operand:SI 2 "reg_or_int_operand" "l,M,M,r")]))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 0);" + [(set_attr "predicable" "yes") + (set_attr "arch" "t2,t2,*,*") + (set_attr "predicable_short_it" "yes,yes,no,no") + (set_attr "length" "4") + (set_attr "shift" "1") + (set_attr "type" "alu_shift_reg,alu_shift_imm,alu_shift_imm,alu_shift_reg")] +) + +(define_insn "*shiftsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_rhs_operand" "M,r")]) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_op_dup 3 [(match_dup 1) (match_dup 2)]))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 1);" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr 
"type" "alus_shift_imm,alus_shift_reg")] +) + +(define_insn "*shiftsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "arm_rhs_operand" "M,r")]) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "* return arm_output_shift(operands, 1);" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "type" "shift_imm,shift_reg")] +) + +(define_insn "*not_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])))] + "TARGET_32BIT" + "mvn%?\\t%0, %1%S3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "1") + (set_attr "arch" "32,a") + (set_attr "type" "mvn_shift,mvn_shift_reg")]) + +(define_insn "*not_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (not:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])))] + "TARGET_32BIT" + "mvn%.\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a") + (set_attr "type" "mvn_shift,mvn_shift_reg")]) + +(define_insn "*not_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (not:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r") + (match_operand:SI 2 "shift_amount_operand" "M,rM")])) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "mvn%.\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a") + (set_attr "type" "mvn_shift,mvn_shift_reg")]) + +;; We don't really have extzv, but defining this using shifts helps +;; to reduce register pressure later on. + +(define_expand "extzv" + [(set (match_operand 0 "s_register_operand" "") + (zero_extract (match_operand 1 "nonimmediate_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "TARGET_THUMB1 || arm_arch_thumb2" + " + { + HOST_WIDE_INT lshift = 32 - INTVAL (operands[2]) - INTVAL (operands[3]); + HOST_WIDE_INT rshift = 32 - INTVAL (operands[2]); + + if (arm_arch_thumb2) + { + HOST_WIDE_INT width = INTVAL (operands[2]); + HOST_WIDE_INT bitpos = INTVAL (operands[3]); + + if (unaligned_access && MEM_P (operands[1]) + && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0) + { + rtx base_addr; + + if (BYTES_BIG_ENDIAN) + bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width + - bitpos; + + if (width == 32) + { + base_addr = adjust_address (operands[1], SImode, + bitpos / BITS_PER_UNIT); + emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); + } + else + { + rtx dest = operands[0]; + rtx tmp = gen_reg_rtx (SImode); + + /* We may get a paradoxical subreg here. Strip it off. 
*/ + if (GET_CODE (dest) == SUBREG + && GET_MODE (dest) == SImode + && GET_MODE (SUBREG_REG (dest)) == HImode) + dest = SUBREG_REG (dest); + + if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) + FAIL; + + base_addr = adjust_address (operands[1], HImode, + bitpos / BITS_PER_UNIT); + emit_insn (gen_unaligned_loadhiu (tmp, base_addr)); + emit_move_insn (gen_lowpart (SImode, dest), tmp); + } + DONE; + } + else if (s_register_operand (operands[1], GET_MODE (operands[1]))) + { + emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + else + FAIL; + } + + if (!s_register_operand (operands[1], GET_MODE (operands[1]))) + FAIL; + + operands[3] = GEN_INT (rshift); + + if (lshift == 0) + { + emit_insn (gen_lshrsi3 (operands[0], operands[1], operands[3])); + DONE; + } + + emit_insn (gen_extzv_t1 (operands[0], operands[1], GEN_INT (lshift), + operands[3], gen_reg_rtx (SImode))); + DONE; + }" +) + +;; Helper for extzv, for the Thumb-1 register-shifts case. + +(define_expand "extzv_t1" + [(set (match_operand:SI 4 "s_register_operand" "") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 0 "s_register_operand" "") + (lshiftrt:SI (match_dup 4) + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_THUMB1" + "") + +(define_expand "extv" + [(set (match_operand 0 "s_register_operand" "") + (sign_extract (match_operand 1 "nonimmediate_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "arm_arch_thumb2" +{ + HOST_WIDE_INT width = INTVAL (operands[2]); + HOST_WIDE_INT bitpos = INTVAL (operands[3]); + + if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32) + && (bitpos % BITS_PER_UNIT) == 0) + { + rtx base_addr; + + if (BYTES_BIG_ENDIAN) + bitpos = GET_MODE_BITSIZE (GET_MODE (operands[0])) - width - bitpos; + + if (width == 32) + { + base_addr = adjust_address (operands[1], SImode, + bitpos / BITS_PER_UNIT); + emit_insn (gen_unaligned_loadsi (operands[0], base_addr)); + } + else + { + rtx dest = operands[0]; + rtx tmp = gen_reg_rtx (SImode); + + /* We may get a paradoxical subreg here. Strip it off. */ + if (GET_CODE (dest) == SUBREG + && GET_MODE (dest) == SImode + && GET_MODE (SUBREG_REG (dest)) == HImode) + dest = SUBREG_REG (dest); + + if (GET_MODE_BITSIZE (GET_MODE (dest)) != width) + FAIL; + + base_addr = adjust_address (operands[1], HImode, + bitpos / BITS_PER_UNIT); + emit_insn (gen_unaligned_loadhis (tmp, base_addr)); + emit_move_insn (gen_lowpart (SImode, dest), tmp); + } + + DONE; + } + else if (!s_register_operand (operands[1], GET_MODE (operands[1]))) + FAIL; + else if (GET_MODE (operands[0]) == SImode + && GET_MODE (operands[1]) == SImode) + { + emit_insn (gen_extv_regsi (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + + FAIL; +}) + +; Helper to expand register forms of extv with the proper modes. + +(define_expand "extv_regsi" + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "" +{ +}) + +; ARMv6+ unaligned load/store instructions (used for packed structure accesses). 
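+;;
+;; Editorial illustration (not part of the upstream source): a packed
+;; structure member access is the typical consumer of these unspecs.
+;; Assuming a hypothetical example such as
+;;
+;;   struct __attribute__((packed)) rec { char tag; int val; };
+;;   int get_val (struct rec *p) { return p->val; }
+;;
+;; a target with unaligned_access enabled can expand the misaligned word
+;; load through unaligned_loadsi and emit a single
+;;   ldr r0, [r0, #1] @ unaligned
+;; instead of reassembling the value from byte loads; the exact sequence
+;; depends on the selected architecture and options.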
+ +(define_insn "unaligned_loadsi" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")] + UNSPEC_UNALIGNED_LOAD))] + "unaligned_access && TARGET_32BIT" + "ldr%?\t%0, %1\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "load1")]) + +(define_insn "unaligned_loadhis" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (sign_extend:SI + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] + UNSPEC_UNALIGNED_LOAD)))] + "unaligned_access && TARGET_32BIT" + "ldr%(sh%)\t%0, %1\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "load_byte")]) + +(define_insn "unaligned_loadhiu" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (zero_extend:SI + (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] + UNSPEC_UNALIGNED_LOAD)))] + "unaligned_access && TARGET_32BIT" + "ldr%(h%)\t%0, %1\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "load_byte")]) + +(define_insn "unaligned_storesi" + [(set (match_operand:SI 0 "memory_operand" "=Uw,m") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")] + UNSPEC_UNALIGNED_STORE))] + "unaligned_access && TARGET_32BIT" + "str%?\t%1, %0\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "store1")]) + +(define_insn "unaligned_storehi" + [(set (match_operand:HI 0 "memory_operand" "=Uw,m") + (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")] + UNSPEC_UNALIGNED_STORE))] + "unaligned_access && TARGET_32BIT" + "str%(h%)\t%1, %0\t@ unaligned" + [(set_attr "arch" "t2,any") + (set_attr "length" "2,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "type" "store1")]) + +;; Unaligned double-word load and store. +;; Split after reload into two unaligned single-word accesses. +;; It prevents lower_subreg from splitting some other aligned +;; double-word accesses too early. Used for internal memcpy. + +(define_insn_and_split "unaligned_loaddi" + [(set (match_operand:DI 0 "s_register_operand" "=l,r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")] + UNSPEC_UNALIGNED_LOAD))] + "unaligned_access && TARGET_32BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD)) + (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + + /* If the first destination register overlaps with the base address, + swap the order in which the loads are emitted. 
*/ + if (reg_overlap_mentioned_p (operands[0], operands[1])) + { + rtx tmp = operands[1]; + operands[1] = operands[3]; + operands[3] = tmp; + tmp = operands[0]; + operands[0] = operands[2]; + operands[2] = tmp; + } + } + [(set_attr "arch" "t2,any") + (set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "type" "load2")]) + +(define_insn_and_split "unaligned_storedi" + [(set (match_operand:DI 0 "memory_operand" "=o,o") + (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")] + UNSPEC_UNALIGNED_STORE))] + "unaligned_access && TARGET_32BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE)) + (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "arch" "t2,any") + (set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "type" "store2")]) + + +(define_insn "*extv_reg" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") + (match_operand:SI 3 "const_int_operand" "M")))] + "arm_arch_thumb2" + "sbfx%?\t%0, %1, %3, %2" + [(set_attr "length" "4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] +) + +(define_insn "extzv_t2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M") + (match_operand:SI 3 "const_int_operand" "M")))] + "arm_arch_thumb2" + "ubfx%?\t%0, %1, %3, %2" + [(set_attr "length" "4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "bfm")] +) + + +;; Division instructions +(define_insn "divsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (div:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_IDIV" + "sdiv%?\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "sdiv")] +) + +(define_insn "udivsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (udiv:SI (match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_IDIV" + "udiv%?\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "udiv")] +) + + +;; Unary arithmetic insns + +(define_expand "negdi2" + [(parallel + [(set (match_operand:DI 0 "s_register_operand" "") + (neg:DI (match_operand:DI 1 "s_register_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_EITHER" + { + if (TARGET_NEON) + { + emit_insn (gen_negdi2_neon (operands[0], operands[1])); + DONE; + } + } +) + +;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1). +;; The first alternative allows the common case of a *full* overlap. 
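+;;
+;; Editorial sketch of the split result (assuming the usual %Q = low,
+;; %R = high subword ordering): negating r1:r0 into r3:r2 becomes
+;;
+;;   rsbs r2, r0, #0   @ negate low word, set the borrow
+;;   rsc  r3, r1, #0   @ high word consumes the borrow
+;;
+;; so a *partial* overlap, where the freshly written low word of the
+;; destination is also the still-needed high word of the source, must be
+;; rejected by the constraints below.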
+(define_insn_and_split "*arm_negdi2" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (neg:DI (match_operand:DI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" ; "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0" + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 2) (minus:SI (minus:SI (const_int 0) (match_dup 3)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb1_negdi2" + [(set (match_operand:DI 0 "register_operand" "=&l") + (neg:DI (match_operand:DI 1 "register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB1" + "mov\\t%R0, #0\;neg\\t%Q0, %Q1\;sbc\\t%R0, %R1" + [(set_attr "length" "6") + (set_attr "type" "multiple")] +) + +(define_expand "negsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (neg:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_negsi2" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (neg:SI (match_operand:SI 1 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "rsb%?\\t%0, %1, #0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4") + (set_attr "type" "alu_reg")] +) + +(define_insn "*thumb1_negsi2" + [(set (match_operand:SI 0 "register_operand" "=l") + (neg:SI (match_operand:SI 1 "register_operand" "l")))] + "TARGET_THUMB1" + "neg\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "alu_imm")] +) + +(define_expand "negsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (neg:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "" +) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (neg:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "") + +(define_insn_and_split "*zextendsidi_negsi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (zero_extend:DI (neg:SI (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT" + "#" + "" + [(set (match_dup 2) + (neg:SI (match_dup 1))) + (set (match_dup 3) + (const_int 0))] + { + operands[2] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[0]); + } + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +;; Negate an extended 32-bit value. +(define_insn_and_split "*negdi_extendsidi" + [(set (match_operand:DI 0 "s_register_operand" "=l,r") + (neg:DI (sign_extend:DI + (match_operand:SI 1 "s_register_operand" "l,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx low = gen_lowpart (SImode, operands[0]); + rtx high = gen_highpart (SImode, operands[0]); + + if (reg_overlap_mentioned_p (low, operands[1])) + { + /* Input overlaps the low word of the output. Use: + asr Rhi, Rin, #31 + rsbs Rlo, Rin, #0 + rsc Rhi, Rhi, #0 (thumb2: sbc Rhi, Rhi, Rhi, lsl #1). 
*/ + rtx cc_reg = gen_rtx_REG (CC_Cmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_ASHIFTRT (SImode, operands[1], + GEN_INT (31)))); + + emit_insn (gen_subsi3_compare (low, const0_rtx, operands[1])); + if (TARGET_ARM) + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_MINUS (SImode, + gen_rtx_MINUS (SImode, + const0_rtx, + high), + gen_rtx_LTU (SImode, + cc_reg, + const0_rtx)))); + else + { + rtx two_x = gen_rtx_ASHIFT (SImode, high, GEN_INT (1)); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_MINUS (SImode, + gen_rtx_MINUS (SImode, + high, + two_x), + gen_rtx_LTU (SImode, + cc_reg, + const0_rtx)))); + } + } + else + { + /* No overlap, or overlap on high word. Use: + rsb Rlo, Rin, #0 + bic Rhi, Rlo, Rin + asr Rhi, Rhi, #31 + Flags not needed for this sequence. */ + emit_insn (gen_rtx_SET (VOIDmode, low, + gen_rtx_NEG (SImode, operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_AND (SImode, + gen_rtx_NOT (SImode, operands[1]), + low))); + emit_insn (gen_rtx_SET (VOIDmode, high, + gen_rtx_ASHIFTRT (SImode, high, + GEN_INT (31)))); + } + DONE; + } + [(set_attr "length" "12") + (set_attr "arch" "t2,*") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*negdi_zero_extendsidi" + [(set (match_operand:DI 0 "s_register_operand" "=r,&r") + (neg:DI (zero_extend:DI (match_operand:SI 1 "s_register_operand" "0,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" ; "rsbs\\t%Q0, %1, #0\;sbc\\t%R0,%R0,%R0" + ;; Don't care what register is input to sbc, + ;; since we just just need to propagate the carry. + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 2) (minus:SI (minus:SI (match_dup 2) (match_dup 2)) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] ;; length in thumb is 4 +) + +;; abssi2 doesn't really clobber the condition codes if a different register +;; is being set. To keep things simple, assume during rtl manipulations that +;; it does, but tell the final scan operator the truth. 
Similarly for +;; (neg (abs...)) + +(define_expand "abssi2" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (abs:SI (match_operand:SI 1 "s_register_operand" ""))) + (clobber (match_dup 2))])] + "TARGET_EITHER" + " + if (TARGET_THUMB1) + operands[2] = gen_rtx_SCRATCH (SImode); + else + operands[2] = gen_rtx_REG (CCmode, CC_REGNUM); +") + +(define_insn_and_split "*arm_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,&r") + (abs:SI (match_operand:SI 1 "s_register_operand" "0,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + { + /* if (which_alternative == 0) */ + if (REGNO(operands[0]) == REGNO(operands[1])) + { + /* Emit the pattern: + cmp\\t%0, #0\;rsblt\\t%0, %0, #0 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (const_int 0))) + (cond_exec (lt:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1))))] + */ + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_LT (SImode, + gen_rtx_REG (CCmode, CC_REGNUM), + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + DONE; + } + else + { + /* Emit the pattern: + alt1: eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31 + [(set (match_dup 0) + (xor:SI (match_dup 1) + (ashiftrt:SI (match_dup 1) (const_int 31)))) + (set (match_dup 0) + (minus:SI (match_dup 0) + (ashiftrt:SI (match_dup 1) (const_int 31))))] + */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31))))); + DONE; + } + } + [(set_attr "conds" "clob,*") + (set_attr "shift" "1") + (set_attr "predicable" "no, yes") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb1_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (abs:SI (match_operand:SI 1 "s_register_operand" "l"))) + (clobber (match_scratch:SI 2 "=&l"))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] + "" + [(set_attr "length" "6") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*arm_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,&r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "0,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + { + /* if (which_alternative == 0) */ + if (REGNO (operands[0]) == REGNO (operands[1])) + { + /* Emit the pattern: + cmp\\t%0, #0\;rsbgt\\t%0, %0, #0 + */ + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_GT (SImode, + gen_rtx_REG (CCmode, CC_REGNUM), + const0_rtx), + gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1]))))); + } + else + { + /* Emit the pattern: + eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31 + */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, 
+ operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[0]))); + } + DONE; + } + [(set_attr "conds" "clob,*") + (set_attr "shift" "1") + (set_attr "predicable" "no, yes") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb1_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "l")))) + (clobber (match_scratch:SI 2 "=&l"))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31))) + (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 1))) + (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 2)))] + "" + [(set_attr "length" "6") + (set_attr "type" "multiple")] +) + +(define_expand "abssf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (abs:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "") + +(define_expand "absdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (abs:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "") + +(define_expand "sqrtsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (sqrt:SF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "") + +(define_expand "sqrtdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "") + +(define_insn_and_split "one_cmpldi2" + [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,?w") + (not:DI (match_operand:DI 1 "s_register_operand" " w, 0, r, w")))] + "TARGET_32BIT" + "@ + vmvn\t%P0, %P1 + # + # + vmvn\t%P0, %P1" + "TARGET_32BIT && reload_completed + && arm_general_register_operand (operands[0], DImode)" + [(set (match_dup 0) (not:SI (match_dup 1))) + (set (match_dup 2) (not:SI (match_dup 3)))] + " + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "*,8,8,*") + (set_attr "predicable" "no,yes,yes,no") + (set_attr "type" "neon_move,multiple,multiple,neon_move") + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")] +) + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (not:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_one_cmplsi2" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (not:SI (match_operand:SI 1 "s_register_operand" "l,r")))] + "TARGET_32BIT" + "mvn%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no") + (set_attr "arch" "t2,*") + (set_attr "length" "4") + (set_attr "type" "mvn_reg")] +) + +(define_insn "*thumb1_one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=l") + (not:SI (match_operand:SI 1 "register_operand" "l")))] + "TARGET_THUMB1" + "mvn\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "mvn_reg")] +) + +(define_insn "*notsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_dup 1)))] + "TARGET_32BIT" 
+ "mvn%.\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "mvn_reg")] +) + +(define_insn "*notsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "TARGET_32BIT" + "mvn%.\\t%0, %1" + [(set_attr "conds" "set") + (set_attr "type" "mvn_reg")] +) + +;; Fixed <--> Floating conversion insns + +(define_expand "floatsihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:SI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatdihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:DI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatsisf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (float:SF (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "floatsidf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (float:DF (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + +(define_expand "fix_trunchfsi2" + [(set (match_operand:SI 0 "general_operand" "") + (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_trunchfdi2" + [(set (match_operand:DI 0 "general_operand" "") + (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_truncsfsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " +") + +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" ""))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + " +") + +;; Truncation insns + +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "s_register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "" +) + +/* DFmode -> HFmode conversions have to go through SFmode. */ +(define_expand "truncdfhf2" + [(set (match_operand:HF 0 "general_operand" "") + (float_truncate:HF + (match_operand:DF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +;; Zero and sign extension instructions. 
+
+(define_insn "zero_extend<mode>di2"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,w")
+        (zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
+					    "<qhs_zextenddi_cstr>")))]
+  "TARGET_32BIT <qhs_zextenddi_cond>"
+  "#"
+  [(set_attr "length" "8,4,8,8")
+   (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
+   (set_attr "ce_count" "2")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "multiple,mov_reg,multiple,multiple")]
+)
+
+(define_insn "extend<mode>di2"
+  [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,?r,w")
+        (sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
+					    "<qhs_extenddi_cstr>")))]
+  "TARGET_32BIT <qhs_sextenddi_cond>"
+  "#"
+  [(set_attr "length" "8,4,8,8,8")
+   (set_attr "ce_count" "2")
+   (set_attr "shift" "1")
+   (set_attr "predicable" "yes")
+   (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")
+   (set_attr "type" "multiple,mov_reg,multiple,multiple,multiple")]
+)
+
+;; Splits for all extensions to DImode
+(define_split
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(zero_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
+  "TARGET_32BIT && reload_completed && !IS_VFP_REGNUM (REGNO (operands[0]))"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx lo_part = gen_lowpart (SImode, operands[0]);
+  enum machine_mode src_mode = GET_MODE (operands[1]);
+
+  if (REG_P (operands[0])
+      && !reg_overlap_mentioned_p (operands[0], operands[1]))
+    emit_clobber (operands[0]);
+  if (!REG_P (lo_part) || src_mode != SImode
+      || !rtx_equal_p (lo_part, operands[1]))
+    {
+      if (src_mode == SImode)
+        emit_move_insn (lo_part, operands[1]);
+      else
+        emit_insn (gen_rtx_SET (VOIDmode, lo_part,
+				gen_rtx_ZERO_EXTEND (SImode, operands[1])));
+      operands[1] = lo_part;
+    }
+  operands[0] = gen_highpart (SImode, operands[0]);
+  operands[1] = const0_rtx;
+})
+
+(define_split
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(sign_extend:DI (match_operand 1 "nonimmediate_operand" "")))]
+  "TARGET_32BIT && reload_completed && !IS_VFP_REGNUM (REGNO (operands[0]))"
+  [(set (match_dup 0) (ashiftrt:SI (match_dup 1) (const_int 31)))]
+{
+  rtx lo_part = gen_lowpart (SImode, operands[0]);
+  enum machine_mode src_mode = GET_MODE (operands[1]);
+
+  if (REG_P (operands[0])
+      && !reg_overlap_mentioned_p (operands[0], operands[1]))
+    emit_clobber (operands[0]);
+
+  if (!REG_P (lo_part) || src_mode != SImode
+      || !rtx_equal_p (lo_part, operands[1]))
+    {
+      if (src_mode == SImode)
+        emit_move_insn (lo_part, operands[1]);
+      else
+        emit_insn (gen_rtx_SET (VOIDmode, lo_part,
+				gen_rtx_SIGN_EXTEND (SImode, operands[1])));
+      operands[1] = lo_part;
+    }
+  operands[0] = gen_highpart (SImode, operands[0]);
+})
+
+(define_expand "zero_extendhisi2"
+  [(set (match_operand:SI 0 "s_register_operand" "")
+	(zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "TARGET_EITHER"
+{
+  if (TARGET_ARM && !arm_arch4 && MEM_P (operands[1]))
+    {
+      emit_insn (gen_movhi_bytes (operands[0], operands[1]));
+      DONE;
+    }
+  if (!arm_arch6 && !MEM_P (operands[1]))
+    {
+      rtx t = gen_lowpart (SImode, operands[1]);
+      rtx tmp = gen_reg_rtx (SImode);
+      emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (16)));
+      emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (16)));
+      DONE;
+    }
+})
+
+(define_split
+  [(set (match_operand:SI 0 "s_register_operand" "")
+	(zero_extend:SI (match_operand:HI 1 "s_register_operand" "")))]
+  "!TARGET_THUMB2 && !arm_arch6"
+  [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+   (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))]
+{
+  operands[2] = gen_lowpart (SImode, operands[1]);
+})
+
+(define_insn "*thumb1_zero_extendhisi2"
+  [(set (match_operand:SI 0 "register_operand"
"=l,l") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1" +{ + rtx mem; + + if (which_alternative == 0 && arm_arch6) + return "uxth\t%0, %1"; + if (which_alternative == 0) + return "#"; + + mem = XEXP (operands[1], 0); + + if (GET_CODE (mem) == CONST) + mem = XEXP (mem, 0); + + if (GET_CODE (mem) == PLUS) + { + rtx a = XEXP (mem, 0); + + /* This can happen due to bugs in reload. */ + if (REG_P (a) && REGNO (a) == SP_REGNUM) + { + rtx ops[2]; + ops[0] = operands[0]; + ops[1] = a; + + output_asm_insn ("mov\t%0, %1", ops); + + XEXP (mem, 0) = operands[0]; + } + } + + return "ldrh\t%0, %1"; +} + [(set_attr_alternative "length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 4)]) + (set_attr "type" "extend,load_byte")] +) + +(define_insn "*arm_zero_extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(h%)\\t%0, %1" + [(set_attr "type" "alu_shift_reg,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch6" + "@ + uxth%?\\t%0, %1 + ldr%(h%)\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "extend,load_byte")] +) + +(define_insn "*arm_zero_extendhisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uxtah%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift_reg") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_expand "zero_extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))] + "TARGET_EITHER" +{ + if (TARGET_ARM && !arm_arch6 && !MEM_P (operands[1])) + { + emit_insn (gen_andsi3 (operands[0], + gen_lowpart (SImode, operands[1]), + GEN_INT (255))); + DONE; + } + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (24))); + emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (24))); + DONE; + } +}) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (match_operand:QI 1 "s_register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 24)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0); + if (TARGET_ARM) + { + emit_insn (gen_andsi3 (operands[0], operands[2], GEN_INT (255))); + DONE; + } +}) + +(define_insn "*thumb1_zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1 && !arm_arch6" + "@ + # + ldrb\\t%0, %1" + [(set_attr "length" "4,2") + (set_attr "type" "alu_shift_reg,load_byte") + (set_attr "pool_range" "*,32")] +) + +(define_insn "*thumb1_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,m")))] + "TARGET_THUMB1 && arm_arch6" + "@ + uxtb\\t%0, %1 + ldrb\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "extend,load_byte")] +) + +(define_insn 
"*arm_zero_extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && !arm_arch6" + "@ + # + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "length" "8,4") + (set_attr "type" "alu_shift_reg,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch6" + "@ + uxtb%(%)\\t%0, %1 + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes")] +) + +(define_insn "*arm_zero_extendqisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:QI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "uxtab%?\\t%0, %2, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "alu_shift_reg")] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (subreg:QI (match_operand:SI 1 "" "") 0))) + (clobber (match_operand:SI 2 "s_register_operand" ""))] + "TARGET_32BIT && (!MEM_P (operands[1])) && ! BYTES_BIG_ENDIAN" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (and:SI (match_dup 2) (const_int 255)))] + "" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (zero_extend:SI (subreg:QI (match_operand:SI 1 "" "") 3))) + (clobber (match_operand:SI 2 "s_register_operand" ""))] + "TARGET_32BIT && (!MEM_P (operands[1])) && BYTES_BIG_ENDIAN" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (and:SI (match_dup 2) (const_int 255)))] + "" +) + + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ior_xor:SI (and:SI (ashift:SI + (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "")) + (zero_extend:SI + (match_operator 5 "subreg_lowpart_operator" + [(match_operand:SI 4 "s_register_operand" "")]))))] + "TARGET_32BIT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[3]) + == (GET_MODE_MASK (GET_MODE (operands[5])) + & (GET_MODE_MASK (GET_MODE (operands[5])) + << (INTVAL (operands[2])))))" + [(set (match_dup 0) (ior_xor:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 4))) + (set (match_dup 0) (zero_extend:SI (match_dup 5)))] + "operands[5] = gen_lowpart (GET_MODE (operands[5]), operands[0]);" +) + +(define_insn "*compareqi_eq0" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:QI 0 "s_register_operand" "r") + (const_int 0)))] + "TARGET_32BIT" + "tst%?\\t%0, #255" + [(set_attr "conds" "set") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_imm")] +) + +(define_expand "extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_EITHER" +{ + if (TARGET_THUMB1) + { + emit_insn (gen_thumb1_extendhisi2 (operands[0], operands[1])); + DONE; + } + if (MEM_P (operands[1]) && TARGET_ARM && !arm_arch4) + { + emit_insn (gen_extendhisi2_mem (operands[0], operands[1])); + DONE; + } + + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (16))); + emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (16))); + DONE; + } +}) + 
+(define_split + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" ""))) + (clobber (match_scratch:SI 2 ""))])] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], HImode, 0); +}) + +;; We used to have an early-clobber on the scratch register here. +;; However, there's a bug somewhere in reload which means that this +;; can be partially ignored during spill allocation if the memory +;; address also needs reloading; this causes us to die later on when +;; we try to verify the operands. Fortunately, we don't really need +;; the early-clobber: we can always use operand 0 if operand 2 +;; overlaps the address. +(define_insn "thumb1_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "l,m"))) + (clobber (match_scratch:SI 2 "=X,l"))] + "TARGET_THUMB1" + "* + { + rtx ops[4]; + rtx mem; + + if (which_alternative == 0 && !arm_arch6) + return \"#\"; + if (which_alternative == 0) + return \"sxth\\t%0, %1\"; + + mem = XEXP (operands[1], 0); + + /* This code used to try to use 'V', and fix the address only if it was + offsettable, but this fails for e.g. REG+48 because 48 is outside the + range of QImode offsets, and offsettable_address_p does a QImode + address check. */ + + if (GET_CODE (mem) == CONST) + mem = XEXP (mem, 0); + + if (GET_CODE (mem) == LABEL_REF) + return \"ldr\\t%0, %1\"; + + if (GET_CODE (mem) == PLUS) + { + rtx a = XEXP (mem, 0); + rtx b = XEXP (mem, 1); + + if (GET_CODE (a) == LABEL_REF + && CONST_INT_P (b)) + return \"ldr\\t%0, %1\"; + + if (REG_P (b)) + return \"ldrsh\\t%0, %1\"; + + ops[1] = a; + ops[2] = b; + } + else + { + ops[1] = mem; + ops[2] = const0_rtx; + } + + gcc_assert (REG_P (ops[1])); + + ops[0] = operands[0]; + if (reg_mentioned_p (operands[2], ops[1])) + ops[3] = ops[0]; + else + ops[3] = operands[2]; + output_asm_insn (\"mov\\t%3, %2\;ldrsh\\t%0, [%1, %3]\", ops); + return \"\"; + }" + [(set_attr_alternative "length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 4)]) + (set_attr "type" "extend,load_byte") + (set_attr "pool_range" "*,1018")] +) + +;; This pattern will only be used when ldsh is not available +(define_expand "extendhisi2_mem" + [(set (match_dup 2) (zero_extend:SI (match_operand:HI 1 "" ""))) + (set (match_dup 3) + (zero_extend:SI (match_dup 7))) + (set (match_dup 6) (ashift:SI (match_dup 4) (const_int 24))) + (set (match_operand:SI 0 "" "") + (ior:SI (ashiftrt:SI (match_dup 6) (const_int 16)) (match_dup 5)))] + "TARGET_ARM" + " + { + rtx mem1, mem2; + rtx addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + + mem1 = change_address (operands[1], QImode, addr); + mem2 = change_address (operands[1], QImode, + plus_constant (Pmode, addr, 1)); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = mem1; + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = mem2; + + if (BYTES_BIG_ENDIAN) + { + operands[4] = operands[2]; + operands[5] = operands[3]; + } + else + { + operands[4] = operands[3]; + operands[5] = operands[2]; + } + }" +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) 
(const_int 16))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], HImode, 0); +}) + +(define_insn "*arm_extendhisi2" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(sh%)\\t%0, %1" + [(set_attr "length" "8,4") + (set_attr "type" "alu_shift_reg,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +;; ??? Check Thumb-2 pool range +(define_insn "*arm_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_32BIT && arm_arch6" + "@ + sxth%?\\t%0, %1 + ldr%(sh%)\\t%0, %1" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendhisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (sign_extend:SI (match_operand:HI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "sxtah%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift_reg")] +) + +(define_expand "extendqihi2" + [(set (match_dup 2) + (ashift:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "") + (const_int 24))) + (set (match_operand:HI 0 "s_register_operand" "") + (ashiftrt:SI (match_dup 2) + (const_int 24)))] + "TARGET_ARM" + " + { + if (arm_arch4 && MEM_P (operands[1])) + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_SIGN_EXTEND (HImode, operands[1]))); + DONE; + } + if (!s_register_operand (operands[1], QImode)) + operands[1] = copy_to_mode_reg (QImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_reg_rtx (SImode); + }" +) + +(define_insn "*arm_extendqihi_insn" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (sign_extend:HI (match_operand:QI 1 "arm_extendqisi_mem_op" "Uq")))] + "TARGET_ARM && arm_arch4" + "ldr%(sb%)\\t%0, %1" + [(set_attr "type" "load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "256") + (set_attr "neg_pool_range" "244")] +) + +(define_expand "extendqisi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (sign_extend:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "")))] + "TARGET_EITHER" +{ + if (!arm_arch4 && MEM_P (operands[1])) + operands[1] = copy_to_mode_reg (QImode, operands[1]); + + if (!arm_arch6 && !MEM_P (operands[1])) + { + rtx t = gen_lowpart (SImode, operands[1]); + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp, t, GEN_INT (24))); + emit_insn (gen_ashrsi3 (operands[0], tmp, GEN_INT (24))); + DONE; + } +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "register_operand" "")))] + "!arm_arch6" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 24)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0); +}) + +(define_insn "*arm_extendqisi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "r,Uq")))] + "TARGET_ARM && arm_arch4 && !arm_arch6" + "@ + # + ldr%(sb%)\\t%0, %1" + [(set_attr "length" "8,4") + (set_attr "type" 
"alu_shift_reg,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendqisi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI + (match_operand:QI 1 "arm_reg_or_extendqisi_mem_op" "r,Uq")))] + "TARGET_ARM && arm_arch6" + "@ + sxtb%?\\t%0, %1 + ldr%(sb%)\\t%0, %1" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,256") + (set_attr "neg_pool_range" "*,244")] +) + +(define_insn "*arm_extendqisi2addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (plus:SI (sign_extend:SI (match_operand:QI 1 "s_register_operand" "r")) + (match_operand:SI 2 "s_register_operand" "r")))] + "TARGET_INT_SIMD" + "sxtab%?\\t%0, %2, %1" + [(set_attr "type" "alu_shift_reg") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")] +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "memory_operand" "")))] + "TARGET_THUMB1 && reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) (sign_extend:SI (match_dup 3)))] +{ + rtx addr = XEXP (operands[1], 0); + + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1))) + /* No split necessary. */ + FAIL; + + if (GET_CODE (addr) == PLUS + && !REG_P (XEXP (addr, 0)) && !REG_P (XEXP (addr, 1))) + FAIL; + + if (reg_overlap_mentioned_p (operands[0], addr)) + { + rtx t = gen_lowpart (QImode, operands[0]); + emit_move_insn (t, operands[1]); + emit_insn (gen_thumb1_extendqisi2 (operands[0], t)); + DONE; + } + + if (REG_P (addr)) + { + addr = gen_rtx_PLUS (Pmode, addr, operands[0]); + operands[2] = const0_rtx; + } + else if (GET_CODE (addr) != PLUS) + FAIL; + else if (REG_P (XEXP (addr, 0))) + { + operands[2] = XEXP (addr, 1); + addr = gen_rtx_PLUS (Pmode, XEXP (addr, 0), operands[0]); + } + else + { + operands[2] = XEXP (addr, 0); + addr = gen_rtx_PLUS (Pmode, XEXP (addr, 1), operands[0]); + } + + operands[3] = change_address (operands[1], QImode, addr); +}) + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_dup 0) (match_operand 1 "const_int_operand"))) + (set (match_operand:SI 2 "register_operand" "") (const_int 0)) + (set (match_operand:SI 3 "register_operand" "") + (sign_extend:SI (match_operand:QI 4 "memory_operand" "")))] + "TARGET_THUMB1 + && GET_CODE (XEXP (operands[4], 0)) == PLUS + && rtx_equal_p (operands[0], XEXP (XEXP (operands[4], 0), 0)) + && rtx_equal_p (operands[2], XEXP (XEXP (operands[4], 0), 1)) + && (peep2_reg_dead_p (3, operands[0]) + || rtx_equal_p (operands[0], operands[3])) + && (peep2_reg_dead_p (3, operands[2]) + || rtx_equal_p (operands[2], operands[3]))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (sign_extend:SI (match_dup 4)))] +{ + rtx addr = gen_rtx_PLUS (Pmode, operands[0], operands[2]); + operands[4] = change_address (operands[4], QImode, addr); +}) + +(define_insn "thumb1_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=l,l,l") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "l,V,m")))] + "TARGET_THUMB1" +{ + rtx addr; + + if (which_alternative == 0 && arm_arch6) + return "sxtb\\t%0, %1"; + if (which_alternative == 0) + return "#"; + + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) && REG_P (XEXP (addr, 1))) + return "ldrsb\\t%0, %1"; + + return "#"; +} + [(set_attr_alternative 
"length" + [(if_then_else (eq_attr "is_arch6" "yes") + (const_int 2) (const_int 4)) + (const_int 2) + (if_then_else (eq_attr "is_arch6" "yes") + (const_int 4) (const_int 6))]) + (set_attr "type" "extend,load_byte,load_byte")] +) + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "s_register_operand" "") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "" +) + +/* HFmode -> DFmode conversions have to go through SFmode. */ +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "general_operand" "") + (float_extend:DF (match_operand:HF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (DFmode, op1, 0); + emit_insn (gen_movdf (operands[0], op1)); + DONE; + }" +) + +;; Move insns (including loads and stores) + +;; XXX Just some ideas about movti. +;; I don't think these are a good idea on the arm, there just aren't enough +;; registers +;;(define_expand "loadti" +;; [(set (match_operand:TI 0 "s_register_operand" "") +;; (mem:TI (match_operand:SI 1 "address_operand" "")))] +;; "" "") + +;;(define_expand "storeti" +;; [(set (mem:TI (match_operand:TI 0 "address_operand" "")) +;; (match_operand:TI 1 "s_register_operand" ""))] +;; "" "") + +;;(define_expand "movti" +;; [(set (match_operand:TI 0 "general_operand" "") +;; (match_operand:TI 1 "general_operand" ""))] +;; "" +;; " +;;{ +;; rtx insn; +;; +;; if (MEM_P (operands[0]) && MEM_P (operands[1])) +;; operands[1] = copy_to_reg (operands[1]); +;; if (MEM_P (operands[0])) +;; insn = gen_storeti (XEXP (operands[0], 0), operands[1]); +;; else if (MEM_P (operands[1])) +;; insn = gen_loadti (operands[0], XEXP (operands[1], 0)); +;; else +;; FAIL; +;; +;; emit_insn (insn); +;; DONE; +;;}") + +;; Recognize garbage generated above. 
+ +;;(define_insn "" +;; [(set (match_operand:TI 0 "general_operand" "=r,r,r,<,>,m") +;; (match_operand:TI 1 "general_operand" "<,>,m,r,r,r"))] +;; "" +;; "* +;; { +;; register mem = (which_alternative < 3); +;; register const char *template; +;; +;; operands[mem] = XEXP (operands[mem], 0); +;; switch (which_alternative) +;; { +;; case 0: template = \"ldmdb\\t%1!, %M0\"; break; +;; case 1: template = \"ldmia\\t%1!, %M0\"; break; +;; case 2: template = \"ldmia\\t%1, %M0\"; break; +;; case 3: template = \"stmdb\\t%0!, %M1\"; break; +;; case 4: template = \"stmia\\t%0!, %M1\"; break; +;; case 5: template = \"stmia\\t%0, %M1\"; break; +;; } +;; output_asm_insn (template, operands); +;; return \"\"; +;; }") + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (DImode, operands[1]); + } + " +) + +(define_insn "*arm_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m") + (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))] + "TARGET_32BIT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && !TARGET_IWMMXT + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"#\"; + default: + return output_move_double (operands, true, NULL); + } + " + [(set_attr "length" "8,12,16,8,8") + (set_attr "type" "multiple,multiple,multiple,load2,store2") + (set_attr "arm_pool_range" "*,*,*,1020,*") + (set_attr "arm_neg_pool_range" "*,*,*,1004,*") + (set_attr "thumb2_pool_range" "*,*,*,4094,*") + (set_attr "thumb2_neg_pool_range" "*,*,*,0,*")] +) + +(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "const_double_operand" ""))] + "TARGET_32BIT + && reload_completed + && (arm_const_double_inline_cost (operands[1]) + <= arm_max_const_double_inline_cost ())" + [(const_int 0)] + " + arm_split_constant (SET, SImode, curr_insn, + INTVAL (gen_lowpart (SImode, operands[1])), + gen_lowpart (SImode, operands[0]), NULL_RTX, 0); + arm_split_constant (SET, SImode, curr_insn, + INTVAL (gen_highpart_mode (SImode, + GET_MODE (operands[0]), + operands[1])), + gen_highpart (SImode, operands[0]), NULL_RTX, 0); + DONE; + " +) + +; If optimizing for size, or if we have load delay slots, then +; we want to split the constant into two separate operations. +; In both cases this may split a trivial part into a single data op +; leaving a single complex constant to load. We can also get longer +; offsets in a LDR which means we get better chances of sharing the pool +; entries. Finally, we can normally do a better job of scheduling +; LDR instructions than we can with LDM. +; This pattern will only match if the one above did not. 
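The splitters around this comment rely on nothing more exotic than taking the low and high 32-bit words of the 64-bit constant and materialising each one as an ordinary SImode constant. A minimal standalone sketch of that decomposition, in plain C and purely illustrative rather than taken from the compiler sources:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: decompose a 64-bit constant into the two 32-bit
   words that the DImode splits load independently.  Each word is then
   built with its own mov/mvn/orr sequence or literal-pool load, just
   like any other SImode constant.  */
int main (void)
{
  uint64_t value = 0x0000001200ff00ffULL;       /* arbitrary example */
  uint32_t lo = (uint32_t) (value & 0xffffffffULL);
  uint32_t hi = (uint32_t) (value >> 32);

  printf ("low word  = 0x%08x\n", lo);          /* 0x00ff00ff */
  printf ("high word = 0x%08x\n", hi);          /* 0x00000012 */
  return 0;
}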
+(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "const_double_operand" ""))] + "TARGET_ARM && reload_completed + && arm_const_double_by_parts (operands[1])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart_mode (SImode, GET_MODE (operands[0]), + operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + " +) + +(define_split + [(set (match_operand:ANY64 0 "arm_general_register_operand" "") + (match_operand:ANY64 1 "arm_general_register_operand" ""))] + "TARGET_EITHER && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + + /* Handle a partial overlap. */ + if (rtx_equal_p (operands[0], operands[3])) + { + rtx tmp0 = operands[0]; + rtx tmp1 = operands[1]; + + operands[0] = operands[2]; + operands[1] = operands[3]; + operands[2] = tmp0; + operands[3] = tmp1; + } + " +) + +;; We can't actually do base+index doubleword loads if the index and +;; destination overlap. Split here so that we at least have chance to +;; schedule. +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (mem:DI (plus:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "s_register_operand" ""))))] + "TARGET_LDRD + && reg_overlap_mentioned_p (operands[0], operands[1]) + && reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 4) + (plus:SI (match_dup 1) + (match_dup 2))) + (set (match_dup 0) + (mem:DI (match_dup 4)))] + " + operands[4] = gen_rtx_REG (SImode, REGNO(operands[0])); + " +) + +;;; ??? This should have alternatives for constants. +;;; ??? This was originally identical to the movdf_insn pattern. +;;; ??? The 'i' constraint looks funny, but it should always be replaced by +;;; thumb_reorg with a memory reference. 
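Both the register-to-register split above and the Thumb-1 DImode move below have to order their two word moves so that a partially overlapping register pair is not clobbered before it is read. A small C model of that ordering decision, illustrative only and not lifted from the compiler:

#include <stdio.h>

/* Fake 32-bit register file; a DImode value occupies two consecutive
   "registers", low word first.  */
static unsigned int regs[8];

/* Copy the pair starting at src_lo into the pair starting at dst_lo.
   If the low destination is the high source, copying low-first would
   overwrite a word still to be read, so the high half goes first.  */
static void move_di_pair (int dst_lo, int src_lo)
{
  int dst_hi = dst_lo + 1, src_hi = src_lo + 1;

  if (dst_lo == src_hi)
    {
      regs[dst_hi] = regs[src_hi];
      regs[dst_lo] = regs[src_lo];
    }
  else
    {
      regs[dst_lo] = regs[src_lo];
      regs[dst_hi] = regs[src_hi];
    }
}

int main (void)
{
  regs[1] = 0x11111111; regs[2] = 0x22222222;
  move_di_pair (2, 1);   /* destination r2:r3 overlaps source r1:r2 */
  printf ("r2=%08x r3=%08x\n", regs[2], regs[3]);   /* 11111111 22222222 */
  return 0;
}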
+(define_insn "*thumb1_movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_operand" "=l,l,l,l,>,l, m,*r") + (match_operand:DI 1 "general_operand" "l, I,J,>,l,mi,l,*r"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + { + switch (which_alternative) + { + default: + case 0: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"add\\t%0, %1, #0\;add\\t%H0, %H1, #0\"; + return \"add\\t%H0, %H1, #0\;add\\t%0, %1, #0\"; + case 1: + return \"mov\\t%Q0, %1\;mov\\t%R0, #0\"; + case 2: + operands[1] = GEN_INT (- INTVAL (operands[1])); + return \"mov\\t%Q0, %1\;neg\\t%Q0, %Q0\;asr\\t%R0, %Q0, #31\"; + case 3: + return \"ldmia\\t%1, {%0, %H0}\"; + case 4: + return \"stmia\\t%0, {%1, %H1}\"; + case 5: + return thumb_load_double_from_address (operands); + case 6: + operands[2] = gen_rtx_MEM (SImode, + plus_constant (Pmode, XEXP (operands[0], 0), 4)); + output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands); + return \"\"; + case 7: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"mov\\t%0, %1\;mov\\t%H0, %H1\"; + return \"mov\\t%H0, %H1\;mov\\t%0, %1\"; + } + }" + [(set_attr "length" "4,4,6,2,2,6,4,4") + (set_attr "type" "multiple,multiple,multiple,load2,store2,load2,store2,multiple") + (set_attr "pool_range" "*,*,*,*,*,1018,*,*")] +) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "TARGET_EITHER" + " + { + rtx base, offset, tmp; + + if (TARGET_32BIT) + { + /* Everything except mem = const or mem = mem can be done easily. */ + if (MEM_P (operands[0])) + operands[1] = force_reg (SImode, operands[1]); + if (arm_general_register_operand (operands[0], SImode) + && CONST_INT_P (operands[1]) + && !(const_ok_for_arm (INTVAL (operands[1])) + || const_ok_for_arm (~INTVAL (operands[1])))) + { + arm_split_constant (SET, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], NULL_RTX, + optimize && can_create_pseudo_p ()); + DONE; + } + } + else /* TARGET_THUMB1... */ + { + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (SImode, operands[1]); + } + } + + if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) + { + split_const (operands[1], &base, &offset); + if (GET_CODE (base) == SYMBOL_REF + && !offset_within_block_p (base, INTVAL (offset))) + { + tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + emit_move_insn (tmp, base); + emit_insn (gen_addsi3 (operands[0], tmp, offset)); + DONE; + } + } + + /* Recognize the case where operand[1] is a reference to thread-local + data and load its address to a register. */ + if (arm_tls_referenced_p (operands[1])) + { + rtx tmp = operands[1]; + rtx addend = NULL; + + if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) + { + addend = XEXP (XEXP (tmp, 0), 1); + tmp = XEXP (XEXP (tmp, 0), 0); + } + + gcc_assert (GET_CODE (tmp) == SYMBOL_REF); + gcc_assert (SYMBOL_REF_TLS_MODEL (tmp) != 0); + + tmp = legitimize_tls_address (tmp, + !can_create_pseudo_p () ? operands[0] : 0); + if (addend) + { + tmp = gen_rtx_PLUS (SImode, tmp, addend); + tmp = force_operand (tmp, operands[0]); + } + operands[1] = tmp; + } + else if (flag_pic + && (CONSTANT_P (operands[1]) + || symbol_mentioned_p (operands[1]) + || label_mentioned_p (operands[1]))) + operands[1] = legitimize_pic_address (operands[1], SImode, + (!can_create_pseudo_p () + ? 
operands[0] + : 0)); + } + " +) + +;; The ARM LO_SUM and HIGH are backwards - HIGH sets the low bits, and +;; LO_SUM adds in the high bits. Fortunately these are opaque operations +;; so this does not matter. +(define_insn "*arm_movt" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "i")))] + "arm_arch_thumb2" + "movt%?\t%0, #:upper16:%c2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4") + (set_attr "type" "mov_imm")] +) + +(define_insn "*arm_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))] + "TARGET_ARM && ! TARGET_IWMMXT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + movw%?\\t%0, %1 + ldr%?\\t%0, %1 + str%?\\t%1, %0" + [(set_attr "type" "mov_reg,mov_imm,mvn_imm,mov_imm,load1,store1") + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,*,4096,*") + (set_attr "neg_pool_range" "*,*,*,*,4084,*")] +) + +(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_32BIT + && (!(const_ok_for_arm (INTVAL (operands[1])) + || const_ok_for_arm (~INTVAL (operands[1]))))" + [(clobber (const_int 0))] + " + arm_split_constant (SET, SImode, NULL_RTX, + INTVAL (operands[1]), operands[0], NULL_RTX, 0); + DONE; + " +) + +;; A normal way to do (symbol + offset) requires three instructions at least +;; (depends on how big the offset is) as below: +;; movw r0, #:lower16:g +;; movw r0, #:upper16:g +;; adds r0, #4 +;; +;; A better way would be: +;; movw r0, #:lower16:g+4 +;; movw r0, #:upper16:g+4 +;; +;; The limitation of this way is that the length of offset should be a 16-bit +;; signed value, because current assembler only supports REL type relocation for +;; such case. If the more powerful RELA type is supported in future, we should +;; update this pattern to go with better way. +(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (const:SI (plus:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" ""))))] + "TARGET_THUMB2 + && arm_disable_literal_pool + && reload_completed + && GET_CODE (operands[1]) == SYMBOL_REF" + [(clobber (const_int 0))] + " + int offset = INTVAL (operands[2]); + + if (offset < -0x8000 || offset > 0x7fff) + { + arm_emit_movpair (operands[0], operands[1]); + emit_insn (gen_rtx_SET (SImode, operands[0], + gen_rtx_PLUS (SImode, operands[0], operands[2]))); + } + else + { + rtx op = gen_rtx_CONST (SImode, + gen_rtx_PLUS (SImode, operands[1], operands[2])); + arm_emit_movpair (operands[0], op); + } + " +) + +;; Split symbol_refs at the later stage (after cprop), instead of generating +;; movt/movw pair directly at expand. Otherwise corresponding high_sum +;; and lo_sum would be merged back into memory load at cprop. However, +;; if the default is to prefer movt/movw rather than a load from the constant +;; pool, the performance is better. 
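To make the offset-folding limit described above concrete: the #:lower16:/#:upper16: pair (movw for the bottom half, movt for the top half) can only carry a signed 16-bit addend under REL relocations, which is exactly the -0x8000..0x7fff test in the split. A standalone sketch of that decision, illustrative C rather than compiler code:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: fold a symbol offset into the movw/movt addend
   when it fits in signed 16 bits, otherwise materialise the bare
   symbol address and add the offset separately.  */
static void materialise (uint32_t sym_addr, int32_t offset)
{
  if (offset < -0x8000 || offset > 0x7fff)
    {
      printf ("movw #0x%04x ; movt #0x%04x ; add #%d\n",
              (unsigned) (sym_addr & 0xffff),
              (unsigned) (sym_addr >> 16), (int) offset);
    }
  else
    {
      uint32_t target = sym_addr + (uint32_t) offset;
      printf ("movw #0x%04x ; movt #0x%04x\n",
              (unsigned) (target & 0xffff),
              (unsigned) (target >> 16));
    }
}

int main (void)
{
  materialise (0x00020040, 4);         /* folded into the relocations */
  materialise (0x00020040, 0x12000);   /* too big: explicit add */
  return 0;
}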
+(define_split + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "TARGET_32BIT + && TARGET_USE_MOVT && GET_CODE (operands[1]) == SYMBOL_REF + && !flag_pic && !target_word_relocations + && !arm_tls_referenced_p (operands[1])" + [(clobber (const_int 0))] +{ + arm_emit_movpair (operands[0], operands[1]); + DONE; +}) + +(define_insn "*thumb1_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*l*h*k") + (match_operand:SI 1 "general_operand" "l, I,J,K,>,l,mi,l,*l*h*k"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov %0, %1 + mov %0, %1 + # + # + ldmia\\t%1, {%0} + stmia\\t%0, {%1} + ldr\\t%0, %1 + str\\t%1, %0 + mov\\t%0, %1" + [(set_attr "length" "2,2,4,4,2,2,2,2,2") + (set_attr "type" "mov_reg,mov_imm,multiple,multiple,load1,store1,load1,store1,mov_reg") + (set_attr "pool_range" "*,*,*,*,*,*,1018,*,*") + (set_attr "conds" "set,clob,*,*,nocond,nocond,nocond,nocond,nocond")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_J (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (neg:SI (match_dup 2)))] + " + { + operands[1] = GEN_INT (- INTVAL (operands[1])); + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + }" +) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_K (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))] + " + { + unsigned HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffffffffu; + unsigned HOST_WIDE_INT mask = 0xff; + int i; + + for (i = 0; i < 25; i++) + if ((val & (mask << i)) == val) + break; + + /* Don't split if the shift is zero. */ + if (i == 0) + FAIL; + + operands[1] = GEN_INT (val >> i); + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + operands[3] = GEN_INT (i); + }" +) + +;; For thumb1 split imm move [256-510] into mov [1-255] and add #255 +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_THUMB1 && satisfies_constraint_Pe (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (plus:SI (match_dup 2) (match_dup 3)))] + " + { + operands[1] = GEN_INT (INTVAL (operands[1]) - 255); + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + operands[3] = GEN_INT (255); + }" +) + +;; When generating pic, we need to load the symbol offset into a register. +;; So that the optimizer does not confuse this with a normal symbol load +;; we use an unspec. The offset will be loaded from a constant pool entry, +;; since that is the only type of relocation we can use. + +;; Wrap calculation of the whole PIC address in a single pattern for the +;; benefit of optimizers, particularly, PRE and HOIST. Calculation of +;; a PIC address involves two loads from memory, so we want to CSE it +;; as often as possible. +;; This pattern will be split into one of the pic_load_addr_* patterns +;; and a move after GCSE optimizations. +;; +;; Note: Update arm.c: legitimize_pic_address() when changing this pattern. 
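As the comment above says, the wrapped calculation is conceptually two dependent loads: the GOT offset comes out of the literal pool, and the symbol's address then comes out of the GOT slot at pic_register plus that offset. A rough, purely conceptual C model of the data flow (all names here are invented for illustration, nothing is GCC or runtime API):

#include <stdint.h>
#include <stddef.h>

/* Conceptual model only: literal_pool[pool_index] holds the byte
   offset of the symbol's GOT slot, got_base plays the role of the PIC
   register, and the GOT slot holds the symbol's final address.  */
static uint32_t
calculate_pic_address_model (const uint32_t *literal_pool, size_t pool_index,
                             const uint8_t *got_base)
{
  uint32_t got_offset = literal_pool[pool_index];     /* pic_load_addr_*  */
  const uint32_t *slot = (const uint32_t *) (got_base + got_offset);
  return *slot;                                       /* load from the GOT */
}

int main (void)
{
  uint32_t got[4] = { 0, 0, 0x00030000u, 0 };   /* slot 2 holds the address */
  uint32_t pool[1] = { 8 };                     /* byte offset of slot 2 */
  return calculate_pic_address_model (pool, 0,
                                      (const uint8_t *) got) != 0x00030000u;
}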
+(define_expand "calculate_pic_address" + [(set (match_operand:SI 0 "register_operand" "") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_PIC_SYM))))] + "flag_pic" +) + +;; Split calculate_pic_address into pic_load_addr_* and a move. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_PIC_SYM))))] + "flag_pic" + [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM)) + (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))] + "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];" +) + +;; operand1 is the memory address to go into +;; pic_load_addr_32bit. +;; operand2 is the PIC label to be emitted +;; from pic_add_dot_plus_eight. +;; We do this to allow hoisting of the entire insn. +(define_insn_and_split "pic_load_addr_unified" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,l") + (unspec:SI [(match_operand:SI 1 "" "mX,mX,mX") + (match_operand:SI 2 "" "")] + UNSPEC_PIC_UNIFIED))] + "flag_pic" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_PIC_SYM)) + (set (match_dup 0) (unspec:SI [(match_dup 0) (match_dup 3) + (match_dup 2)] UNSPEC_PIC_BASE))] + "operands[3] = TARGET_THUMB ? GEN_INT (4) : GEN_INT (8);" + [(set_attr "type" "load1,load1,load1") + (set_attr "pool_range" "4096,4094,1022") + (set_attr "neg_pool_range" "4084,0,0") + (set_attr "arch" "a,t2,t1") + (set_attr "length" "8,6,4")] +) + +;; The rather odd constraints on the following are to force reload to leave +;; the insn alone, and to force the minipool generation pass to then move +;; the GOT symbol to memory. + +(define_insn "pic_load_addr_32bit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))] + "TARGET_32BIT && flag_pic" + "ldr%?\\t%0, %1" + [(set_attr "type" "load1") + (set (attr "pool_range") + (if_then_else (eq_attr "is_thumb" "no") + (const_int 4096) + (const_int 4094))) + (set (attr "neg_pool_range") + (if_then_else (eq_attr "is_thumb" "no") + (const_int 4084) + (const_int 0)))] +) + +(define_insn "pic_load_addr_thumb1" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (unspec:SI [(match_operand:SI 1 "" "mX")] UNSPEC_PIC_SYM))] + "TARGET_THUMB1 && flag_pic" + "ldr\\t%0, %1" + [(set_attr "type" "load1") + (set (attr "pool_range") (const_int 1018))] +) + +(define_insn "pic_add_dot_plus_four" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0") + (const_int 4) + (match_operand 2 "" "")] + UNSPEC_PIC_BASE))] + "TARGET_THUMB" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"add\\t%0, %|pc\"; + " + [(set_attr "length" "2") + (set_attr "type" "alu_reg")] +) + +(define_insn "pic_add_dot_plus_eight" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (const_int 8) + (match_operand 2 "" "")] + UNSPEC_PIC_BASE))] + "TARGET_ARM" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"add%?\\t%0, %|pc, %1\"; + " + [(set_attr "predicable" "yes") + (set_attr "type" "alu_reg")] +) + +(define_insn "tls_load_dot_plus_eight" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (const_int 8) + (match_operand 2 "" 
"")] + UNSPEC_PIC_BASE)))] + "TARGET_ARM" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[2])); + return \"ldr%?\\t%0, [%|pc, %1]\t\t@ tls_load_dot_plus_eight\"; + " + [(set_attr "predicable" "yes") + (set_attr "type" "load1")] +) + +;; PIC references to local variables can generate pic_add_dot_plus_eight +;; followed by a load. These sequences can be crunched down to +;; tls_load_dot_plus_eight by a peephole. + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 3 "register_operand" "") + (const_int 8) + (match_operand 1 "" "")] + UNSPEC_PIC_BASE)) + (set (match_operand:SI 2 "arm_general_register_operand" "") + (mem:SI (match_dup 0)))] + "TARGET_ARM && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (mem:SI (unspec:SI [(match_dup 3) + (const_int 8) + (match_dup 1)] + UNSPEC_PIC_BASE)))] + "" +) + +(define_insn "pic_offset_arm" + [(set (match_operand:SI 0 "register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (unspec:SI [(match_operand:SI 2 "" "X")] + UNSPEC_PIC_OFFSET))))] + "TARGET_VXWORKS_RTP && TARGET_ARM && flag_pic" + "ldr%?\\t%0, [%1,%2]" + [(set_attr "type" "load1")] +) + +(define_expand "builtin_setjmp_receiver" + [(label_ref (match_operand 0 "" ""))] + "flag_pic" + " +{ + /* r3 is clobbered by set/longjmp, so we can use it as a scratch + register. */ + if (arm_pic_register != INVALID_REGNUM) + arm_load_pic_register (1UL << 3); + DONE; +}") + +;; If copying one reg to another we can set the condition codes according to +;; its value. Such a move is common after a return from subroutine and the +;; result is being tested against zero. + +(define_insn "*movsi_compare0" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "s_register_operand" "0,r") + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_dup 1))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, #0 + sub%.\\t%0, %1, #0" + [(set_attr "conds" "set") + (set_attr "type" "alus_imm,alus_imm")] +) + +;; Subroutine to store a half word from a register into memory. +;; Operand 0 is the source register (HImode) +;; Operand 1 is the destination address in a register (SImode) + +;; In both this routine and the next, we must be careful not to spill +;; a memory address of reg+large_const into a separate PLUS insn, since this +;; can generate unrecognizable rtl. 
+ +(define_expand "storehi" + [;; store the low byte + (set (match_operand 1 "" "") (match_dup 3)) + ;; extract the high byte + (set (match_dup 2) + (ashiftrt:SI (match_operand 0 "" "") (const_int 8))) + ;; store the high byte + (set (match_dup 4) (match_dup 5))] + "TARGET_ARM" + " + { + rtx op1 = operands[1]; + rtx addr = XEXP (op1, 0); + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && !CONST_INT_P (XEXP (addr, 1))) + || code == MINUS) + op1 = replace_equiv_address (operands[1], force_reg (SImode, addr)); + + operands[4] = adjust_address (op1, QImode, 1); + operands[1] = adjust_address (operands[1], QImode, 0); + operands[3] = gen_lowpart (QImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_reg_rtx (SImode); + operands[5] = gen_lowpart (QImode, operands[2]); + }" +) + +(define_expand "storehi_bigend" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 2) + (ashiftrt:SI (match_operand 0 "" "") (const_int 8))) + (set (match_operand 1 "" "") (match_dup 5))] + "TARGET_ARM" + " + { + rtx op1 = operands[1]; + rtx addr = XEXP (op1, 0); + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && !CONST_INT_P (XEXP (addr, 1))) + || code == MINUS) + op1 = replace_equiv_address (op1, force_reg (SImode, addr)); + + operands[4] = adjust_address (op1, QImode, 1); + operands[1] = adjust_address (operands[1], QImode, 0); + operands[3] = gen_lowpart (QImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_reg_rtx (SImode); + operands[5] = gen_lowpart (QImode, operands[2]); + }" +) + +;; Subroutine to store a half word integer constant into memory. +(define_expand "storeinthi" + [(set (match_operand 0 "" "") + (match_operand 1 "" "")) + (set (match_dup 3) (match_dup 2))] + "TARGET_ARM" + " + { + HOST_WIDE_INT value = INTVAL (operands[1]); + rtx addr = XEXP (operands[0], 0); + rtx op0 = operands[0]; + enum rtx_code code = GET_CODE (addr); + + if ((code == PLUS && !CONST_INT_P (XEXP (addr, 1))) + || code == MINUS) + op0 = replace_equiv_address (op0, force_reg (SImode, addr)); + + operands[1] = gen_reg_rtx (SImode); + if (BYTES_BIG_ENDIAN) + { + emit_insn (gen_movsi (operands[1], GEN_INT ((value >> 8) & 255))); + if ((value & 255) == ((value >> 8) & 255)) + operands[2] = operands[1]; + else + { + operands[2] = gen_reg_rtx (SImode); + emit_insn (gen_movsi (operands[2], GEN_INT (value & 255))); + } + } + else + { + emit_insn (gen_movsi (operands[1], GEN_INT (value & 255))); + if ((value & 255) == ((value >> 8) & 255)) + operands[2] = operands[1]; + else + { + operands[2] = gen_reg_rtx (SImode); + emit_insn (gen_movsi (operands[2], GEN_INT ((value >> 8) & 255))); + } + } + + operands[3] = adjust_address (op0, QImode, 1); + operands[0] = adjust_address (operands[0], QImode, 0); + operands[2] = gen_lowpart (QImode, operands[2]); + operands[1] = gen_lowpart (QImode, operands[1]); + }" +) + +(define_expand "storehi_single_op" + [(set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "TARGET_32BIT && arm_arch4" + " + if (!s_register_operand (operands[1], HImode)) + operands[1] = copy_to_mode_reg (HImode, operands[1]); + " +) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_ARM) + { + if (can_create_pseudo_p ()) + { + if (MEM_P (operands[0])) + { + if (arm_arch4) + { + emit_insn (gen_storehi_single_op (operands[0], operands[1])); + DONE; + } + if (CONST_INT_P (operands[1])) 
+ emit_insn (gen_storeinthi (operands[0], operands[1])); + else + { + if (MEM_P (operands[1])) + operands[1] = force_reg (HImode, operands[1]); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_storehi_bigend (operands[1], operands[0])); + else + emit_insn (gen_storehi (operands[1], operands[0])); + } + DONE; + } + /* Sign extend a constant, and keep it in an SImode reg. */ + else if (CONST_INT_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff; + + /* If the constant is already valid, leave it alone. */ + if (!const_ok_for_arm (val)) + { + /* If setting all the top bits will make the constant + loadable in a single instruction, then set them. + Otherwise, sign extend the number. */ + + if (const_ok_for_arm (~(val | ~0xffff))) + val |= ~0xffff; + else if (val & 0x8000) + val |= ~0xffff; + } + + emit_insn (gen_movsi (reg, GEN_INT (val))); + operands[1] = gen_lowpart (HImode, reg); + } + else if (arm_arch4 && optimize && can_create_pseudo_p () + && MEM_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendhisi2 (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + else if (!arm_arch4) + { + if (MEM_P (operands[1])) + { + rtx base; + rtx offset = const0_rtx; + rtx reg = gen_reg_rtx (SImode); + + if ((REG_P (base = XEXP (operands[1], 0)) + || (GET_CODE (base) == PLUS + && (CONST_INT_P (offset = XEXP (base, 1))) + && ((INTVAL(offset) & 1) != 1) + && REG_P (base = XEXP (base, 0)))) + && REGNO_POINTER_ALIGN (REGNO (base)) >= 32) + { + rtx new_rtx; + + new_rtx = widen_memory_access (operands[1], SImode, + ((INTVAL (offset) & ~3) + - INTVAL (offset))); + emit_insn (gen_movsi (reg, new_rtx)); + if (((INTVAL (offset) & 2) != 0) + ^ (BYTES_BIG_ENDIAN ? 1 : 0)) + { + rtx reg2 = gen_reg_rtx (SImode); + + emit_insn (gen_lshrsi3 (reg2, reg, GEN_INT (16))); + reg = reg2; + } + } + else + emit_insn (gen_movhi_bytes (reg, operands[1])); + + operands[1] = gen_lowpart (HImode, reg); + } + } + } + /* Handle loading a large integer during reload. */ + else if (CONST_INT_P (operands[1]) + && !const_ok_for_arm (INTVAL (operands[1])) + && !const_ok_for_arm (~INTVAL (operands[1]))) + { + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. */ + gcc_assert (REG_P (operands[0])); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + } + else if (TARGET_THUMB2) + { + /* Thumb-2 can do everything except mem=mem and mem=const easily. */ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (HImode, operands[1]); + /* Zero extend a constant, and keep it in an SImode reg. */ + else if (CONST_INT_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + HOST_WIDE_INT val = INTVAL (operands[1]) & 0xffff; + + emit_insn (gen_movsi (reg, GEN_INT (val))); + operands[1] = gen_lowpart (HImode, reg); + } + } + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (CONST_INT_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + + /* ??? We shouldn't really get invalid addresses here, but this can + happen if we are passed a SP (never OK for HImode/QImode) or + virtual register (also rejected as illegitimate for HImode/QImode) + relative address. */ + /* ??? 
This should perhaps be fixed elsewhere, for instance, in + fixup_stack_1, by checking for other kinds of invalid addresses, + e.g. a bare reference to a virtual register. This may confuse the + alpha though, which must handle this case differently. */ + if (MEM_P (operands[0]) + && !memory_address_p (GET_MODE (operands[0]), + XEXP (operands[0], 0))) + operands[0] + = replace_equiv_address (operands[0], + copy_to_reg (XEXP (operands[0], 0))); + + if (MEM_P (operands[1]) + && !memory_address_p (GET_MODE (operands[1]), + XEXP (operands[1], 0))) + operands[1] + = replace_equiv_address (operands[1], + copy_to_reg (XEXP (operands[1], 0))); + + if (MEM_P (operands[1]) && optimize > 0) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendhisi2 (reg, operands[1])); + operands[1] = gen_lowpart (HImode, reg); + } + + if (MEM_P (operands[0])) + operands[1] = force_reg (HImode, operands[1]); + } + else if (CONST_INT_P (operands[1]) + && !satisfies_constraint_I (operands[1])) + { + /* Handle loading a large integer during reload. */ + + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. */ + gcc_assert (REG_P (operands[0])); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + } + " +) + +(define_insn "*thumb1_movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l") + (match_operand:HI 1 "general_operand" "l,m,l,*h,*r,I"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "* + switch (which_alternative) + { + case 0: return \"add %0, %1, #0\"; + case 2: return \"strh %1, %0\"; + case 3: return \"mov %0, %1\"; + case 4: return \"mov %0, %1\"; + case 5: return \"mov %0, %1\"; + default: gcc_unreachable (); + case 1: + /* The stack pointer can end up being taken as an index register. + Catch this case here and deal with it. 
*/ + if (GET_CODE (XEXP (operands[1], 0)) == PLUS + && REG_P (XEXP (XEXP (operands[1], 0), 0)) + && REGNO (XEXP (XEXP (operands[1], 0), 0)) == SP_REGNUM) + { + rtx ops[2]; + ops[0] = operands[0]; + ops[1] = XEXP (XEXP (operands[1], 0), 0); + + output_asm_insn (\"mov %0, %1\", ops); + + XEXP (XEXP (operands[1], 0), 0) = operands[0]; + + } + return \"ldrh %0, %1\"; + }" + [(set_attr "length" "2,4,2,2,2,2") + (set_attr "type" "alus_imm,load1,store1,mov_reg,mov_reg,mov_imm") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) + + +(define_expand "movhi_bytes" + [(set (match_dup 2) (zero_extend:SI (match_operand:HI 1 "" ""))) + (set (match_dup 3) + (zero_extend:SI (match_dup 6))) + (set (match_operand:SI 0 "" "") + (ior:SI (ashift:SI (match_dup 4) (const_int 8)) (match_dup 5)))] + "TARGET_ARM" + " + { + rtx mem1, mem2; + rtx addr = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); + + mem1 = change_address (operands[1], QImode, addr); + mem2 = change_address (operands[1], QImode, + plus_constant (Pmode, addr, 1)); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = mem1; + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[6] = mem2; + + if (BYTES_BIG_ENDIAN) + { + operands[4] = operands[2]; + operands[5] = operands[3]; + } + else + { + operands[4] = operands[3]; + operands[5] = operands[2]; + } + }" +) + +(define_expand "movhi_bigend" + [(set (match_dup 2) + (rotate:SI (subreg:SI (match_operand:HI 1 "memory_operand" "") 0) + (const_int 16))) + (set (match_dup 3) + (ashiftrt:SI (match_dup 2) (const_int 16))) + (set (match_operand:HI 0 "s_register_operand" "") + (match_dup 4))] + "TARGET_ARM" + " + operands[2] = gen_reg_rtx (SImode); + operands[3] = gen_reg_rtx (SImode); + operands[4] = gen_lowpart (HImode, operands[3]); + " +) + +;; Pattern to recognize insn generated default case above +(define_insn "*movhi_insn_arch4" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,m,r") + (match_operand:HI 1 "general_operand" "rI,K,r,mi"))] + "TARGET_ARM + && arm_arch4 + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "@ + mov%?\\t%0, %1\\t%@ movhi + mvn%?\\t%0, #%B1\\t%@ movhi + str%(h%)\\t%1, %0\\t%@ movhi + ldr%(h%)\\t%0, %1\\t%@ movhi" + [(set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,256") + (set_attr "neg_pool_range" "*,*,*,244") + (set_attr_alternative "type" + [(if_then_else (match_operand 1 "const_int_operand" "") + (const_string "mov_imm" ) + (const_string "mov_reg")) + (const_string "mvn_imm") + (const_string "store1") + (const_string "load1")])] +) + +(define_insn "*movhi_bytes" + [(set (match_operand:HI 0 "s_register_operand" "=r,r,r") + (match_operand:HI 1 "arm_rhs_operand" "I,r,K"))] + "TARGET_ARM" + "@ + mov%?\\t%0, %1\\t%@ movhi + mov%?\\t%0, %1\\t%@ movhi + mvn%?\\t%0, #%B1\\t%@ movhi" + [(set_attr "predicable" "yes") + (set_attr "type" "mov_imm,mov_reg,mvn_imm")] +) + +(define_expand "thumb_movhi_clobber" + [(set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "register_operand" "")) + (clobber (match_operand:DI 2 "register_operand" ""))] + "TARGET_THUMB1" + " + if (strict_memory_address_p (HImode, XEXP (operands[0], 0)) + && REGNO (operands[1]) <= LAST_LO_REGNUM) + { + emit_insn (gen_movhi (operands[0], operands[1])); + DONE; + } + /* XXX Fixme, need to handle other cases here as well. 
*/ + gcc_unreachable (); + " +) + +;; We use a DImode scratch because we may occasionally need an additional +;; temporary if the address isn't offsettable -- push_reload doesn't seem +;; to take any notice of the "o" constraints on reload_memory_operand operand. +(define_expand "reload_outhi" + [(parallel [(match_operand:HI 0 "arm_reload_memory_operand" "=o") + (match_operand:HI 1 "s_register_operand" "r") + (match_operand:DI 2 "s_register_operand" "=&l")])] + "TARGET_EITHER" + "if (TARGET_ARM) + arm_reload_out_hi (operands); + else + thumb_reload_out_hi (operands); + DONE; + " +) + +(define_expand "reload_inhi" + [(parallel [(match_operand:HI 0 "s_register_operand" "=r") + (match_operand:HI 1 "arm_reload_memory_operand" "o") + (match_operand:DI 2 "s_register_operand" "=&r")])] + "TARGET_EITHER" + " + if (TARGET_ARM) + arm_reload_in_hi (operands); + else + thumb_reload_out_hi (operands); + DONE; +") + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "TARGET_EITHER" + " + /* Everything except mem = const or mem = mem can be done easily */ + + if (can_create_pseudo_p ()) + { + if (CONST_INT_P (operands[1])) + { + rtx reg = gen_reg_rtx (SImode); + + /* For thumb we want an unsigned immediate, then we are more likely + to be able to use a movs insn. */ + if (TARGET_THUMB) + operands[1] = GEN_INT (INTVAL (operands[1]) & 255); + + emit_insn (gen_movsi (reg, operands[1])); + operands[1] = gen_lowpart (QImode, reg); + } + + if (TARGET_THUMB) + { + /* ??? We shouldn't really get invalid addresses here, but this can + happen if we are passed a SP (never OK for HImode/QImode) or + virtual register (also rejected as illegitimate for HImode/QImode) + relative address. */ + /* ??? This should perhaps be fixed elsewhere, for instance, in + fixup_stack_1, by checking for other kinds of invalid addresses, + e.g. a bare reference to a virtual register. This may confuse the + alpha though, which must handle this case differently. */ + if (MEM_P (operands[0]) + && !memory_address_p (GET_MODE (operands[0]), + XEXP (operands[0], 0))) + operands[0] + = replace_equiv_address (operands[0], + copy_to_reg (XEXP (operands[0], 0))); + if (MEM_P (operands[1]) + && !memory_address_p (GET_MODE (operands[1]), + XEXP (operands[1], 0))) + operands[1] + = replace_equiv_address (operands[1], + copy_to_reg (XEXP (operands[1], 0))); + } + + if (MEM_P (operands[1]) && optimize > 0) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (reg, operands[1])); + operands[1] = gen_lowpart (QImode, reg); + } + + if (MEM_P (operands[0])) + operands[1] = force_reg (QImode, operands[1]); + } + else if (TARGET_THUMB + && CONST_INT_P (operands[1]) + && !satisfies_constraint_I (operands[1])) + { + /* Handle loading a large integer during reload. */ + + /* Writing a constant to memory needs a scratch, which should + be handled with SECONDARY_RELOADs. 
*/ + gcc_assert (REG_P (operands[0])); + + operands[0] = gen_rtx_SUBREG (SImode, operands[0], 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + " +) + +(define_insn "*arm_movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,r,l,Uu,r,m") + (match_operand:QI 1 "general_operand" "r,r,I,Py,K,Uu,l,m,r"))] + "TARGET_32BIT + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" + "@ + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0 + ldr%(b%)\\t%0, %1 + str%(b%)\\t%1, %0" + [(set_attr "type" "mov_reg,mov_reg,mov_imm,mov_imm,mvn_imm,load1,store1,load1,store1") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,yes,yes,no,no,no,no,no,no") + (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any") + (set_attr "length" "2,4,4,2,4,2,2,4,4")] +) + +(define_insn "*thumb1_movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=l,l,m,*r,*h,l") + (match_operand:QI 1 "general_operand" "l, m,l,*h,*r,I"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], QImode) + || register_operand (operands[1], QImode))" + "@ + add\\t%0, %1, #0 + ldrb\\t%0, %1 + strb\\t%1, %0 + mov\\t%0, %1 + mov\\t%0, %1 + mov\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "alu_imm,load1,store1,mov_reg,mov_imm,mov_imm") + (set_attr "pool_range" "*,32,*,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) + +;; HFmode moves +(define_expand "movhf" + [(set (match_operand:HF 0 "general_operand" "") + (match_operand:HF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (MEM_P (operands[0])) + operands[1] = force_reg (HFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (HFmode, operands[1]); + } + } + " +) + +(define_insn "*arm32_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") + (match_operand:HF 1 "general_operand" " m,r,r,F"))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) && !arm_restrict_it + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"str%(h%)\\t%1, %0\\t%@ __fp16\"; + case 2: /* ARM register from ARM register */ + return \"mov%?\\t%0, %1\\t%@ __fp16\"; + case 3: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw%?\\t%0, %1\", ops); + else + output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,mov_reg,multiple") + (set_attr "length" "4,4,4,8") + (set_attr "predicable" "yes")] +) + +(define_insn "*thumb1_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") + (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 1: + { + rtx addr; + gcc_assert (MEM_P 
(operands[1])); + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == LABEL_REF + || (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF + && CONST_INT_P (XEXP (XEXP (addr, 0), 1)))) + { + /* Constant pool entry. */ + return \"ldr\\t%0, %1\"; + } + return \"ldrh\\t%0, %1\"; + } + case 2: return \"strh\\t%1, %0\"; + default: return \"mov\\t%0, %1\"; + } + " + [(set_attr "length" "2") + (set_attr "type" "mov_reg,load1,store1,mov_reg,mov_reg") + (set_attr "pool_range" "*,1018,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond")]) + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (MEM_P (operands[0])) + operands[1] = force_reg (SFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (SFmode, operands[1]); + } + } + " +) + +;; Transform a floating-point move of a constant into a core register into +;; an SImode operation. +(define_split + [(set (match_operand:SF 0 "arm_general_register_operand" "") + (match_operand:SF 1 "immediate_operand" ""))] + "TARGET_EITHER + && reload_completed + && CONST_DOUBLE_P (operands[1])" + [(set (match_dup 2) (match_dup 3))] + " + operands[2] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_lowpart (SImode, operands[1]); + if (operands[2] == 0 || operands[3] == 0) + FAIL; + " +) + +(define_insn "*arm_movsf_soft_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,m") + (match_operand:SF 1 "general_operand" "r,mE,r"))] + "TARGET_32BIT + && TARGET_SOFT_FLOAT + && (!MEM_P (operands[0]) + || register_operand (operands[1], SFmode))" + "@ + mov%?\\t%0, %1 + ldr%?\\t%0, %1\\t%@ float + str%?\\t%1, %0\\t%@ float" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "mov_reg,load1,store1") + (set_attr "arm_pool_range" "*,4096,*") + (set_attr "thumb2_pool_range" "*,4094,*") + (set_attr "arm_neg_pool_range" "*,4084,*") + (set_attr "thumb2_neg_pool_range" "*,0,*")] +) + +;;; ??? This should have alternatives for constants. +(define_insn "*thumb1_movsf_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h") + (match_operand:SF 1 "general_operand" "l, >,l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], SFmode) + || register_operand (operands[1], SFmode))" + "@ + add\\t%0, %1, #0 + ldmia\\t%1, {%0} + stmia\\t%0, {%1} + ldr\\t%0, %1 + str\\t%1, %0 + mov\\t%0, %1 + mov\\t%0, %1" + [(set_attr "length" "2") + (set_attr "type" "alus_imm,load1,store1,load1,store1,mov_reg,mov_reg") + (set_attr "pool_range" "*,*,*,1018,*,*,*") + (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond,nocond")] +) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (MEM_P (operands[0])) + operands[1] = force_reg (DFmode, operands[1]); + } + else /* TARGET_THUMB */ + { + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (DFmode, operands[1]); + } + } + " +) + +;; Reloading a df mode value stored in integer regs to memory can require a +;; scratch reg. 
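Stepping back to the SF-constant split above (and the __fp16 case in *arm32_movhf): moving a floating-point constant into a core register is just a move of its IEEE-754 bit pattern as a 32-bit integer, which the usual integer-constant sequences can then build. A small C illustration of that reinterpretation, not compiler code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative only: the SImode "value" of an SFmode constant is its
   IEEE-754 single-precision bit pattern.  */
int main (void)
{
  float f = 1.5f;
  uint32_t bits;

  memcpy (&bits, &f, sizeof bits);      /* 1.5f -> 0x3fc00000 */
  printf ("SF constant %g moves as SImode 0x%08x\n", (double) f, bits);
  return 0;
}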
+(define_expand "reload_outdf" + [(match_operand:DF 0 "arm_reload_memory_operand" "=o") + (match_operand:DF 1 "s_register_operand" "r") + (match_operand:SI 2 "s_register_operand" "=&r")] + "TARGET_THUMB2" + " + { + enum rtx_code code = GET_CODE (XEXP (operands[0], 0)); + + if (code == REG) + operands[2] = XEXP (operands[0], 0); + else if (code == POST_INC || code == PRE_DEC) + { + operands[0] = gen_rtx_SUBREG (DImode, operands[0], 0); + operands[1] = gen_rtx_SUBREG (DImode, operands[1], 0); + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (code == PRE_INC) + { + rtx reg = XEXP (XEXP (operands[0], 0), 0); + + emit_insn (gen_addsi3 (reg, reg, GEN_INT (8))); + operands[2] = reg; + } + else if (code == POST_DEC) + operands[2] = XEXP (XEXP (operands[0], 0), 0); + else + emit_insn (gen_addsi3 (operands[2], XEXP (XEXP (operands[0], 0), 0), + XEXP (XEXP (operands[0], 0), 1))); + + emit_insn (gen_rtx_SET (VOIDmode, + replace_equiv_address (operands[0], operands[2]), + operands[1])); + + if (code == POST_DEC) + emit_insn (gen_addsi3 (operands[2], operands[2], GEN_INT (-8))); + + DONE; + }" +) + +(define_insn "*movdf_soft_insn" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m") + (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))] + "TARGET_32BIT && TARGET_SOFT_FLOAT + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"#\"; + default: + return output_move_double (operands, true, NULL); + } + " + [(set_attr "length" "8,12,16,8,8") + (set_attr "type" "multiple,multiple,multiple,load2,store2") + (set_attr "arm_pool_range" "*,*,*,1020,*") + (set_attr "thumb2_pool_range" "*,*,*,1018,*") + (set_attr "arm_neg_pool_range" "*,*,*,1004,*") + (set_attr "thumb2_neg_pool_range" "*,*,*,0,*")] +) + +;;; ??? This should have alternatives for constants. +;;; ??? This was originally identical to the movdi_insn pattern. +;;; ??? The 'F' constraint looks funny, but it should always be replaced by +;;; thumb_reorg with a memory reference. +(define_insn "*thumb_movdf_insn" + [(set (match_operand:DF 0 "nonimmediate_operand" "=l,l,>,l, m,*r") + (match_operand:DF 1 "general_operand" "l, >,l,mF,l,*r"))] + "TARGET_THUMB1 + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + switch (which_alternative) + { + default: + case 0: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"add\\t%0, %1, #0\;add\\t%H0, %H1, #0\"; + return \"add\\t%H0, %H1, #0\;add\\t%0, %1, #0\"; + case 1: + return \"ldmia\\t%1, {%0, %H0}\"; + case 2: + return \"stmia\\t%0, {%1, %H1}\"; + case 3: + return thumb_load_double_from_address (operands); + case 4: + operands[2] = gen_rtx_MEM (SImode, + plus_constant (Pmode, + XEXP (operands[0], 0), 4)); + output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands); + return \"\"; + case 5: + if (REGNO (operands[1]) == REGNO (operands[0]) + 1) + return \"mov\\t%0, %1\;mov\\t%H0, %H1\"; + return \"mov\\t%H0, %H1\;mov\\t%0, %1\"; + } + " + [(set_attr "length" "4,2,2,6,4,4") + (set_attr "type" "multiple,load2,store2,load2,store2,multiple") + (set_attr "pool_range" "*,*,*,1018,*,*")] +) + + +;; load- and store-multiple insns +;; The arm can load/store any set of registers, provided that they are in +;; ascending order, but these expanders assume a contiguous set. 
+ +(define_expand "load_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "TARGET_32BIT" +{ + HOST_WIDE_INT offset = 0; + + /* Support only fixed point registers. */ + if (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 14 + || INTVAL (operands[2]) < 2 + || !MEM_P (operands[1]) + || !REG_P (operands[0]) + || REGNO (operands[0]) > (LAST_ARM_REGNUM - 1) + || REGNO (operands[0]) + INTVAL (operands[2]) > LAST_ARM_REGNUM) + FAIL; + + operands[3] + = arm_gen_load_multiple (arm_regs_in_sequence + REGNO (operands[0]), + INTVAL (operands[2]), + force_reg (SImode, XEXP (operands[1], 0)), + FALSE, operands[1], &offset); +}) + +(define_expand "store_multiple" + [(match_par_dup 3 [(set (match_operand:SI 0 "" "") + (match_operand:SI 1 "" "")) + (use (match_operand:SI 2 "" ""))])] + "TARGET_32BIT" +{ + HOST_WIDE_INT offset = 0; + + /* Support only fixed point registers. */ + if (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 14 + || INTVAL (operands[2]) < 2 + || !REG_P (operands[1]) + || !MEM_P (operands[0]) + || REGNO (operands[1]) > (LAST_ARM_REGNUM - 1) + || REGNO (operands[1]) + INTVAL (operands[2]) > LAST_ARM_REGNUM) + FAIL; + + operands[3] + = arm_gen_store_multiple (arm_regs_in_sequence + REGNO (operands[1]), + INTVAL (operands[2]), + force_reg (SImode, XEXP (operands[0], 0)), + FALSE, operands[0], &offset); +}) + + +;; Move a block of memory if it is word aligned and MORE than 2 words long. +;; We could let this apply for blocks of less than this, but it clobbers so +;; many registers that there is then probably a better way. + +(define_expand "movmemqi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" + " + if (TARGET_32BIT) + { + if (TARGET_LDRD && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { + if (gen_movmem_ldrd_strd (operands)) + DONE; + FAIL; + } + + if (arm_gen_movmemqi (operands)) + DONE; + FAIL; + } + else /* TARGET_THUMB1 */ + { + if ( INTVAL (operands[3]) != 4 + || INTVAL (operands[2]) > 48) + FAIL; + + thumb_expand_movmemqi (operands); + DONE; + } + " +) + +;; Thumb block-move insns + +(define_insn "movmem12b" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (mem:SI (plus:SI (match_dup 3) (const_int 4)))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 8))) + (mem:SI (plus:SI (match_dup 3) (const_int 8)))) + (set (match_operand:SI 0 "register_operand" "=l") + (plus:SI (match_dup 2) (const_int 12))) + (set (match_operand:SI 1 "register_operand" "=l") + (plus:SI (match_dup 3) (const_int 12))) + (clobber (match_scratch:SI 4 "=&l")) + (clobber (match_scratch:SI 5 "=&l")) + (clobber (match_scratch:SI 6 "=&l"))] + "TARGET_THUMB1" + "* return thumb_output_move_mem_multiple (3, operands);" + [(set_attr "length" "4") + ; This isn't entirely accurate... 
It loads as well, but in terms of + ; scheduling the following insn it is better to consider it as a store + (set_attr "type" "store3")] +) + +(define_insn "movmem8b" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) + (mem:SI (plus:SI (match_dup 3) (const_int 4)))) + (set (match_operand:SI 0 "register_operand" "=l") + (plus:SI (match_dup 2) (const_int 8))) + (set (match_operand:SI 1 "register_operand" "=l") + (plus:SI (match_dup 3) (const_int 8))) + (clobber (match_scratch:SI 4 "=&l")) + (clobber (match_scratch:SI 5 "=&l"))] + "TARGET_THUMB1" + "* return thumb_output_move_mem_multiple (2, operands);" + [(set_attr "length" "4") + ; This isn't entirely accurate... It loads as well, but in terms of + ; scheduling the following insn it is better to consider it as a store + (set_attr "type" "store2")] +) + + + +;; Compare & branch insns +;; The range calculations are based as follows: +;; For forward branches, the address calculation returns the address of +;; the next instruction. This is 2 beyond the branch instruction. +;; For backward branches, the address calculation returns the address of +;; the first instruction in this pattern (cmp). This is 2 before the branch +;; instruction for the shortest sequence, and 4 before the branch instruction +;; if we have to jump around an unconditional branch. +;; To the basic branch range the PC offset must be added (this is +4). +;; So for forward branches we have +;; (pos_range - pos_base_offs + pc_offs) = (pos_range - 2 + 4). +;; And for backward branches we have +;; (neg_range - neg_base_offs + pc_offs) = (neg_range - (-2 or -4) + 4). +;; +;; For a 'b' pos_range = 2046, neg_range = -2048 giving (-2040->2048). +;; For a 'b' pos_range = 254, neg_range = -256 giving (-250 ->256). + +(define_expand "cbranchsi4" + [(set (pc) (if_then_else + (match_operator 0 "expandable_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_EITHER" + " + if (!TARGET_THUMB1) + { + if (!arm_validize_comparison (&operands[0], &operands[1], &operands[2])) + FAIL; + emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } + if (thumb1_cmpneg_operand (operands[2], SImode)) + { + emit_jump_insn (gen_cbranchsi4_scratch (NULL, operands[1], operands[2], + operands[3], operands[0])); + DONE; + } + if (!thumb1_cmp_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + ") + +;; A pattern to recognize a special situation and optimize for it. +;; On the thumb, zero-extension from memory is preferrable to sign-extension +;; due to the available addressing modes. Hence, convert a signed comparison +;; with zero into an unsigned comparison with 127 if possible. +(define_expand "cbranchqi4" + [(set (pc) (if_then_else + (match_operator 0 "lt_ge_comparison_operator" + [(match_operand:QI 1 "memory_operand" "") + (match_operand:QI 2 "const0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" +{ + rtx xops[4]; + xops[1] = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendqisi2 (xops[1], operands[1])); + xops[2] = GEN_INT (127); + xops[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]) == GE ? 
LEU : GTU, + VOIDmode, xops[1], xops[2]); + xops[3] = operands[3]; + emit_insn (gen_cbranchsi4 (xops[0], xops[1], xops[2], xops[3])); + DONE; +}) + +(define_expand "cbranchsf4" + [(set (pc) (if_then_else + (match_operator 0 "expandable_comparison_operator" + [(match_operand:SF 1 "s_register_operand" "") + (match_operand:SF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); DONE;" +) + +(define_expand "cbranchdf4" + [(set (pc) (if_then_else + (match_operator 0 "expandable_comparison_operator" + [(match_operand:DF 1 "s_register_operand" "") + (match_operand:DF 2 "arm_float_compare_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); DONE;" +) + +(define_expand "cbranchdi4" + [(set (pc) (if_then_else + (match_operator 0 "expandable_comparison_operator" + [(match_operand:DI 1 "s_register_operand" "") + (match_operand:DI 2 "cmpdi_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT" + "{ + if (!arm_validize_comparison (&operands[0], &operands[1], &operands[2])) + FAIL; + emit_jump_insn (gen_cbranch_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + }" +) + +(define_insn "cbranchsi4_insn" + [(set (pc) (if_then_else + (match_operator 0 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "l,l*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" +{ + rtx t = cfun->machine->thumb1_cc_insn; + if (t != NULL_RTX) + { + if (!rtx_equal_p (cfun->machine->thumb1_cc_op0, operands[1]) + || !rtx_equal_p (cfun->machine->thumb1_cc_op1, operands[2])) + t = NULL_RTX; + if (cfun->machine->thumb1_cc_mode == CC_NOOVmode) + { + if (!noov_comparison_operator (operands[0], VOIDmode)) + t = NULL_RTX; + } + else if (cfun->machine->thumb1_cc_mode != CCmode) + t = NULL_RTX; + } + if (t == NULL_RTX) + { + output_asm_insn ("cmp\t%1, %2", operands); + cfun->machine->thumb1_cc_insn = insn; + cfun->machine->thumb1_cc_op0 = operands[1]; + cfun->machine->thumb1_cc_op1 = operands[2]; + cfun->machine->thumb1_cc_mode = CCmode; + } + else + /* Ensure we emit the right type of condition code on the jump. 
*/ + XEXP (operands[0], 0) = gen_rtx_REG (cfun->machine->thumb1_cc_mode, + CC_REGNUM); + + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } +} + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "cbranchsi4_scratch" + [(set (pc) (if_then_else + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "l,0") + (match_operand:SI 2 "thumb1_cmpneg_operand" "L,J")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 0 "=l,l"))] + "TARGET_THUMB1" + "* + output_asm_insn (\"add\\t%0, %1, #%n2\", operands); + + switch (get_attr_length (insn)) + { + case 4: return \"b%d4\\t%l3\"; + case 6: return \"b%D4\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D4\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*negated_cbranchsi4" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(match_operand:SI 1 "s_register_operand" "l") + (neg:SI (match_operand:SI 2 "s_register_operand" "l"))]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_THUMB1" + "* + output_asm_insn (\"cmn\\t%1, %2\", operands); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*tbit_cbranch" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(zero_extract:SI (match_operand:SI 1 "s_register_operand" "l") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "i")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=l"))] + "TARGET_THUMB1" + "* + { + rtx op[3]; + op[0] = operands[4]; + op[1] = operands[1]; + op[2] = GEN_INT (32 - 1 - INTVAL (operands[2])); + + output_asm_insn (\"lsl\\t%0, %1, %2\", op); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return 
\"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*tlobits_cbranch" + [(set (pc) + (if_then_else + (match_operator 0 "equality_operator" + [(zero_extract:SI (match_operand:SI 1 "s_register_operand" "l") + (match_operand:SI 2 "const_int_operand" "i") + (const_int 0)) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=l"))] + "TARGET_THUMB1" + "* + { + rtx op[3]; + op[0] = operands[4]; + op[1] = operands[1]; + op[2] = GEN_INT (32 - INTVAL (operands[2])); + + output_asm_insn (\"lsl\\t%0, %1, %2\", op); + switch (get_attr_length (insn)) + { + case 4: return \"b%d0\\t%l3\"; + case 6: return \"b%D0\\t.LCB%=\;b\\t%l3\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D0\\t.LCB%=\;bl\\t%l3\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 3) (pc)) (const_int -2040)) + (le (minus (match_dup 3) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*tstsi3_cbranch" + [(set (pc) + (if_then_else + (match_operator 3 "equality_operator" + [(and:SI (match_operand:SI 0 "s_register_operand" "%l") + (match_operand:SI 1 "s_register_operand" "l")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_THUMB1" + "* + { + output_asm_insn (\"tst\\t%0, %1\", operands); + switch (get_attr_length (insn)) + { + case 4: return \"b%d3\\t%l2\"; + case 6: return \"b%D3\\t.LCB%=\;b\\t%l2\\t%@long jump\\n.LCB%=:\"; + default: return \"b%D3\\t.LCB%=\;bl\\t%l2\\t%@far jump\\n.LCB%=:\"; + } + }" + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 2) (pc)) (const_int -250)) + (le (minus (match_dup 2) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 2) (pc)) (const_int -2040)) + (le (minus (match_dup 2) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + +(define_insn "*cbranchne_decr1" + [(set (pc) + (if_then_else (match_operator 3 "equality_operator" + [(match_operand:SI 2 "s_register_operand" "l,l,1,l") + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (set (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,*?h,*?m,*?m") + (plus:SI (match_dup 2) (const_int -1))) + (clobber (match_scratch:SI 1 "=X,l,&l,&l"))] + "TARGET_THUMB1" + "* + { + rtx cond[2]; + cond[0] = gen_rtx_fmt_ee ((GET_CODE (operands[3]) == NE + ? 
GEU : LTU), + VOIDmode, operands[2], const1_rtx); + cond[1] = operands[4]; + + if (which_alternative == 0) + output_asm_insn (\"sub\\t%0, %2, #1\", operands); + else if (which_alternative == 1) + { + /* We must provide an alternative for a hi reg because reload + cannot handle output reloads on a jump instruction, but we + can't subtract into that. Fortunately a mov from lo to hi + does not clobber the condition codes. */ + output_asm_insn (\"sub\\t%1, %2, #1\", operands); + output_asm_insn (\"mov\\t%0, %1\", operands); + } + else + { + /* Similarly, but the target is memory. */ + output_asm_insn (\"sub\\t%1, %2, #1\", operands); + output_asm_insn (\"str\\t%1, %0\", operands); + } + + switch (get_attr_length (insn) - (which_alternative ? 2 : 0)) + { + case 4: + output_asm_insn (\"b%d0\\t%l1\", cond); + return \"\"; + case 6: + output_asm_insn (\"b%D0\\t.LCB%=\", cond); + return \"b\\t%l4\\t%@long jump\\n.LCB%=:\"; + default: + output_asm_insn (\"b%D0\\t.LCB%=\", cond); + return \"bl\\t%l4\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (ior (and (eq (symbol_ref ("which_alternative")) + (const_int 0)) + (eq_attr "length" "8")) + (eq_attr "length" "10")) + (const_string "yes") + (const_string "no"))) + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -250)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2040)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))) + ;; Alternative 1 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10))) + ;; Alternative 2 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10))) + ;; Alternative 3 + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -248)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2038)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10)))]) + (set_attr "type" "multiple")] +) + +(define_insn "*addsi3_cbranch" + [(set (pc) + (if_then_else + (match_operator 4 "arm_comparison_operator" + [(plus:SI + (match_operand:SI 2 "s_register_operand" "%0,l,*l,1,1,1") + (match_operand:SI 3 "reg_or_int_operand" "IJ,lL,*l,lIJ,lIJ,lIJ")) + (const_int 0)]) + (label_ref (match_operand 5 "" "")) + (pc))) + (set + (match_operand:SI 0 "thumb_cbrch_target_operand" "=l,l,*!h,*?h,*?m,*?m") + (plus:SI (match_dup 2) (match_dup 3))) + (clobber (match_scratch:SI 1 "=X,X,l,l,&l,&l"))] + "TARGET_THUMB1 + && (GET_CODE (operands[4]) == EQ + || GET_CODE (operands[4]) == NE + || GET_CODE (operands[4]) == GE + || GET_CODE (operands[4]) == LT)" + "* + { + rtx cond[3]; + + cond[0] = (which_alternative < 2) ? 
operands[0] : operands[1]; + cond[1] = operands[2]; + cond[2] = operands[3]; + + if (CONST_INT_P (cond[2]) && INTVAL (cond[2]) < 0) + output_asm_insn (\"sub\\t%0, %1, #%n2\", cond); + else + output_asm_insn (\"add\\t%0, %1, %2\", cond); + + if (which_alternative >= 2 + && which_alternative < 4) + output_asm_insn (\"mov\\t%0, %1\", operands); + else if (which_alternative >= 4) + output_asm_insn (\"str\\t%1, %0\", operands); + + switch (get_attr_length (insn) - ((which_alternative >= 2) ? 2 : 0)) + { + case 4: + return \"b%d4\\t%l5\"; + case 6: + return \"b%D4\\t.LCB%=\;b\\t%l5\\t%@long jump\\n.LCB%=:\"; + default: + return \"b%D4\\t.LCB%=\;bl\\t%l5\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (ior (and (lt (symbol_ref ("which_alternative")) + (const_int 2)) + (eq_attr "length" "8")) + (eq_attr "length" "10")) + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (lt (symbol_ref ("which_alternative")) + (const_int 2)) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -250)) + (le (minus (match_dup 5) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -2040)) + (le (minus (match_dup 5) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8))) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -248)) + (le (minus (match_dup 5) (pc)) (const_int 256))) + (const_int 6) + (if_then_else + (and (ge (minus (match_dup 5) (pc)) (const_int -2038)) + (le (minus (match_dup 5) (pc)) (const_int 2048))) + (const_int 8) + (const_int 10))))) + (set_attr "type" "multiple")] +) + +(define_insn "*addsi3_cbranch_scratch" + [(set (pc) + (if_then_else + (match_operator 3 "arm_comparison_operator" + [(plus:SI + (match_operand:SI 1 "s_register_operand" "%l,l,l,0") + (match_operand:SI 2 "reg_or_int_operand" "J,l,L,IJ")) + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (match_scratch:SI 0 "=X,X,l,l"))] + "TARGET_THUMB1 + && (GET_CODE (operands[3]) == EQ + || GET_CODE (operands[3]) == NE + || GET_CODE (operands[3]) == GE + || GET_CODE (operands[3]) == LT)" + "* + { + switch (which_alternative) + { + case 0: + output_asm_insn (\"cmp\t%1, #%n2\", operands); + break; + case 1: + output_asm_insn (\"cmn\t%1, %2\", operands); + break; + case 2: + if (INTVAL (operands[2]) < 0) + output_asm_insn (\"sub\t%0, %1, %2\", operands); + else + output_asm_insn (\"add\t%0, %1, %2\", operands); + break; + case 3: + if (INTVAL (operands[2]) < 0) + output_asm_insn (\"sub\t%0, %0, %2\", operands); + else + output_asm_insn (\"add\t%0, %0, %2\", operands); + break; + } + + switch (get_attr_length (insn)) + { + case 4: + return \"b%d3\\t%l4\"; + case 6: + return \"b%D3\\t.LCB%=\;b\\t%l4\\t%@long jump\\n.LCB%=:\"; + default: + return \"b%D3\\t.LCB%=\;bl\\t%l4\\t%@far jump\\n.LCB%=:\"; + } + } + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "8") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -250)) + (le (minus (match_dup 4) (pc)) (const_int 256))) + (const_int 4) + (if_then_else + (and (ge (minus (match_dup 4) (pc)) (const_int -2040)) + (le (minus (match_dup 4) (pc)) (const_int 2048))) + (const_int 6) + (const_int 8)))) + (set_attr "type" "multiple")] +) + + +;; Comparison and test insns + +(define_insn "*arm_cmpsi_insn" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" 
"Py,r,r,I,L")))] + "TARGET_32BIT" + "@ + cmp%?\\t%0, %1 + cmp%?\\t%0, %1 + cmp%?\\t%0, %1 + cmp%?\\t%0, %1 + cmn%?\\t%0, #%n1" + [(set_attr "conds" "set") + (set_attr "arch" "t2,t2,any,any,any") + (set_attr "length" "2,2,4,4,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,yes,yes,no,no") + (set_attr "type" "alus_imm,alus_reg,alus_reg,alus_imm,alus_imm")] +) + +(define_insn "*cmpsi_shiftsi" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "r,r,r") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "shift_amount_operand" "M,r,M")])))] + "TARGET_32BIT" + "cmp\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) + +(define_insn "*cmpsi_shiftsi_swp" + [(set (reg:CC_SWP CC_REGNUM) + (compare:CC_SWP (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operand:SI 2 "shift_amount_operand" "M,r,M")]) + (match_operand:SI 0 "s_register_operand" "r,r,r")))] + "TARGET_32BIT" + "cmp%?\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alu_shift_reg,alus_shift_imm")]) + +(define_insn "*arm_cmpsi_negshiftsi_si" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (neg:SI (match_operator:SI 1 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "reg_or_int_operand" "rM")])) + (match_operand:SI 0 "s_register_operand" "r")))] + "TARGET_ARM" + "cmn%?\\t%0, %2%S1" + [(set_attr "conds" "set") + (set (attr "type") (if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alus_shift_imm") + (const_string "alus_shift_reg"))) + (set_attr "predicable" "yes")] +) + +;; DImode comparisons. The generic code generates branches that +;; if-conversion can not reduce to a conditional compare, so we do +;; that directly. 
+ +(define_insn_and_split "*arm_cmpdi_insn" + [(set (reg:CC_NCV CC_REGNUM) + (compare:CC_NCV (match_operand:DI 0 "s_register_operand" "r") + (match_operand:DI 1 "arm_di_operand" "rDi"))) + (clobber (match_scratch:SI 2 "=r"))] + "TARGET_32BIT" + "#" ; "cmp\\t%Q0, %Q1\;sbcs\\t%2, %R0, %R1" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (match_dup 1))) + (parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 3) (match_dup 4))) + (set (match_dup 2) + (minus:SI (match_dup 5) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])] + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + if (CONST_INT_P (operands[1])) + { + operands[4] = GEN_INT (~INTVAL (gen_highpart_mode (SImode, + DImode, + operands[1]))); + operands[5] = gen_rtx_PLUS (SImode, operands[3], operands[4]); + } + else + { + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_rtx_MINUS (SImode, operands[3], operands[4]); + } + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_lowpart (SImode, operands[2]); + } + [(set_attr "conds" "set") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*arm_cmpdi_unsigned" + [(set (reg:CC_CZ CC_REGNUM) + (compare:CC_CZ (match_operand:DI 0 "s_register_operand" "l,r,r") + (match_operand:DI 1 "arm_di_operand" "Py,r,rDi")))] + + "TARGET_32BIT" + "#" ; "cmp\\t%R0, %R1\;it eq\;cmpeq\\t%Q0, %Q1" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) (match_dup 3))) + (cond_exec (eq:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (match_dup 1))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + if (CONST_INT_P (operands[1])) + operands[3] = gen_highpart_mode (SImode, DImode, operands[1]); + else + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "set") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "arch" "t2,t2,*") + (set_attr "length" "6,6,8") + (set_attr "type" "multiple")] +) + +(define_insn "*arm_cmpdi_zero" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:DI 0 "s_register_operand" "r") + (const_int 0))) + (clobber (match_scratch:SI 1 "=r"))] + "TARGET_32BIT" + "orr%.\\t%1, %Q0, %R0" + [(set_attr "conds" "set") + (set_attr "type" "logics_reg")] +) + +(define_insn "*thumb_cmpdi_zero" + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z (match_operand:DI 0 "s_register_operand" "l") + (const_int 0))) + (clobber (match_scratch:SI 1 "=l"))] + "TARGET_THUMB1" + "orr\\t%1, %Q0, %R0" + [(set_attr "conds" "set") + (set_attr "length" "2") + (set_attr "type" "logics_reg")] +) + +; This insn allows redundant compares to be removed by cse, nothing should +; ever appear in the output file since (set (reg x) (reg x)) is a no-op that +; is deleted later on. The match_dup will match the mode here, so that +; mode changes of the condition codes aren't lost by this even though we don't +; specify what they are. 
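+; For example, CSE can leave a redundant compare behind as
+; (set (reg:CC CC_REGNUM) (reg:CC CC_REGNUM)); that matches this pattern
+; and is deleted as a no-op later on.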
+ +(define_insn "*deleted_compare" + [(set (match_operand 0 "cc_register" "") (match_dup 0))] + "TARGET_32BIT" + "\\t%@ deleted compare" + [(set_attr "conds" "set") + (set_attr "length" "0") + (set_attr "type" "no_insn")] +) + + +;; Conditional branch insns + +(define_expand "cbranch_cc" + [(set (pc) + (if_then_else (match_operator 0 "" [(match_operand 1 "" "") + (match_operand 2 "" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_32BIT" + "operands[1] = arm_gen_compare_reg (GET_CODE (operands[0]), + operands[1], operands[2], NULL_RTX); + operands[2] = const0_rtx;" +) + +;; +;; Patterns to match conditional branch insns. +;; + +(define_insn "arm_cond_branch" + [(set (pc) + (if_then_else (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_32BIT" + "* + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%d1\\t%l0\"; + " + [(set_attr "conds" "use") + (set_attr "type" "branch") + (set (attr "length") + (if_then_else + (and (match_test "TARGET_THUMB2") + (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 256)))) + (const_int 2) + (const_int 4)))] +) + +(define_insn "*arm_cond_branch_reversed" + [(set (pc) + (if_then_else (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "TARGET_32BIT" + "* + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%D1\\t%l0\"; + " + [(set_attr "conds" "use") + (set_attr "type" "branch") + (set (attr "length") + (if_then_else + (and (match_test "TARGET_THUMB2") + (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 256)))) + (const_int 2) + (const_int 4)))] +) + + + +; scc insns + +(define_expand "cstore_cc" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "" [(match_operand 2 "" "") + (match_operand 3 "" "")]))] + "TARGET_32BIT" + "operands[2] = arm_gen_compare_reg (GET_CODE (operands[1]), + operands[2], operands[3], NULL_RTX); + operands[3] = const0_rtx;" +) + +(define_insn_and_split "*mov_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))] + "TARGET_ARM" + "#" ; "mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (const_int 1) + (const_int 0)))] + "" + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*mov_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_ARM" + "#" ; "mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + } + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*mov_notscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_ARM" + "#" ; "mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + "TARGET_ARM" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) 
+ (match_dup 3) + (match_dup 4)))] + { + operands[3] = GEN_INT (~1); + operands[4] = GEN_INT (~0); + } + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "reg_or_int_operand" "")]))] + "TARGET_32BIT || TARGET_THUMB1" + "{ + rtx op3, scratch, scratch2; + + if (!TARGET_THUMB1) + { + if (!arm_add_operand (operands[3], SImode)) + operands[3] = force_reg (SImode, operands[3]); + emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (operands[3] == const0_rtx) + { + switch (GET_CODE (operands[1])) + { + case EQ: + emit_insn (gen_cstoresi_eq0_thumb1 (operands[0], operands[2])); + break; + + case NE: + emit_insn (gen_cstoresi_ne0_thumb1 (operands[0], operands[2])); + break; + + case LE: + scratch = expand_binop (SImode, add_optab, operands[2], constm1_rtx, + NULL_RTX, 0, OPTAB_WIDEN); + scratch = expand_binop (SImode, ior_optab, operands[2], scratch, + NULL_RTX, 0, OPTAB_WIDEN); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), + operands[0], 1, OPTAB_WIDEN); + break; + + case GE: + scratch = expand_unop (SImode, one_cmpl_optab, operands[2], + NULL_RTX, 1); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + break; + + case GT: + scratch = expand_binop (SImode, ashr_optab, operands[2], + GEN_INT (31), NULL_RTX, 0, OPTAB_WIDEN); + scratch = expand_binop (SImode, sub_optab, scratch, operands[2], + NULL_RTX, 0, OPTAB_WIDEN); + expand_binop (SImode, lshr_optab, scratch, GEN_INT (31), operands[0], + 0, OPTAB_WIDEN); + break; + + /* LT is handled by generic code. No need for unsigned with 0. 
*/ + default: + FAIL; + } + DONE; + } + + switch (GET_CODE (operands[1])) + { + case EQ: + scratch = expand_binop (SImode, sub_optab, operands[2], operands[3], + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_cstoresi_eq0_thumb1 (operands[0], scratch)); + break; + + case NE: + scratch = expand_binop (SImode, sub_optab, operands[2], operands[3], + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_cstoresi_ne0_thumb1 (operands[0], scratch)); + break; + + case LE: + op3 = force_reg (SImode, operands[3]); + + scratch = expand_binop (SImode, lshr_optab, operands[2], GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + scratch2 = expand_binop (SImode, ashr_optab, op3, GEN_INT (31), + NULL_RTX, 0, OPTAB_WIDEN); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch2, + op3, operands[2])); + break; + + case GE: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = expand_binop (SImode, ashr_optab, operands[2], GEN_INT (31), + NULL_RTX, 0, OPTAB_WIDEN); + scratch2 = expand_binop (SImode, lshr_optab, op3, GEN_INT (31), + NULL_RTX, 1, OPTAB_WIDEN); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch2, + operands[2], op3)); + break; + + case LEU: + op3 = force_reg (SImode, operands[3]); + scratch = force_reg (SImode, const0_rtx); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch, + op3, operands[2])); + break; + + case GEU: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = force_reg (SImode, const0_rtx); + emit_insn (gen_thumb1_addsi3_addgeu (operands[0], scratch, scratch, + operands[2], op3)); + break; + + case LTU: + op3 = operands[3]; + if (!thumb1_cmp_operand (op3, SImode)) + op3 = force_reg (SImode, op3); + scratch = gen_reg_rtx (SImode); + emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], operands[2], op3)); + break; + + case GTU: + op3 = force_reg (SImode, operands[3]); + scratch = gen_reg_rtx (SImode); + emit_insn (gen_cstoresi_ltu_thumb1 (operands[0], op3, operands[2])); + break; + + /* No good sequences for GT, LT. 
*/ + default: + FAIL; + } + DONE; +}") + +(define_expand "cstoresf4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + "emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); DONE;" +) + +(define_expand "cstoredf4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "arm_float_compare_operand" "")]))] + "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" + "emit_insn (gen_cstore_cc (operands[0], operands[1], + operands[2], operands[3])); DONE;" +) + +(define_expand "cstoredi4" + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "expandable_comparison_operator" + [(match_operand:DI 2 "s_register_operand" "") + (match_operand:DI 3 "cmpdi_operand" "")]))] + "TARGET_32BIT" + "{ + if (!arm_validize_comparison (&operands[1], + &operands[2], + &operands[3])) + FAIL; + emit_insn (gen_cstore_cc (operands[0], operands[1], operands[2], + operands[3])); + DONE; + }" +) + +(define_expand "cstoresi_eq0_thumb1" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (match_dup:SI 2))])] + "TARGET_THUMB1" + "operands[2] = gen_reg_rtx (SImode);" +) + +(define_expand "cstoresi_ne0_thumb1" + [(parallel + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (match_dup:SI 2))])] + "TARGET_THUMB1" + "operands[2] = gen_reg_rtx (SImode);" +) + +(define_insn "*cstoresi_eq0_thumb1_insn" + [(set (match_operand:SI 0 "s_register_operand" "=&l,l") + (eq:SI (match_operand:SI 1 "s_register_operand" "l,0") + (const_int 0))) + (clobber (match_operand:SI 2 "s_register_operand" "=X,l"))] + "TARGET_THUMB1" + "@ + neg\\t%0, %1\;adc\\t%0, %0, %1 + neg\\t%2, %1\;adc\\t%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_insn "*cstoresi_ne0_thumb1_insn" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (ne:SI (match_operand:SI 1 "s_register_operand" "0") + (const_int 0))) + (clobber (match_operand:SI 2 "s_register_operand" "=l"))] + "TARGET_THUMB1" + "sub\\t%2, %1, #1\;sbc\\t%0, %1, %2" + [(set_attr "length" "4")] +) + +;; Used as part of the expansion of thumb ltu and gtu sequences +(define_insn "cstoresi_nltu_thumb1" + [(set (match_operand:SI 0 "s_register_operand" "=l,l") + (neg:SI (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r"))))] + "TARGET_THUMB1" + "cmp\\t%1, %2\;sbc\\t%0, %0, %0" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "cstoresi_ltu_thumb1" + [(set (match_operand:SI 0 "s_register_operand" "=l,l") + (ltu:SI (match_operand:SI 1 "s_register_operand" "l,*h") + (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")))] + "TARGET_THUMB1" + "#" + "TARGET_THUMB1" + [(set (match_dup 3) + (neg:SI (ltu:SI (match_dup 1) (match_dup 2)))) + (set (match_dup 0) (neg:SI (match_dup 3)))] + "operands[3] = gen_reg_rtx (SImode);" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +;; Used as part of the expansion of thumb les sequence. 
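+;; Sketch of the insn below: it computes %0 = %1 + %2 + (%3 >=u %4) with two
+;; Thumb-1 instructions ("cmp %3, %4" sets the carry flag when %3 >= %4
+;; unsigned, then "adc %0, %1, %2" adds that carry back in).  The cstoresi4
+;; expander above feeds it shifted sign bits for the signed LE/GE stores and
+;; a zeroed scratch register for the unsigned LEU/GEU stores.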
+(define_insn "thumb1_addsi3_addgeu" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%0") + (match_operand:SI 2 "s_register_operand" "l")) + (geu:SI (match_operand:SI 3 "s_register_operand" "l") + (match_operand:SI 4 "thumb1_cmp_operand" "lI"))))] + "TARGET_THUMB1" + "cmp\\t%3, %4\;adc\\t%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + + +;; Conditional move insns + +(define_expand "movsicc" + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operand 1 "expandable_comparison_operator" "") + (match_operand:SI 2 "arm_not_operand" "") + (match_operand:SI 3 "arm_not_operand" "")))] + "TARGET_32BIT" + " + { + enum rtx_code code; + rtx ccreg; + + if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), + &XEXP (operands[1], 1))) + FAIL; + + code = GET_CODE (operands[1]); + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1), NULL_RTX); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_expand "movsfcc" + [(set (match_operand:SF 0 "s_register_operand" "") + (if_then_else:SF (match_operand 1 "arm_cond_move_operator" "") + (match_operand:SF 2 "s_register_operand" "") + (match_operand:SF 3 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT" + " + { + enum rtx_code code = GET_CODE (operands[1]); + rtx ccreg; + + if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), + &XEXP (operands[1], 1))) + FAIL; + + code = GET_CODE (operands[1]); + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1), NULL_RTX); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_expand "movdfcc" + [(set (match_operand:DF 0 "s_register_operand" "") + (if_then_else:DF (match_operand 1 "arm_cond_move_operator" "") + (match_operand:DF 2 "s_register_operand" "") + (match_operand:DF 3 "s_register_operand" "")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + " + { + enum rtx_code code = GET_CODE (operands[1]); + rtx ccreg; + + if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), + &XEXP (operands[1], 1))) + FAIL; + code = GET_CODE (operands[1]); + ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1), NULL_RTX); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + }" +) + +(define_insn "*cmov" + [(set (match_operand:SDF 0 "s_register_operand" "=") + (if_then_else:SDF (match_operator 1 "arm_vsel_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SDF 3 "s_register_operand" + "") + (match_operand:SDF 4 "s_register_operand" + "")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "* + { + enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]); + switch (code) + { + case ARM_GE: + case ARM_GT: + case ARM_EQ: + case ARM_VS: + return \"vsel%d1.\\t%0, %3, %4\"; + case ARM_LT: + case ARM_LE: + case ARM_NE: + case ARM_VC: + return \"vsel%D1.\\t%0, %4, %3\"; + default: + gcc_unreachable (); + } + return \"\"; + }" + [(set_attr "conds" "use") + (set_attr "type" "f_sel")] +) + +(define_insn_and_split "*movsicc_insn" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K") + (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))] + 
"TARGET_ARM" + "@ + mov%D3\\t%0, %2 + mvn%D3\\t%0, #%B2 + mov%d3\\t%0, %1 + mvn%d3\\t%0, #%B1 + # + # + # + #" + ; alt4: mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt5: mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 + ; alt6: mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ; alt7: mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2" + "&& reload_completed" + [(const_int 0)] + { + enum rtx_code rev_code; + enum machine_mode mode; + rtx rev_cond; + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + operands[3], + gen_rtx_SET (VOIDmode, + operands[0], + operands[1]))); + + rev_code = GET_CODE (operands[3]); + mode = GET_MODE (operands[4]); + if (mode == CCFPmode || mode == CCFPEmode) + rev_code = reverse_condition_maybe_unordered (rev_code); + else + rev_code = reverse_condition (rev_code); + + rev_cond = gen_rtx_fmt_ee (rev_code, + VOIDmode, + operands[4], + const0_rtx); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + rev_cond, + gen_rtx_SET (VOIDmode, + operands[0], + operands[2]))); + DONE; + } + [(set_attr "length" "4,4,4,4,8,8,8,8") + (set_attr "conds" "use") + (set_attr_alternative "type" + [(if_then_else (match_operand 2 "const_int_operand" "") + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") + (if_then_else (match_operand 1 "const_int_operand" "") + (const_string "mov_imm") + (const_string "mov_reg")) + (const_string "mvn_imm") + (const_string "mov_reg") + (const_string "mov_reg") + (const_string "mov_reg") + (const_string "mov_reg")])] +) + +(define_insn "*movsfcc_soft_insn" + [(set (match_operand:SF 0 "s_register_operand" "=r,r") + (if_then_else:SF (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,r") + (match_operand:SF 2 "s_register_operand" "r,0")))] + "TARGET_ARM && TARGET_SOFT_FLOAT" + "@ + mov%D3\\t%0, %2 + mov%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "type" "mov_reg")] +) + + +;; Jump and linkage insns + +(define_expand "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_EITHER" + "" +) + +(define_insn "*arm_jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_32BIT" + "* + { + if (arm_ccfsm_state == 1 || arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return \"b%?\\t%l0\"; + } + " + [(set_attr "predicable" "yes") + (set (attr "length") + (if_then_else + (and (match_test "TARGET_THUMB2") + (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) + (le (minus (match_dup 0) (pc)) (const_int 2048)))) + (const_int 2) + (const_int 4))) + (set_attr "type" "branch")] +) + +(define_insn "*thumb_jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_THUMB1" + "* + if (get_attr_length (insn) == 2) + return \"b\\t%l0\"; + return \"bl\\t%l0\\t%@ far jump\"; + " + [(set (attr "far_jump") + (if_then_else + (eq_attr "length" "4") + (const_string "yes") + (const_string "no"))) + (set (attr "length") + (if_then_else + (and (ge (minus (match_dup 0) (pc)) (const_int -2044)) + (le (minus (match_dup 0) (pc)) (const_int 2048))) + (const_int 2) + (const_int 4))) + (set_attr "type" "branch")] +) + +(define_expand "call" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_EITHER" + " + { + rtx callee, pat; + + /* In an untyped call, we can get NULL for operand 2. 
*/ + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 32-bit address of the callee into a register before performing the + branch and link. */ + callee = XEXP (operands[0], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? arm_is_long_call_p (SYMBOL_REF_DECL (callee)) + : !REG_P (callee)) + XEXP (operands[0], 0) = force_reg (Pmode, callee); + + pat = gen_call_internal (operands[0], operands[1], operands[2]); + arm_emit_call_insn (pat, XEXP (operands[0], 0)); + DONE; + }" +) + +(define_expand "call_internal" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))])]) + +(define_insn "*call_reg_armv5" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)" + "blx%?\\t%0" + [(set_attr "type" "call")] +) + +(define_insn "*call_reg_arm" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" + "* + return output_call (operands); + " + ;; length is worst case, normally it is only two + [(set_attr "length" "12") + (set_attr "type" "call")] +) + + +;; Note: not used for armv5+ because the sequence used (ldr pc, ...) is not +;; considered a function call by the branch predictor of some cores (PR40887). +;; Falls back to blx rN (*call_reg_armv5). + +(define_insn "*call_mem" + [(call (mem:SI (match_operand:SI 0 "call_memory_operand" "m")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" + "* + return output_call_mem (operands); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +(define_insn "*call_reg_thumb1_v5" + [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && arm_arch5 && !SIBLING_CALL_P (insn)" + "blx\\t%0" + [(set_attr "length" "2") + (set_attr "type" "call")] +) + +(define_insn "*call_reg_thumb1" + [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && !arm_arch5 && !SIBLING_CALL_P (insn)" + "* + { + if (!TARGET_CALLER_INTERWORKING) + return thumb_call_via_reg (operands[0]); + else if (operands[1] == const0_rtx) + return \"bl\\t%__interwork_call_via_%0\"; + else if (frame_pointer_needed) + return \"bl\\t%__interwork_r7_call_via_%0\"; + else + return \"bl\\t%__interwork_r11_call_via_%0\"; + }" + [(set_attr "type" "call")] +) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_EITHER" + " + { + rtx pat, callee; + + /* In an untyped call, we can get NULL for operand 2. */ + if (operands[3] == 0) + operands[3] = const0_rtx; + + /* Decide if we should generate indirect calls by loading the + 32-bit address of the callee into a register before performing the + branch and link. */ + callee = XEXP (operands[1], 0); + if (GET_CODE (callee) == SYMBOL_REF + ? 
arm_is_long_call_p (SYMBOL_REF_DECL (callee)) + : !REG_P (callee)) + XEXP (operands[1], 0) = force_reg (Pmode, callee); + + pat = gen_call_value_internal (operands[0], operands[1], + operands[2], operands[3]); + arm_emit_call_insn (pat, XEXP (operands[1], 0)); + DONE; + }" +) + +(define_expand "call_value_internal" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))])]) + +(define_insn "*call_value_reg_armv5" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && arm_arch5 && !SIBLING_CALL_P (insn)" + "blx%?\\t%1" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_reg_arm" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && !SIBLING_CALL_P (insn)" + "* + return output_call (&operands[1]); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +;; Note: see *call_mem + +(define_insn "*call_value_mem" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "call_memory_operand" "m")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_ARM && !arm_arch5 && (!CONSTANT_ADDRESS_P (XEXP (operands[1], 0))) + && !SIBLING_CALL_P (insn)" + "* + return output_call_mem (&operands[1]); + " + [(set_attr "length" "12") + (set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb1_v5" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && arm_arch5" + "blx\\t%1" + [(set_attr "length" "2") + (set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb1" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 && !arm_arch5" + "* + { + if (!TARGET_CALLER_INTERWORKING) + return thumb_call_via_reg (operands[1]); + else if (operands[2] == const0_rtx) + return \"bl\\t%__interwork_call_via_%1\"; + else if (frame_pointer_needed) + return \"bl\\t%__interwork_r7_call_via_%1\"; + else + return \"bl\\t%__interwork_r11_call_via_%1\"; + }" + [(set_attr "type" "call")] +) + +;; Allow calls to SYMBOL_REFs specially as they are not valid general addresses +;; The 'a' causes the operand to be treated as an address, i.e. no '#' output. + +(define_insn "*call_symbol" + [(call (mem:SI (match_operand:SI 0 "" "")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_32BIT + && !SIBLING_CALL_P (insn) + && (GET_CODE (operands[0]) == SYMBOL_REF) + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" + "* + { + return NEED_PLT_RELOC ? 
\"bl%?\\t%a0(PLT)\" : \"bl%?\\t%a0\"; + }" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_symbol" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "" "")) + (match_operand:SI 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_32BIT + && !SIBLING_CALL_P (insn) + && (GET_CODE (operands[1]) == SYMBOL_REF) + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "* + { + return NEED_PLT_RELOC ? \"bl%?\\t%a1(PLT)\" : \"bl%?\\t%a1\"; + }" + [(set_attr "type" "call")] +) + +(define_insn "*call_insn" + [(call (mem:SI (match_operand:SI 0 "" "")) + (match_operand:SI 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 + && GET_CODE (operands[0]) == SYMBOL_REF + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[0]))" + "bl\\t%a0" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + +(define_insn "*call_value_insn" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand 1 "" "")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB1 + && GET_CODE (operands[1]) == SYMBOL_REF + && !arm_is_long_call_p (SYMBOL_REF_DECL (operands[1]))" + "bl\\t%a1" + [(set_attr "length" "4") + (set_attr "type" "call")] +) + +;; We may also be able to do sibcalls for Thumb, but it's much harder... +(define_expand "sibcall" + [(parallel [(call (match_operand 0 "memory_operand" "") + (match_operand 1 "general_operand" "")) + (return) + (use (match_operand 2 "" ""))])] + "TARGET_32BIT" + " + { + if (!REG_P (XEXP (operands[0], 0)) + && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) + XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0)); + + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + }" +) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "general_operand" ""))) + (return) + (use (match_operand 3 "" ""))])] + "TARGET_32BIT" + " + { + if (!REG_P (XEXP (operands[1], 0)) && + (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF)) + XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0)); + + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + }" +) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "call_insn_operand" "Cs, US")) + (match_operand 1 "" "")) + (return) + (use (match_operand 2 "" ""))] + "TARGET_32BIT && SIBLING_CALL_P (insn)" + "* + if (which_alternative == 1) + return NEED_PLT_RELOC ? \"b%?\\t%a0(PLT)\" : \"b%?\\t%a0\"; + else + { + if (arm_arch5 || arm_arch4t) + return \"bx%?\\t%0\\t%@ indirect register sibling call\"; + else + return \"mov%?\\t%|pc, %0\\t%@ indirect register sibling call\"; + } + " + [(set_attr "type" "call")] +) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "call_insn_operand" "Cs,US")) + (match_operand 2 "" ""))) + (return) + (use (match_operand 3 "" ""))] + "TARGET_32BIT && SIBLING_CALL_P (insn)" + "* + if (which_alternative == 1) + return NEED_PLT_RELOC ? 
\"b%?\\t%a1(PLT)\" : \"b%?\\t%a1\"; + else + { + if (arm_arch5 || arm_arch4t) + return \"bx%?\\t%1\"; + else + return \"mov%?\\t%|pc, %1\\t@ indirect sibling call \"; + } + " + [(set_attr "type" "call")] +) + +(define_expand "return" + [(returns)] + "(TARGET_ARM || (TARGET_THUMB2 + && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL + && !IS_STACKALIGN (arm_current_func_type ()))) + " + " + { + if (TARGET_THUMB2) + { + thumb2_expand_return (); + DONE; + } + } + " +) + +;; Often the return insn will be the same as loading from memory, so set attr +(define_insn "*arm_return" + [(return)] + "TARGET_ARM && USE_RETURN_INSN (FALSE)" + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (const_true_rtx, true, false, false); + }" + [(set_attr "type" "load1") + (set_attr "length" "12") + (set_attr "predicable" "yes")] +) + +(define_insn "*cond_return" + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (returns) + (pc)))] + "TARGET_ARM " + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (operands[0], true, false, + ); + }" + [(set_attr "conds" "use") + (set_attr "length" "12") + (set_attr "type" "load1")] +) + +(define_insn "*cond_return_inverted" + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (pc) + (returns)))] + "TARGET_ARM " + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (operands[0], true, true, + ); + }" + [(set_attr "conds" "use") + (set_attr "length" "12") + (set_attr "type" "load1")] +) + +(define_insn "*arm_simple_return" + [(simple_return)] + "TARGET_ARM" + "* + { + if (arm_ccfsm_state == 2) + { + arm_ccfsm_state += 2; + return \"\"; + } + return output_return_instruction (const_true_rtx, true, false, true); + }" + [(set_attr "type" "branch") + (set_attr "length" "4") + (set_attr "predicable" "yes")] +) + +;; Generate a sequence of instructions to determine if the processor is +;; in 26-bit or 32-bit mode, and return the appropriate return address +;; mask. + +(define_expand "return_addr_mask" + [(set (match_dup 1) + (compare:CC_NOOV (unspec [(const_int 0)] UNSPEC_CHECK_ARCH) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (eq (match_dup 1) (const_int 0)) + (const_int -1) + (const_int 67108860)))] ; 0x03fffffc + "TARGET_ARM" + " + operands[1] = gen_rtx_REG (CC_NOOVmode, CC_REGNUM); + ") + +(define_insn "*check_arch2" + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (unspec [(const_int 0)] UNSPEC_CHECK_ARCH) + (const_int 0)))] + "TARGET_ARM" + "teq\\t%|r0, %|r0\;teq\\t%|pc, %|pc" + [(set_attr "length" "8") + (set_attr "conds" "set") + (set_attr "type" "multiple")] +) + +;; Call subroutine returning any type. 
+ +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "TARGET_EITHER" + " + { + int i; + rtx par = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (XVECLEN (operands[2], 0))); + rtx addr = gen_reg_rtx (Pmode); + rtx mem; + int size = 0; + + emit_move_insn (addr, XEXP (operands[1], 0)); + mem = change_address (operands[1], BLKmode, addr); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx src = SET_SRC (XVECEXP (operands[2], 0, i)); + + /* Default code only uses r0 as a return value, but we could + be using anything up to 4 registers. */ + if (REGNO (src) == R0_REGNUM) + src = gen_rtx_REG (TImode, R0_REGNUM); + + XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, src, + GEN_INT (size)); + size += GET_MODE_SIZE (GET_MODE (src)); + } + + emit_call_insn (GEN_CALL_VALUE (par, operands[0], const0_rtx, NULL, + const0_rtx)); + + size = 0; + + for (i = 0; i < XVECLEN (par, 0); i++) + { + HOST_WIDE_INT offset = 0; + rtx reg = XEXP (XVECEXP (par, 0, i), 0); + + if (size != 0) + emit_move_insn (addr, plus_constant (Pmode, addr, size)); + + mem = change_address (mem, GET_MODE (reg), NULL); + if (REGNO (reg) == R0_REGNUM) + { + /* On thumb we have to use a write-back instruction. */ + emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, addr, + TARGET_THUMB ? TRUE : FALSE, mem, &offset)); + size = TARGET_ARM ? 16 : 0; + } + else + { + emit_move_insn (mem, reg); + size = GET_MODE_SIZE (GET_MODE (reg)); + } + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; + }" +) + +(define_expand "untyped_return" + [(match_operand:BLK 0 "memory_operand" "") + (match_operand 1 "" "")] + "TARGET_EITHER" + " + { + int i; + rtx addr = gen_reg_rtx (Pmode); + rtx mem; + int size = 0; + + emit_move_insn (addr, XEXP (operands[0], 0)); + mem = change_address (operands[0], BLKmode, addr); + + for (i = 0; i < XVECLEN (operands[1], 0); i++) + { + HOST_WIDE_INT offset = 0; + rtx reg = SET_DEST (XVECEXP (operands[1], 0, i)); + + if (size != 0) + emit_move_insn (addr, plus_constant (Pmode, addr, size)); + + mem = change_address (mem, GET_MODE (reg), NULL); + if (REGNO (reg) == R0_REGNUM) + { + /* On thumb we have to use a write-back instruction. */ + emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, addr, + TARGET_THUMB ? TRUE : FALSE, mem, &offset)); + size = TARGET_ARM ? 16 : 0; + } + else + { + emit_move_insn (reg, mem); + size = GET_MODE_SIZE (GET_MODE (reg)); + } + } + + /* Emit USE insns before the return. */ + for (i = 0; i < XVECLEN (operands[1], 0); i++) + emit_use (SET_DEST (XVECEXP (operands[1], 0, i))); + + /* Construct the return. */ + expand_naked_return (); + + DONE; + }" +) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. 
+ +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] VUNSPEC_BLOCKAGE)] + "TARGET_EITHER" + "" + [(set_attr "length" "0") + (set_attr "type" "block")] +) + +(define_expand "casesi" + [(match_operand:SI 0 "s_register_operand" "") ; index to jump on + (match_operand:SI 1 "const_int_operand" "") ; lower bound + (match_operand:SI 2 "const_int_operand" "") ; total range + (match_operand:SI 3 "" "") ; table label + (match_operand:SI 4 "" "")] ; Out of range label + "TARGET_32BIT || optimize_size || flag_pic" + " + { + enum insn_code code; + if (operands[1] != const0_rtx) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_addsi3 (reg, operands[0], + gen_int_mode (-INTVAL (operands[1]), + SImode))); + operands[0] = reg; + } + + if (TARGET_ARM) + code = CODE_FOR_arm_casesi_internal; + else if (TARGET_THUMB1) + code = CODE_FOR_thumb1_casesi_internal_pic; + else if (flag_pic) + code = CODE_FOR_thumb2_casesi_internal_pic; + else + code = CODE_FOR_thumb2_casesi_internal; + + if (!insn_data[(int) code].operand[1].predicate(operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + + emit_jump_insn (GEN_FCN ((int) code) (operands[0], operands[2], + operands[3], operands[4])); + DONE; + }" +) + +;; The USE in this pattern is needed to tell flow analysis that this is +;; a CASESI insn. It has no other purpose. +(define_insn "arm_casesi_internal" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (use (label_ref (match_dup 2)))])] + "TARGET_ARM" + "* + if (flag_pic) + return \"cmp\\t%0, %1\;addls\\t%|pc, %|pc, %0, asl #2\;b\\t%l3\"; + return \"cmp\\t%0, %1\;ldrls\\t%|pc, [%|pc, %0, asl #2]\;b\\t%l3\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_expand "thumb1_casesi_internal_pic" + [(match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 1 "thumb1_cmp_operand" "") + (match_operand 2 "" "") + (match_operand 3 "" "")] + "TARGET_THUMB1" + { + rtx reg0; + rtx test = gen_rtx_GTU (VOIDmode, operands[0], operands[1]); + emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[1], + operands[3])); + reg0 = gen_rtx_REG (SImode, 0); + emit_move_insn (reg0, operands[0]); + emit_jump_insn (gen_thumb1_casesi_dispatch (operands[2]/*, operands[3]*/)); + DONE; + } +) + +(define_insn "thumb1_casesi_dispatch" + [(parallel [(set (pc) (unspec [(reg:SI 0) + (label_ref (match_operand 0 "" "")) +;; (label_ref (match_operand 1 "" "")) +] + UNSPEC_THUMB1_CASESI)) + (clobber (reg:SI IP_REGNUM)) + (clobber (reg:SI LR_REGNUM))])] + "TARGET_THUMB1" + "* return thumb1_output_casesi(operands);" + [(set_attr "length" "4") + (set_attr "type" "multiple")] +) + +(define_expand "indirect_jump" + [(set (pc) + (match_operand:SI 0 "s_register_operand" ""))] + "TARGET_EITHER" + " + /* Thumb-2 doesn't have mov pc, reg. Explicitly set the low bit of the + address and use bx. */ + if (TARGET_THUMB2) + { + rtx tmp; + tmp = gen_reg_rtx (SImode); + emit_insn (gen_iorsi3 (tmp, operands[0], GEN_INT(1))); + operands[0] = tmp; + } + " +) + +;; NB Never uses BX. 
+(define_insn "*arm_indirect_jump" + [(set (pc) + (match_operand:SI 0 "s_register_operand" "r"))] + "TARGET_ARM" + "mov%?\\t%|pc, %0\\t%@ indirect register jump" + [(set_attr "predicable" "yes") + (set_attr "type" "branch")] +) + +(define_insn "*load_indirect_jump" + [(set (pc) + (match_operand:SI 0 "memory_operand" "m"))] + "TARGET_ARM" + "ldr%?\\t%|pc, %0\\t%@ indirect memory jump" + [(set_attr "type" "load1") + (set_attr "pool_range" "4096") + (set_attr "neg_pool_range" "4084") + (set_attr "predicable" "yes")] +) + +;; NB Never uses BX. +(define_insn "*thumb1_indirect_jump" + [(set (pc) + (match_operand:SI 0 "register_operand" "l*r"))] + "TARGET_THUMB1" + "mov\\tpc, %0" + [(set_attr "conds" "clob") + (set_attr "length" "2") + (set_attr "type" "branch")] +) + + +;; Misc insns + +(define_insn "nop" + [(const_int 0)] + "TARGET_EITHER" + "* + if (TARGET_UNIFIED_ASM) + return \"nop\"; + if (TARGET_ARM) + return \"mov%?\\t%|r0, %|r0\\t%@ nop\"; + return \"mov\\tr8, r8\"; + " + [(set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 2) + (const_int 4))) + (set_attr "type" "mov_reg")] +) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 0))] + "" + "* + if (TARGET_ARM) + return \".inst\\t0xe7f000f0\"; + else + return \".inst\\t0xdeff\"; + " + [(set (attr "length") + (if_then_else (eq_attr "is_thumb" "yes") + (const_int 2) + (const_int 4))) + (set_attr "type" "trap") + (set_attr "conds" "unconditional")] +) + + +;; Patterns to allow combination of arithmetic, cond code and shifts + +(define_insn "*arith_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r,r,r") + (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")]) + (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))] + "TARGET_32BIT" + "%i1%?\\t%0, %2, %4%S3" + [(set_attr "predicable" "yes") + (set_attr "shift" "4") + (set_attr "arch" "a,t2,t2,a") + ;; Thumb2 doesn't allow the stack pointer to be used for + ;; operand1 for all operations other than add and sub. In this case + ;; the minus operation is a candidate for an rsub and hence needs + ;; to be disabled. + ;; We have to make sure to disable the fourth alternative if + ;; the shift_operator is MULT, since otherwise the insn will + ;; also match a multiply_accumulate pattern and validate_change + ;; will allow a replacement of the constant with a register + ;; despite the checks done in shift_operator. 
+ (set_attr_alternative "insn_enabled" + [(const_string "yes") + (if_then_else + (match_operand:SI 1 "add_operator" "") + (const_string "yes") (const_string "no")) + (const_string "yes") + (if_then_else + (match_operand:SI 3 "mult_operator" "") + (const_string "no") (const_string "yes"))]) + (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 2 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "") + (match_operand:SI 5 "reg_or_int_operand" "")]) + (match_operand:SI 6 "s_register_operand" "")]) + (match_operand:SI 7 "arm_rhs_operand" "")])) + (clobber (match_operand:SI 8 "s_register_operand" ""))] + "TARGET_32BIT" + [(set (match_dup 8) + (match_op_dup 2 [(match_op_dup 3 [(match_dup 4) (match_dup 5)]) + (match_dup 6)])) + (set (match_dup 0) + (match_op_dup 1 [(match_dup 8) (match_dup 7)]))] + "") + +(define_insn "*arith_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "shift_amount_operand" "M,r")]) + (match_operand:SI 2 "s_register_operand" "r,r")]) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_op_dup 1 [(match_op_dup 3 [(match_dup 4) (match_dup 5)]) + (match_dup 2)]))] + "TARGET_32BIT" + "%i1%.\\t%0, %2, %4%S3" + [(set_attr "conds" "set") + (set_attr "shift" "4") + (set_attr "arch" "32,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) + +(define_insn "*arith_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (match_operator:SI 1 "shiftable_operator" + [(match_operator:SI 3 "shift_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "shift_amount_operand" "M,r")]) + (match_operand:SI 2 "s_register_operand" "r,r")]) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r"))] + "TARGET_32BIT" + "%i1%.\\t%0, %2, %4%S3" + [(set_attr "conds" "set") + (set_attr "shift" "4") + (set_attr "arch" "32,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) + +(define_insn "*sub_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r")])))] + "TARGET_32BIT" + "sub%?\\t%0, %1, %3%S2" + [(set_attr "predicable" "yes") + (set_attr "shift" "3") + (set_attr "arch" "32,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg")]) + +(define_insn "*sub_shiftsi_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 "s_register_operand" "r,r,r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r,M")])) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (minus:SI (match_dup 1) + (match_op_dup 2 [(match_dup 3) (match_dup 4)])))] + "TARGET_32BIT" + "sub%.\\t%0, %1, %3%S2" + [(set_attr "conds" "set") + (set_attr "shift" "3") + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) + +(define_insn "*sub_shiftsi_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (minus:SI (match_operand:SI 1 
"s_register_operand" "r,r,r") + (match_operator:SI 2 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,r,M")])) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r,r,r"))] + "TARGET_32BIT" + "sub%.\\t%0, %1, %3%S2" + [(set_attr "conds" "set") + (set_attr "shift" "3") + (set_attr "arch" "32,a,a") + (set_attr "type" "alus_shift_imm,alus_shift_reg,alus_shift_imm")]) + + +(define_insn_and_split "*and_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_ARM" + "#" ; "mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1" + "&& reload_completed" + [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0))) + (cond_exec (match_dup 4) (set (match_dup 0) + (and:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + /* Note that operands[4] is the same as operands[1], + but with VOIDmode as the result. */ + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "type" "multiple") + (set_attr "length" "8")] +) + +(define_insn_and_split "*ior_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "0,?r")))] + "TARGET_ARM" + "@ + orr%d1\\t%0, %3, #1 + #" + "&& reload_completed + && REGNO (operands [0]) != REGNO (operands[3])" + ;; && which_alternative == 1 + ; mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1 + [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) + (ior:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + /* Note that operands[4] is the same as operands[1], + but with VOIDmode as the result. */ + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "length" "4,8") + (set_attr "type" "logic_imm,multiple")] +) + +; A series of splitters for the compare_scc pattern below. Note that +; order is important. 
+(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (lt:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (lshiftrt:SI (match_dup 1) (const_int 31)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ge:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (not:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 31)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "arm_arch5 && TARGET_32BIT" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (eq:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 1) (match_dup 1))) + (set (match_dup 0) + (minus:SI (const_int 1) (match_dup 1)))]) + (cond_exec (ltu:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 0)))]) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))]) + (cond_exec (ne:CC (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] +{ + operands[3] = GEN_INT (-INTVAL (operands[2])); +}) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (ne:SI (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 2 "arm_add_operand" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed" + [(parallel + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (minus:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (cond_exec (ne:CC_NOOV (reg:CC_NOOV CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))]) + +(define_insn_and_split "*compare_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "&& reload_completed" + [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0))) + (cond_exec (match_dup 5) (set (match_dup 0) (const_int 1)))] +{ + rtx tmp1; + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + tmp1 = gen_rtx_REG (mode, CC_REGNUM); + + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, tmp1, const0_rtx); +} + [(set_attr "type" "multiple")] +) + +;; Attempt to improve the sequence generated by the compare_scc splitters +;; not to use 
conditional execution. + +;; Rd = (eq (reg1) (const_int0)) // ARMv5 +;; clz Rd, reg1 +;; lsr Rd, Rd, #5 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (const_int 0))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 0) (clz:SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + +;; Rd = (eq (reg1) (const_int0)) // !ARMv5 +;; negs Rd, reg1 +;; adc Rd, Rd, reg1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (const_int 0))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (match_scratch:SI 2 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 2) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 1) (match_dup 2)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] +) + +;; Rd = (eq (reg1) (reg2/imm)) // ARMv5 and optimising for speed. +;; sub Rd, Reg1, reg2 +;; clz Rd, Rd +;; lsr Rd, Rd, #5 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1)))] + "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM) + && !(TARGET_THUMB2 && optimize_insn_for_size_p ())" + [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (clz:SI (match_dup 0))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))] +) + + +;; Rd = (eq (reg1) (reg2)) // ! ARMv5 or optimising for size. 
+;; sub T1, Reg1, reg2 +;; negs Rd, T1 +;; adc Rd, Rd, T1 +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "arm_rhs_operand" ""))) + (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0)) + (set (match_operand:SI 0 "register_operand" "") (const_int 0))) + (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) (const_int 1))) + (match_scratch:SI 3 "r")] + "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 3))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 3)))]) + (set (match_dup 0) + (plus:SI (plus:SI (match_dup 0) (match_dup 3)) + (geu:SI (reg:CC CC_REGNUM) (const_int 0))))] + " + if (CONST_INT_P (operands[2])) + operands[4] = plus_constant (SImode, operands[1], -INTVAL (operands[2])); + else + operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[2]); + ") + +(define_insn "*cond_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI (match_operator 3 "equality_operator" + [(match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))] + "TARGET_ARM" + "* + if (GET_CODE (operands[3]) == NE) + { + if (which_alternative != 1) + output_asm_insn (\"mov%D4\\t%0, %2\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%d4\\t%0, %1\", operands); + return \"\"; + } + if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%d4\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "use") + (set_attr "type" "mov_reg,mov_reg,multiple") + (set_attr "length" "4,4,8")] +) + +(define_insn "*cond_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 5 "shiftable_operator" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + return \"%i5\\t%0, %1, %2, lsr #31\"; + + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (GET_CODE (operands[5]) == AND) + output_asm_insn (\"mov%D4\\t%0, #0\", operands); + else if (GET_CODE (operands[5]) == MINUS) + output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands); + else if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + return \"%i5%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*cond_sub" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?r") + (match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + return \"sub%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*cmp_ite0" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (if_then_else:SI + 
(match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" + "l,l,l,r,r,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" + "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" + "l,r,r,l,l,r,r,r,r") + (match_operand:SI 3 "arm_add_operand" + "lPy,rI,L,lPy,lPy,rI,rI,L,L")]) + (const_int 0)) + (const_int 0)))] + "TARGET_32BIT" + "* + { + static const char * const cmp1[NUM_OF_COND_CMP][2] = + { + {\"cmp%d5\\t%0, %1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmp%d5\\t%0, %1\", + \"cmn%d4\\t%2, #%n3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmn%d4\\t%2, #%n3\"} + }; + static const char * const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%2, %3\", + \"cmp\\t%0, %1\"}, + {\"cmp\\t%2, %3\", + \"cmn\\t%0, #%n1\"}, + {\"cmn\\t%2, #%n3\", + \"cmp\\t%0, %1\"}, + {\"cmn\\t%2, #%n3\", + \"cmn\\t%0, #%n1\"} + }; + static const char * const ite[2] = + { + \"it\\t%d5\", + \"it\\t%d4\" + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); + if (TARGET_THUMB2) { + output_asm_insn (ite[swap], operands); + } + output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); + return \"\"; + }" + [(set_attr "conds" "set") + (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr "type" "multiple") + (set_attr_alternative "length" + [(const_int 6) + (const_int 8) + (const_int 8) + (const_int 8) + (const_int 8) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10))])] +) + +(define_insn "*cmp_ite1" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" + "l,l,l,r,r,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" + "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" + "l,r,r,l,l,r,r,r,r") + (match_operand:SI 3 "arm_add_operand" + "lPy,rI,L,lPy,lPy,rI,rI,L,L")]) + (const_int 1)) + (const_int 0)))] + "TARGET_32BIT" + "* + { + static const char * const cmp1[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%0, %1\", + \"cmp\\t%2, %3\"}, + {\"cmn\\t%0, #%n1\", + \"cmp\\t%2, %3\"}, + {\"cmp\\t%0, %1\", + \"cmn\\t%2, #%n3\"}, + {\"cmn\\t%0, #%n1\", + \"cmn\\t%2, #%n3\"} + }; + static const char * const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp%d4\\t%2, %3\", + \"cmp%D5\\t%0, %1\"}, + {\"cmp%d4\\t%2, %3\", + \"cmn%D5\\t%0, #%n1\"}, + {\"cmn%d4\\t%2, #%n3\", + \"cmp%D5\\t%0, %1\"}, + {\"cmn%d4\\t%2, #%n3\", + \"cmn%D5\\t%0, #%n1\"} + }; + static const char * const ite[2] = + { + \"it\\t%d4\", + \"it\\t%D5\" + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), + reverse_condition (GET_CODE (operands[4]))); + + output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); + if (TARGET_THUMB2) { + output_asm_insn (ite[swap], operands); + } + output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], 
operands); + return \"\"; + }" + [(set_attr "conds" "set") + (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr_alternative "length" + [(const_int 6) + (const_int 8) + (const_int 8) + (const_int 8) + (const_int 8) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10))]) + (set_attr "type" "multiple")] +) + +(define_insn "*cmp_and" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (and:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" + "l,l,l,r,r,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" + "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" + "l,r,r,l,l,r,r,r,r") + (match_operand:SI 3 "arm_add_operand" + "lPy,rI,L,lPy,lPy,rI,rI,L,L")])) + (const_int 0)))] + "TARGET_32BIT" + "* + { + static const char *const cmp1[NUM_OF_COND_CMP][2] = + { + {\"cmp%d5\\t%0, %1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmp%d4\\t%2, %3\"}, + {\"cmp%d5\\t%0, %1\", + \"cmn%d4\\t%2, #%n3\"}, + {\"cmn%d5\\t%0, #%n1\", + \"cmn%d4\\t%2, #%n3\"} + }; + static const char *const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%2, %3\", + \"cmp\\t%0, %1\"}, + {\"cmp\\t%2, %3\", + \"cmn\\t%0, #%n1\"}, + {\"cmn\\t%2, #%n3\", + \"cmp\\t%0, %1\"}, + {\"cmn\\t%2, #%n3\", + \"cmn\\t%0, #%n1\"} + }; + static const char *const ite[2] = + { + \"it\\t%d5\", + \"it\\t%d4\" + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); + if (TARGET_THUMB2) { + output_asm_insn (ite[swap], operands); + } + output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); + return \"\"; + }" + [(set_attr "conds" "set") + (set_attr "predicable" "no") + (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr_alternative "length" + [(const_int 6) + (const_int 8) + (const_int 8) + (const_int 8) + (const_int 8) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10))]) + (set_attr "type" "multiple")] +) + +(define_insn "*cmp_ior" + [(set (match_operand 6 "dominant_cc_register" "") + (compare + (ior:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand:SI 0 "s_register_operand" + "l,l,l,r,r,r,r,r,r") + (match_operand:SI 1 "arm_add_operand" + "lPy,lPy,lPy,rI,L,rI,L,rI,L")]) + (match_operator:SI 5 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" + "l,r,r,l,l,r,r,r,r") + (match_operand:SI 3 "arm_add_operand" + "lPy,rI,L,lPy,lPy,rI,rI,L,L")])) + (const_int 0)))] + "TARGET_32BIT" + "* + { + static const char *const cmp1[NUM_OF_COND_CMP][2] = + { + {\"cmp\\t%0, %1\", + \"cmp\\t%2, %3\"}, + {\"cmn\\t%0, #%n1\", + \"cmp\\t%2, %3\"}, + {\"cmp\\t%0, %1\", + \"cmn\\t%2, #%n3\"}, + {\"cmn\\t%0, #%n1\", + \"cmn\\t%2, #%n3\"} + }; + static const char *const cmp2[NUM_OF_COND_CMP][2] = + { + {\"cmp%D4\\t%2, %3\", + \"cmp%D5\\t%0, %1\"}, + {\"cmp%D4\\t%2, 
%3\", + \"cmn%D5\\t%0, #%n1\"}, + {\"cmn%D4\\t%2, #%n3\", + \"cmp%D5\\t%0, %1\"}, + {\"cmn%D4\\t%2, #%n3\", + \"cmn%D5\\t%0, #%n1\"} + }; + static const char *const ite[2] = + { + \"it\\t%D4\", + \"it\\t%D5\" + }; + static const int cmp_idx[9] = {CMP_CMP, CMP_CMP, CMP_CMN, + CMP_CMP, CMN_CMP, CMP_CMP, + CMN_CMP, CMP_CMN, CMN_CMN}; + int swap = + comparison_dominates_p (GET_CODE (operands[5]), GET_CODE (operands[4])); + + output_asm_insn (cmp1[cmp_idx[which_alternative]][swap], operands); + if (TARGET_THUMB2) { + output_asm_insn (ite[swap], operands); + } + output_asm_insn (cmp2[cmp_idx[which_alternative]][swap], operands); + return \"\"; + } + " + [(set_attr "conds" "set") + (set_attr "arch" "t2,t2,t2,t2,t2,any,any,any,any") + (set_attr_alternative "length" + [(const_int 6) + (const_int 8) + (const_int 8) + (const_int 8) + (const_int 8) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10)) + (if_then_else (eq_attr "is_thumb" "no") + (const_int 8) + (const_int 10))]) + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*ior_scc_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") + (ior:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_OR_Y) + != CCmode)" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 7) + (compare + (ior:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 0) (ne:SI (match_dup 7) (const_int 0)))] + "operands[7] + = gen_rtx_REG (arm_select_dominance_cc_mode (operands[3], operands[6], + DOM_CC_X_OR_Y), + CC_REGNUM);" + [(set_attr "conds" "clob") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +; If the above pattern is followed by a CMP insn, then the compare is +; redundant, since we can rework the conditional instruction that follows. 
+(define_insn_and_split "*ior_scc_scc_cmp" + [(set (match_operand 0 "dominant_cc_register" "") + (compare (ior:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")])) + (const_int 0))) + (set (match_operand:SI 7 "s_register_operand" "=Ts") + (ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) + (compare + (ior:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] + "" + [(set_attr "conds" "set") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*and_scc_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") + (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + "#" + "TARGET_32BIT && reload_completed + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + != CCmode)" + [(set (match_dup 7) + (compare + (and:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 0) (ne:SI (match_dup 7) (const_int 0)))] + "operands[7] + = gen_rtx_REG (arm_select_dominance_cc_mode (operands[3], operands[6], + DOM_CC_X_AND_Y), + CC_REGNUM);" + [(set_attr "conds" "clob") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +; If the above pattern is followed by a CMP insn, then the compare is +; redundant, since we can rework the conditional instruction that follows. +(define_insn_and_split "*and_scc_scc_cmp" + [(set (match_operand 0 "dominant_cc_register" "") + (compare (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_add_operand" "rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")])) + (const_int 0))) + (set (match_operand:SI 7 "s_register_operand" "=Ts") + (and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) + (compare + (and:SI + (match_op_dup 3 [(match_dup 1) (match_dup 2)]) + (match_op_dup 6 [(match_dup 4) (match_dup 5)])) + (const_int 0))) + (set (match_dup 7) (ne:SI (match_dup 0) (const_int 0)))] + "" + [(set_attr "conds" "set") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +;; If there is no dominance in the comparison, then we can still save an +;; instruction in the AND case, since we can know that the second compare +;; need only zero the value if false (if true, then the value is already +;; correct). 
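+;;
+;; Schematically, the split below produces (pseudo-assembly; the real
+;; condition codes and registers come from the matched operands):
+;;
+;;   <scc sequence>            @ Rd = first comparison, 0 or 1
+;;   cmp     a, b              @ second comparison
+;;   mov<inverse-cond> Rd, #0  @ zero Rd only if the second test failed
+;;
+;; which avoids materialising the second 0/1 value and AND-ing it in
+;; separately.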
+(define_insn_and_split "*and_scc_scc_nodom" + [(set (match_operand:SI 0 "s_register_operand" "=&Ts,&Ts,&Ts") + (and:SI (match_operator:SI 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r,r,0") + (match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")]) + (match_operator:SI 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL,rIL")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT + && (arm_select_dominance_cc_mode (operands[3], operands[6], DOM_CC_X_AND_Y) + == CCmode)" + "#" + "TARGET_32BIT && reload_completed" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC CC_REGNUM))]) + (set (match_dup 7) (match_op_dup 8 [(match_dup 4) (match_dup 5)])) + (set (match_dup 0) + (if_then_else:SI (match_op_dup 6 [(match_dup 7) (const_int 0)]) + (match_dup 0) + (const_int 0)))] + "operands[7] = gen_rtx_REG (SELECT_CC_MODE (GET_CODE (operands[6]), + operands[4], operands[5]), + CC_REGNUM); + operands[8] = gen_rtx_COMPARE (GET_MODE (operands[7]), operands[4], + operands[5]);" + [(set_attr "conds" "clob") + (set_attr "length" "20") + (set_attr "type" "multiple")] +) + +(define_split + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI + (and:SI (match_operand:SI 0 "s_register_operand" "") + (const_int 1)) + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")])) + (const_int 0))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 4) + (ior:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)]) + (match_dup 0))) + (set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 4) (const_int 1)) + (const_int 0)))] + "") + +(define_split + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (ior:SI + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")]) + (and:SI (match_operand:SI 0 "s_register_operand" "") + (const_int 1))) + (const_int 0))) + (clobber (match_operand:SI 4 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 4) + (ior:SI (match_op_dup 1 [(match_dup 2) (match_dup 3)]) + (match_dup 0))) + (set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV (and:SI (match_dup 4) (const_int 1)) + (const_int 0)))] + "") +;; ??? 
The conditional patterns above need checking for Thumb-2 usefulness + +(define_insn_and_split "*negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) + { + /* Emit mov\\t%0, %1, asr #31 */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)))); + DONE; + } + else if (GET_CODE (operands[3]) == NE) + { + /* Emit subs\\t%0, %1, %2\;mvnne\\t%0, #0 */ + if (CONST_INT_P (operands[2])) + emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2], + GEN_INT (- INTVAL (operands[2])))); + else + emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2])); + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_NE (SImode, + cc_reg, + const0_rtx), + gen_rtx_SET (SImode, + operands[0], + GEN_INT (~0)))); + DONE; + } + else + { + /* Emit: cmp\\t%1, %2\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */ + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[1], operands[2]))); + enum rtx_code rc = GET_CODE (operands[3]); + + rc = reverse_condition (rc); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + cc_reg, + const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + rc = GET_CODE (operands[3]); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + cc_reg, + const0_rtx), + gen_rtx_SET (VOIDmode, + operands[0], + GEN_INT (~0)))); + DONE; + } + FAIL; + } + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "movcond_addsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (if_then_else:SI + (match_operator 5 "comparison_operator" + [(plus:SI (match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "rI,rPy,r") + (match_operand:SI 2 "arm_rhs_operand" "rI,rPy,r"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT" + "#" + "&& reload_completed" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_dup 3) + (match_dup 4)) + (const_int 0))) + (set (match_dup 0) (match_dup 1)) + (cond_exec (match_dup 6) + (set (match_dup 0) (match_dup 2)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[5]), + operands[3], operands[4]); + enum rtx_code rc = GET_CODE (operands[5]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + gcc_assert (!(mode == CCFPmode || mode == CCFPEmode)); + rc = reverse_condition (rc); + + operands[6] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); + } + " + [(set_attr "conds" "clob") + (set_attr "enabled_for_depr_it" "no,yes,yes") + (set_attr "type" "multiple")] +) + +(define_insn "movcond" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + if (GET_CODE (operands[5]) == LT + && (operands[4] == const0_rtx)) + { + if 
(which_alternative != 1 && REG_P (operands[1])) + { + if (operands[2] == const0_rtx) + return \"and\\t%0, %1, %3, asr #31\"; + return \"ands\\t%0, %1, %3, asr #32\;movcc\\t%0, %2\"; + } + else if (which_alternative != 0 && REG_P (operands[2])) + { + if (operands[1] == const0_rtx) + return \"bic\\t%0, %2, %3, asr #31\"; + return \"bics\\t%0, %2, %3, asr #32\;movcs\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + + if (GET_CODE (operands[5]) == GE + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && REG_P (operands[1])) + { + if (operands[2] == const0_rtx) + return \"bic\\t%0, %1, %3, asr #31\"; + return \"bics\\t%0, %1, %3, asr #32\;movcs\\t%0, %2\"; + } + else if (which_alternative != 0 && REG_P (operands[2])) + { + if (operands[1] == const0_rtx) + return \"and\\t%0, %2, %3, asr #31\"; + return \"ands\\t%0, %2, %3, asr #32\;movcc\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + if (CONST_INT_P (operands[4]) + && !const_ok_for_arm (INTVAL (operands[4]))) + output_asm_insn (\"cmn\\t%3, #%n4\", operands); + else + output_asm_insn (\"cmp\\t%3, %4\", operands); + if (which_alternative != 0) + output_asm_insn (\"mov%d5\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%D5\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,8,12") + (set_attr "type" "multiple")] +) + +;; ??? The patterns below need checking for Thumb-2 usefulness. + +(define_insn "*ifcompare_plus_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_plus_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L,rI,L")) + (match_operand:SI 1 "arm_rhs_operand" "0,0,?rI,?rI")))] + "TARGET_ARM" + "@ + add%d4\\t%0, %2, %3 + sub%d4\\t%0, %2, #%n3 + add%d4\\t%0, %2, %3\;mov%D4\\t%0, %1 + sub%d4\\t%0, %2, #%n3\;mov%D4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,8") + (set_attr_alternative "type" + [(if_then_else (match_operand 3 "const_int_operand" "") + (const_string "alu_imm" ) + (const_string "alu_reg")) + (const_string "alu_imm") + (const_string "alu_reg") + (const_string "alu_reg")])] +) + +(define_insn "*ifcompare_move_plus" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn 
"*if_move_plus" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,0,?rI,?rI") + (plus:SI + (match_operand:SI 2 "s_register_operand" "r,r,r,r") + (match_operand:SI 3 "arm_add_operand" "rI,L,rI,L"))))] + "TARGET_ARM" + "@ + add%D4\\t%0, %2, %3 + sub%D4\\t%0, %2, #%n3 + add%D4\\t%0, %2, %3\;mov%d4\\t%0, %1 + sub%D4\\t%0, %2, #%n3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,8") + (set_attr "type" "alu_reg,alu_imm,multiple,multiple")] +) + +(define_insn "*ifcompare_arith_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (match_operator 9 "arm_comparison_operator" + [(match_operand:SI 5 "s_register_operand" "r") + (match_operand:SI 6 "arm_add_operand" "rIL")]) + (match_operator:SI 8 "shiftable_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_arith_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI (match_operator 5 "arm_comparison_operator" + [(match_operand 8 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rI")])))] + "TARGET_ARM" + "%I6%d5\\t%0, %1, %2\;%I7%D5\\t%0, %3, %4" + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*ifcompare_arith_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_add_operand" "rIL,rIL")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + /* If we have an operation where (op x 0) is the identity operation and + the conditional operator is LT or GE and we are comparing against zero and + everything is in registers then we can do this in two instructions. 
*/ + if (operands[3] == const0_rtx + && GET_CODE (operands[7]) != AND + && REG_P (operands[5]) + && REG_P (operands[1]) + && REGNO (operands[1]) == REGNO (operands[4]) + && REGNO (operands[4]) != REGNO (operands[0])) + { + if (GET_CODE (operands[6]) == LT) + return \"and\\t%0, %5, %2, asr #31\;%I7\\t%0, %4, %0\"; + else if (GET_CODE (operands[6]) == GE) + return \"bic\\t%0, %5, %2, asr #31\;%I7\\t%0, %4, %0\"; + } + if (CONST_INT_P (operands[3]) + && !const_ok_for_arm (INTVAL (operands[3]))) + output_asm_insn (\"cmn\\t%2, #%n3\", operands); + else + output_asm_insn (\"cmp\\t%2, %3\", operands); + output_asm_insn (\"%I7%d6\\t%0, %4, %5\", operands); + if (which_alternative != 0) + return \"mov%D6\\t%0, %1\"; + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_arith_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 4 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operator:SI 5 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI")))] + "TARGET_ARM" + "@ + %I5%d4\\t%0, %2, %3 + %I5%d4\\t%0, %2, %3\;mov%D4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") + (set_attr "type" "alu_shift_reg,multiple")] +) + +(define_insn "*ifcompare_move_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + /* If we have an operation where (op x 0) is the identity operation and + the conditional operator is LT or GE and we are comparing against zero and + everything is in registers then we can do this in two instructions */ + if (operands[5] == const0_rtx + && GET_CODE (operands[7]) != AND + && REG_P (operands[3]) + && REG_P (operands[1]) + && REGNO (operands[1]) == REGNO (operands[2]) + && REGNO (operands[2]) != REGNO (operands[0])) + { + if (GET_CODE (operands[6]) == GE) + return \"and\\t%0, %3, %4, asr #31\;%I7\\t%0, %2, %0\"; + else if (GET_CODE (operands[6]) == LT) + return \"bic\\t%0, %3, %4, asr #31\;%I7\\t%0, %2, %0\"; + } + + if (CONST_INT_P (operands[5]) + && !const_ok_for_arm (INTVAL (operands[5]))) + output_asm_insn (\"cmn\\t%4, #%n5\", operands); + else + output_asm_insn (\"cmp\\t%4, %5\", operands); + + if (which_alternative != 0) + output_asm_insn (\"mov%d6\\t%0, %1\", operands); + return \"%I7%D6\\t%0, %2, %3\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_move_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (match_operator:SI 5 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")])))] + "TARGET_ARM" + "@ + %I5%D4\\t%0, %2, %3 + %I5%D4\\t%0, %2, %3\;mov%d4\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,8") + (set_attr "type" 
"alu_shift_reg,multiple")] +) + +(define_insn "*ifcompare_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (not:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))))] + "TARGET_ARM" + "@ + mvn%D4\\t%0, %2 + mov%d4\\t%0, %1\;mvn%D4\\t%0, %2 + mvn%d4\\t%0, #%B1\;mvn%D4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "type" "mvn_reg") + (set_attr "length" "4,8,8") + (set_attr "type" "mvn_reg,multiple,multiple")] +) + +(define_insn "*ifcompare_not_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_not_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (not:SI (match_operand:SI 2 "s_register_operand" "r,r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + mvn%d4\\t%0, %2 + mov%D4\\t%0, %1\;mvn%d4\\t%0, %2 + mvn%D4\\t%0, #%B1\;mvn%d4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "type" "mvn_reg,multiple,multiple") + (set_attr "length" "4,8,8")] +) + +(define_insn "*ifcompare_shift_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_shift_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM,rM")]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + mov%d5\\t%0, %2%S4 + mov%D5\\t%0, %1\;mov%d5\\t%0, %2%S4 + mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4" + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") + (set_attr "type" "mov_shift_reg,multiple,multiple")] +) + +(define_insn "*ifcompare_move_shift" + [(set (match_operand:SI 0 
"s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r,r") + (match_operand:SI 5 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_move_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 6 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r,r,r") + (match_operand:SI 3 "arm_rhs_operand" "rM,rM,rM")])))] + "TARGET_ARM" + "@ + mov%D5\\t%0, %2%S4 + mov%d5\\t%0, %1\;mov%D5\\t%0, %2%S4 + mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4" + [(set_attr "conds" "use") + (set_attr "shift" "2") + (set_attr "length" "4,8,8") + (set_attr "type" "mov_shift_reg,multiple,multiple")] +) + +(define_insn "*ifcompare_shift_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 7 "arm_comparison_operator" + [(match_operand:SI 5 "s_register_operand" "r") + (match_operand:SI 6 "arm_add_operand" "rIL")]) + (match_operator:SI 8 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rM")]) + (match_operator:SI 9 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rM")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_shift_shift" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 8 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rM")]) + (match_operator:SI 7 "shift_operator" + [(match_operand:SI 3 "s_register_operand" "r") + (match_operand:SI 4 "arm_rhs_operand" "rM")])))] + "TARGET_ARM" + "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7" + [(set_attr "conds" "use") + (set_attr "shift" "1") + (set_attr "length" "8") + (set (attr "type") (if_then_else + (and (match_operand 2 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_string "mov_shift") + (const_string "mov_shift_reg")))] +) + +(define_insn "*ifcompare_not_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r")) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_not_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (not:SI (match_operand:SI 1 
"s_register_operand" "r")) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")])))] + "TARGET_ARM" + "mvn%d5\\t%0, %1\;%I6%D5\\t%0, %2, %3" + [(set_attr "conds" "use") + (set_attr "type" "mvn_reg") + (set_attr "length" "8")] +) + +(define_insn "*ifcompare_arith_not" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 6 "arm_comparison_operator" + [(match_operand:SI 4 "s_register_operand" "r") + (match_operand:SI 5 "arm_add_operand" "rIL")]) + (match_operator:SI 7 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_arith_not" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operator:SI 6 "shiftable_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (not:SI (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_ARM" + "mvn%D5\\t%0, %1\;%I6%d5\\t%0, %2, %3" + [(set_attr "conds" "use") + (set_attr "type" "multiple") + (set_attr "length" "8")] +) + +(define_insn "*ifcompare_neg_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rIK"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_neg_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r,r")) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K")))] + "TARGET_ARM" + "@ + rsb%d4\\t%0, %2, #0 + mov%D4\\t%0, %1\;rsb%d4\\t%0, %2, #0 + mvn%D4\\t%0, #%B1\;rsb%d4\\t%0, %2, #0" + [(set_attr "conds" "use") + (set_attr "length" "4,8,8") + (set_attr "type" "logic_shift_imm,multiple,multiple")] +) + +(define_insn "*ifcompare_move_neg" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL")]) + (match_operand:SI 1 "arm_not_operand" "0,?rIK") + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "#" + [(set_attr "conds" "clob") + (set_attr "length" "8,12") + (set_attr "type" "multiple")] +) + +(define_insn "*if_move_neg" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI + (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0,?rI,K") + (neg:SI (match_operand:SI 2 "s_register_operand" "r,r,r"))))] + "TARGET_ARM" + "@ + rsb%D4\\t%0, %2, #0 + mov%d4\\t%0, %1\;rsb%D4\\t%0, %2, #0 + mvn%d4\\t%0, #%B1\;rsb%D4\\t%0, %2, #0" + [(set_attr "conds" "use") + 
(set_attr "length" "4,8,8") + (set_attr "type" "logic_shift_imm,multiple,multiple")] +) + +(define_insn "*arith_adjacentmem" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (match_operator:SI 1 "shiftable_operator" + [(match_operand:SI 2 "memory_operand" "m") + (match_operand:SI 3 "memory_operand" "m")])) + (clobber (match_scratch:SI 4 "=r"))] + "TARGET_ARM && adjacent_mem_locations (operands[2], operands[3])" + "* + { + rtx ldm[3]; + rtx arith[4]; + rtx base_reg; + HOST_WIDE_INT val1 = 0, val2 = 0; + + if (REGNO (operands[0]) > REGNO (operands[4])) + { + ldm[1] = operands[4]; + ldm[2] = operands[0]; + } + else + { + ldm[1] = operands[0]; + ldm[2] = operands[4]; + } + + base_reg = XEXP (operands[2], 0); + + if (!REG_P (base_reg)) + { + val1 = INTVAL (XEXP (base_reg, 1)); + base_reg = XEXP (base_reg, 0); + } + + if (!REG_P (XEXP (operands[3], 0))) + val2 = INTVAL (XEXP (XEXP (operands[3], 0), 1)); + + arith[0] = operands[0]; + arith[3] = operands[1]; + + if (val1 < val2) + { + arith[1] = ldm[1]; + arith[2] = ldm[2]; + } + else + { + arith[1] = ldm[2]; + arith[2] = ldm[1]; + } + + ldm[0] = base_reg; + if (val1 !=0 && val2 != 0) + { + rtx ops[3]; + + if (val1 == 4 || val2 == 4) + /* Other val must be 8, since we know they are adjacent and neither + is zero. */ + output_asm_insn (\"ldm%(ib%)\\t%0, {%1, %2}\", ldm); + else if (const_ok_for_arm (val1) || const_ok_for_arm (-val1)) + { + ldm[0] = ops[0] = operands[4]; + ops[1] = base_reg; + ops[2] = GEN_INT (val1); + output_add_immediate (ops); + if (val1 < val2) + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + } + else + { + /* Offset is out of range for a single add, so use two ldr. */ + ops[0] = ldm[1]; + ops[1] = base_reg; + ops[2] = GEN_INT (val1); + output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops); + ops[0] = ldm[2]; + ops[2] = GEN_INT (val2); + output_asm_insn (\"ldr%?\\t%0, [%1, %2]\", ops); + } + } + else if (val1 != 0) + { + if (val1 < val2) + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + } + else + { + if (val1 < val2) + output_asm_insn (\"ldm%(ia%)\\t%0, {%1, %2}\", ldm); + else + output_asm_insn (\"ldm%(da%)\\t%0, {%1, %2}\", ldm); + } + output_asm_insn (\"%I3%?\\t%0, %1, %2\", arith); + return \"\"; + }" + [(set_attr "length" "12") + (set_attr "predicable" "yes") + (set_attr "type" "load1")] +) + +; This pattern is never tried by combine, so do it as a peephole + +(define_peephole2 + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 1 "arm_general_register_operand" "")) + (set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (const_int 0)))] + "TARGET_ARM" + [(parallel [(set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) (const_int 0))) + (set (match_dup 0) (match_dup 1))])] + "" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (and:SI (ge:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 0)) + (neg:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 4 "arm_rhs_operand" "")])))) + (clobber (match_operand:SI 5 "s_register_operand" ""))] + "TARGET_ARM" + [(set (match_dup 5) (not:SI (ashiftrt:SI (match_dup 1) (const_int 31)))) + (set (match_dup 0) (and:SI (match_op_dup 2 [(match_dup 3) (match_dup 4)]) + (match_dup 5)))] + "" +) + +;; This split can be used because CC_Z mode implies that the following +;; branch will be an equality, or an unsigned inequality, so 
the sign +;; extension is not needed. + +(define_split + [(set (reg:CC_Z CC_REGNUM) + (compare:CC_Z + (ashift:SI (subreg:SI (match_operand:QI 0 "memory_operand" "") 0) + (const_int 24)) + (match_operand 1 "const_int_operand" ""))) + (clobber (match_scratch:SI 2 ""))] + "TARGET_ARM + && (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) + == (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) >> 24) << 24)" + [(set (match_dup 2) (zero_extend:SI (match_dup 0))) + (set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 1)))] + " + operands[1] = GEN_INT (((unsigned long) INTVAL (operands[1])) >> 24); + " +) +;; ??? Check the patterns above for Thumb-2 usefulness + +(define_expand "prologue" + [(clobber (const_int 0))] + "TARGET_EITHER" + "if (TARGET_32BIT) + arm_expand_prologue (); + else + thumb1_expand_prologue (); + DONE; + " +) + +(define_expand "epilogue" + [(clobber (const_int 0))] + "TARGET_EITHER" + " + if (crtl->calls_eh_return) + emit_insn (gen_force_register_use (gen_rtx_REG (Pmode, 2))); + if (TARGET_THUMB1) + { + thumb1_expand_epilogue (); + emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, + gen_rtvec (1, ret_rtx), VUNSPEC_EPILOGUE)); + } + else if (HAVE_return) + { + /* HAVE_return is testing for USE_RETURN_INSN (FALSE). Hence, + no need for explicit testing again. */ + emit_jump_insn (gen_return ()); + } + else if (TARGET_32BIT) + { + arm_expand_epilogue (true); + } + DONE; + " +) + +(define_insn "prologue_thumb1_interwork" + [(unspec_volatile [(const_int 0)] VUNSPEC_THUMB1_INTERWORK)] + "TARGET_THUMB1" + "* return thumb1_output_interwork ();" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +;; Note - although unspec_volatiles USE all hard registers, +;; USEs are ignored after reload has completed. Thus we need +;; to add an unspec of the link register to ensure that flow +;; does not think that it is unused by the sibcall branch that +;; will replace the standard function epilogue. +(define_expand "sibcall_epilogue" + [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_REGISTER_USE) + (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])] + "TARGET_32BIT" + " + arm_expand_epilogue (false); + DONE; + " +) + +(define_insn "*epilogue_insns" + [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)] + "TARGET_THUMB1" + "* + return thumb1_unexpanded_epilogue (); + " + ; Length is absolute worst case + [(set_attr "length" "44") + (set_attr "type" "block") + ;; We don't clobber the conditions, but the potential length of this + ;; operation is sufficient to make conditionalizing the sequence + ;; unlikely to be profitable. + (set_attr "conds" "clob")] +) + +(define_expand "eh_epilogue" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:SI 2 "register_operand" ""))] + "TARGET_EITHER" + " + { + cfun->machine->eh_epilogue_sp_ofs = operands[1]; + if (!REG_P (operands[2]) || REGNO (operands[2]) != 2) + { + rtx ra = gen_rtx_REG (Pmode, 2); + + emit_move_insn (ra, operands[2]); + operands[2] = ra; + } + /* This is a hack -- we may have crystallized the function type too + early. */ + cfun->machine->func_type = 0; + }" +) + +;; This split is only used during output to reduce the number of patterns +;; that need assembler instructions added to them. We allowed the setting +;; of the conditions to be implicit during rtl generation so that +;; the conditional compare patterns would work. However this conflicts to +;; some extent with the conditional data operations, so we have to split them +;; up again here. + +;; ???
Need to audit these splitters for Thumb-2. Why isn't normal +;; conditional execution sufficient? + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_dup 0) + (match_operand 4 "" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 5) (match_dup 6)) + (cond_exec (match_dup 7) + (set (match_dup 0) (match_dup 4)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[5] = gen_rtx_REG (mode, CC_REGNUM); + operands[6] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[7] = gen_rtx_fmt_ee (rc, VOIDmode, operands[5], const0_rtx); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_operand 4 "" "") + (match_dup 0))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 5) (match_dup 6)) + (cond_exec (match_op_dup 1 [(match_dup 5) (const_int 0)]) + (set (match_dup 0) (match_dup 4)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + + operands[5] = gen_rtx_REG (mode, CC_REGNUM); + operands[6] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand 2 "" "") (match_operand 3 "" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 6) (match_dup 7)) + (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) + (set (match_dup 0) (match_dup 4))) + (cond_exec (match_dup 8) + (set (match_dup 0) (match_dup 5)))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + operands[7] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[8] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); + }" +) + +(define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (if_then_else:SI (match_operator 1 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 3 "arm_add_operand" "")]) + (match_operand:SI 4 "arm_rhs_operand" "") + (not:SI + (match_operand:SI 5 "s_register_operand" "")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM && reload_completed" + [(set (match_dup 6) (match_dup 7)) + (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)]) + (set (match_dup 0) (match_dup 4))) + (cond_exec (match_dup 8) + (set (match_dup 0) (not:SI (match_dup 5))))] + " + { + enum machine_mode mode = SELECT_CC_MODE (GET_CODE (operands[1]), + operands[2], operands[3]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[6] = gen_rtx_REG (mode, CC_REGNUM); + operands[7] = gen_rtx_COMPARE (mode, operands[2], operands[3]); + if (mode == CCFPmode || mode == CCFPEmode) + rc = 
reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + + operands[8] = gen_rtx_fmt_ee (rc, VOIDmode, operands[6], const0_rtx); + }" +) + +(define_insn "*cond_move_not" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (if_then_else:SI (match_operator 4 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,?rI") + (not:SI + (match_operand:SI 2 "s_register_operand" "r,r"))))] + "TARGET_ARM" + "@ + mvn%D4\\t%0, %2 + mov%d4\\t%0, %1\;mvn%D4\\t%0, %2" + [(set_attr "conds" "use") + (set_attr "type" "mvn_reg,multiple") + (set_attr "length" "4,8")] +) + +;; The next two patterns occur when an AND operation is followed by a +;; scc insn sequence + +(define_insn "*sign_extract_onebit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + output_asm_insn (\"ands\\t%0, %1, %2\", operands); + return \"mvnne\\t%0, #0\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "*not_signextract_onebit" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI + (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "n")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_ARM" + "* + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + output_asm_insn (\"tst\\t%1, %2\", operands); + output_asm_insn (\"mvneq\\t%0, #0\", operands); + return \"movne\\t%0, #0\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) +;; ??? The above patterns need auditing for Thumb-2 + +;; Push multiple registers to the stack. Registers are in parallel (use ...) +;; expressions. For simplicity, the first register is also in the unspec +;; part. +;; To avoid the usage of GNU extension, the length attribute is computed +;; in a C function arm_attr_length_push_multi. +(define_insn "*push_multi" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "push_mult_memory_operand" "") + (unspec:BLK [(match_operand:SI 1 "s_register_operand" "")] + UNSPEC_PUSH_MULT))])] + "" + "* + { + int num_saves = XVECLEN (operands[2], 0); + + /* For the StrongARM at least it is faster to + use STR to store only a single register. + In Thumb mode always use push, and the assembler will pick + something appropriate. 
*/ + if (num_saves == 1 && TARGET_ARM) + output_asm_insn (\"str%?\\t%1, [%m0, #-4]!\", operands); + else + { + int i; + char pattern[100]; + + if (TARGET_ARM) + strcpy (pattern, \"stm%(fd%)\\t%m0!, {%1\"); + else if (TARGET_THUMB2) + strcpy (pattern, \"push%?\\t{%1\"); + else + strcpy (pattern, \"push\\t{%1\"); + + for (i = 1; i < num_saves; i++) + { + strcat (pattern, \", %|\"); + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[2], 0, i), 0))]); + } + + strcat (pattern, \"}\"); + output_asm_insn (pattern, operands); + } + + return \"\"; + }" + [(set_attr "type" "store4") + (set (attr "length") + (symbol_ref "arm_attr_length_push_multi (operands[2], operands[1])"))] +) + +(define_insn "stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:SI 0 "s_register_operand" "rk") + (match_operand:SI 1 "s_register_operand" "rk")] + UNSPEC_PRLG_STK))] + "" + "" + [(set_attr "length" "0") + (set_attr "type" "block")] +) + +;; Pop (as used in epilogue RTL) +;; +(define_insn "*load_multiple_with_writeback" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + (set (match_operand:SI 3 "s_register_operand" "=rk") + (mem:SI (match_dup 1))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/false, + /*cond=*/const_true_rtx, + /*reverse=*/false, + /*update=*/true); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +;; Pop with return (as used in epilogue RTL) +;; +;; This instruction is generated when the registers are popped at the end of +;; epilogue. Here, instead of popping the value into LR and then generating +;; jump to LR, value is popped into PC directly. Hence, the pattern is combined +;; with (return). 
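+;; For illustration only (mnemonics and the register list depend on the
+;; target state and the frame laid out by the prologue): an epilogue that
+;; would otherwise end in
+;;   pop {r4, r5, lr}
+;;   bx  lr
+;; can instead be emitted as the single instruction
+;;   pop {r4, r5, pc}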
+(define_insn "*pop_multiple_with_writeback_and_return" + [(match_parallel 0 "pop_multiple_return" + [(return) + (set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + (set (match_operand:SI 3 "s_register_operand" "=rk") + (mem:SI (match_dup 1))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/true, + /*cond=*/const_true_rtx, + /*reverse=*/false, + /*update=*/true); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +(define_insn "*pop_multiple_with_return" + [(match_parallel 0 "pop_multiple_return" + [(return) + (set (match_operand:SI 2 "s_register_operand" "=rk") + (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) + ])] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/true, + /*cond=*/const_true_rtx, + /*reverse=*/false, + /*update=*/false); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] +) + +;; Load into PC and return +(define_insn "*ldr_with_return" + [(return) + (set (reg:SI PC_REGNUM) + (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+rk"))))] + "TARGET_32BIT && (reload_in_progress || reload_completed)" + "ldr%?\t%|pc, [%0], #4" + [(set_attr "type" "load1") + (set_attr "predicable" "yes")] +) +;; Pop for floating point registers (as used in epilogue RTL) +(define_insn "*vfp_pop_multiple_with_writeback" + [(match_parallel 0 "pop_multiple_fp" + [(set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + (set (match_operand:DF 3 "vfp_hard_register_operand" "") + (mem:DF (match_dup 1)))])] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "* + { + int num_regs = XVECLEN (operands[0], 0); + char pattern[100]; + rtx op_list[2]; + strcpy (pattern, \"fldmfdd\\t\"); + strcat (pattern, reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]); + strcat (pattern, \"!, {\"); + op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0); + strcat (pattern, \"%P0\"); + if ((num_regs - 1) > 1) + { + strcat (pattern, \"-%P1\"); + op_list [1] = XEXP (XVECEXP (operands[0], 0, num_regs - 1), 0); + } + + strcat (pattern, \"}\"); + output_asm_insn (pattern, op_list); + return \"\"; + } + " + [(set_attr "type" "load4") + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")] +) + +;; Special patterns for dealing with the constant pool + +(define_insn "align_4" + [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN)] + "TARGET_EITHER" + "* + assemble_align (32); + return \"\"; + " + [(set_attr "type" "no_insn")] +) + +(define_insn "align_8" + [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN8)] + "TARGET_EITHER" + "* + assemble_align (64); + return \"\"; + " + [(set_attr "type" "no_insn")] +) + +(define_insn "consttable_end" + [(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)] + "TARGET_EITHER" + "* + making_const_table = FALSE; + return \"\"; + " + [(set_attr "type" "no_insn")] +) + +(define_insn "consttable_1" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_1)] + "TARGET_THUMB1" + "* + making_const_table = TRUE; + assemble_integer (operands[0], 1, BITS_PER_WORD, 1); + assemble_zeros (3); + return \"\"; + " + [(set_attr "length" "4") + (set_attr "type" "no_insn")] +) + +(define_insn "consttable_2" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_2)] + "TARGET_THUMB1" + "* + making_const_table = 
TRUE; + gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT); + assemble_integer (operands[0], 2, BITS_PER_WORD, 1); + assemble_zeros (2); + return \"\"; + " + [(set_attr "length" "4") + (set_attr "type" "no_insn")] +) + +(define_insn "consttable_4" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_4)] + "TARGET_EITHER" + "* + { + rtx x = operands[0]; + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (x))) + { + case MODE_FLOAT: + if (GET_MODE (x) == HFmode) + arm_emit_fp16_const (x); + else + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + assemble_real (r, GET_MODE (x), BITS_PER_WORD); + } + break; + default: + /* XXX: Sometimes gcc does something really dumb and ends up with + a HIGH in a constant pool entry, usually because it's trying to + load into a VFP register. We know this will always be used in + combination with a LO_SUM which ignores the high bits, so just + strip off the HIGH. */ + if (GET_CODE (x) == HIGH) + x = XEXP (x, 0); + assemble_integer (x, 4, BITS_PER_WORD, 1); + mark_symbol_refs_as_used (x); + break; + } + return \"\"; + }" + [(set_attr "length" "4") + (set_attr "type" "no_insn")] +) + +(define_insn "consttable_8" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_8)] + "TARGET_EITHER" + "* + { + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + { + case MODE_FLOAT: + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); + assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); + break; + } + default: + assemble_integer (operands[0], 8, BITS_PER_WORD, 1); + break; + } + return \"\"; + }" + [(set_attr "length" "8") + (set_attr "type" "no_insn")] +) + +(define_insn "consttable_16" + [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_16)] + "TARGET_EITHER" + "* + { + making_const_table = TRUE; + switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + { + case MODE_FLOAT: + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); + assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); + break; + } + default: + assemble_integer (operands[0], 16, BITS_PER_WORD, 1); + break; + } + return \"\"; + }" + [(set_attr "length" "16") + (set_attr "type" "no_insn")] +) + +;; Miscellaneous Thumb patterns + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "TARGET_THUMB1" + " + if (flag_pic) + { + /* Hopefully, CSE will eliminate this copy. */ + rtx reg1 = copy_addr_to_reg (gen_rtx_LABEL_REF (Pmode, operands[1])); + rtx reg2 = gen_reg_rtx (SImode); + + emit_insn (gen_addsi3 (reg2, operands[0], reg1)); + operands[0] = reg2; + } + " +) + +;; NB never uses BX. 
+(define_insn "*thumb1_tablejump" + [(set (pc) (match_operand:SI 0 "register_operand" "l*r")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_THUMB1" + "mov\\t%|pc, %0" + [(set_attr "length" "2") + (set_attr "type" "no_insn")] +) + +;; V5 Instructions, + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (clz:SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_32BIT && arm_arch5" + "clz%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "clz")]) + +(define_insn "rbitsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))] + "TARGET_32BIT && arm_arch_thumb2" + "rbit%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "clz")]) + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "s_register_operand" "") + (ctz:SI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_32BIT && arm_arch_thumb2" + " + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_rbitsi2 (tmp, operands[1])); + emit_insn (gen_clzsi2 (operands[0], tmp)); + } + DONE; + " +) + +;; V5E instructions. + +(define_insn "prefetch" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "" "") + (match_operand:SI 2 "" ""))] + "TARGET_32BIT && arm_arch5e" + "pld\\t%a0" + [(set_attr "type" "load1")] +) + +;; General predication pattern + +(define_cond_exec + [(match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") + (const_int 0)])] + "TARGET_32BIT" + "" +[(set_attr "predicated" "yes")] +) + +(define_insn "force_register_use" + [(unspec:SI [(match_operand:SI 0 "register_operand" "")] UNSPEC_REGISTER_USE)] + "" + "%@ %0 needed" + [(set_attr "length" "0") + (set_attr "type" "no_insn")] +) + + +;; Patterns for exception handling + +(define_expand "eh_return" + [(use (match_operand 0 "general_operand" ""))] + "TARGET_EITHER" + " + { + if (TARGET_32BIT) + emit_insn (gen_arm_eh_return (operands[0])); + else + emit_insn (gen_thumb_eh_return (operands[0])); + DONE; + }" +) + +;; We can't expand this before we know where the link register is stored. +(define_insn_and_split "arm_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "TARGET_ARM" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + arm_set_return_address (operands[0], operands[1]); + DONE; + }" +) + +(define_insn_and_split "thumb_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "l")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&l"))] + "TARGET_THUMB1" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + thumb_set_return_address (operands[0], operands[1]); + DONE; + }" + [(set_attr "type" "mov_reg")] +) + + +;; TLS support + +(define_insn "load_tp_hard" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TLS))] + "TARGET_HARD_TP" + "mrc%?\\tp15, 0, %0, c13, c0, 3\\t@ load_tp_hard" + [(set_attr "predicable" "yes") + (set_attr "type" "mrs")] +) + +;; Doesn't clobber R1-R3. Must use r0 for the first operand. 
+(define_insn "load_tp_soft" + [(set (reg:SI 0) (unspec:SI [(const_int 0)] UNSPEC_TLS)) + (clobber (reg:SI LR_REGNUM)) + (clobber (reg:SI IP_REGNUM)) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SOFT_TP" + "bl\\t__aeabi_read_tp\\t@ load_tp_soft" + [(set_attr "conds" "clob") + (set_attr "type" "branch")] +) + +;; tls descriptor call +(define_insn "tlscall" + [(set (reg:SI R0_REGNUM) + (unspec:SI [(reg:SI R0_REGNUM) + (match_operand:SI 0 "" "X") + (match_operand 1 "" "")] UNSPEC_TLS)) + (clobber (reg:SI R1_REGNUM)) + (clobber (reg:SI LR_REGNUM)) + (clobber (reg:SI CC_REGNUM))] + "TARGET_GNU2_TLS" + { + targetm.asm_out.internal_label (asm_out_file, "LPIC", + INTVAL (operands[1])); + return "bl\\t%c0(tlscall)"; + } + [(set_attr "conds" "clob") + (set_attr "length" "4") + (set_attr "type" "branch")] +) + +;; For thread pointer builtin +(define_expand "get_thread_pointersi" + [(match_operand:SI 0 "s_register_operand" "=r")] + "" + " + { + arm_load_tp (operands[0]); + DONE; + }") + +;; + +;; We only care about the lower 16 bits of the constant +;; being inserted into the upper 16 bits of the register. +(define_insn "*arm_movtas_ze" + [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") + (const_int 16) + (const_int 16)) + (match_operand:SI 1 "const_int_operand" ""))] + "arm_arch_thumb2" + "movt%?\t%0, %L1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4") + (set_attr "type" "mov_imm")] +) + +(define_insn "*arm_rev" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (bswap:SI (match_operand:SI 1 "s_register_operand" "l,l,r")))] + "arm_arch6" + "@ + rev\t%0, %1 + rev%?\t%0, %1 + rev%?\t%0, %1" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] +) + +(define_expand "arm_legacy_rev" + [(set (match_operand:SI 2 "s_register_operand" "") + (xor:SI (rotatert:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 16)) + (match_dup 1))) + (set (match_dup 2) + (lshiftrt:SI (match_dup 2) + (const_int 8))) + (set (match_operand:SI 3 "s_register_operand" "") + (rotatert:SI (match_dup 1) + (const_int 8))) + (set (match_dup 2) + (and:SI (match_dup 2) + (const_int -65281))) + (set (match_operand:SI 0 "s_register_operand" "") + (xor:SI (match_dup 3) + (match_dup 2)))] + "TARGET_32BIT" + "" +) + +;; Reuse temporaries to keep register pressure down. 
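+;; As a reference for the legacy (pre-ARMv6) byte-swap expanders, here is a
+;; minimal scalar C sketch of what arm_legacy_rev above computes.  It is an
+;; illustration only; ror32 is a hypothetical helper standing in for the
+;; rotate-right steps and is not part of this port.
+;;
+;;   #include <stdint.h>
+;;
+;;   static inline uint32_t ror32 (uint32_t v, unsigned n)
+;;   { return (v >> n) | (v << (32 - n)); }
+;;
+;;   uint32_t legacy_rev (uint32_t x)
+;;   {
+;;     uint32_t t = (ror32 (x, 16) ^ x) >> 8;  /* the xor and lshiftrt */
+;;     uint32_t y = ror32 (x, 8);              /* the rotatert by 8 */
+;;     t &= 0xffff00ffu;                       /* the (const_int -65281) mask */
+;;     return y ^ t;                           /* final xor: bytes reversed */
+;;   }
+;;
+;; The Thumb variant below computes the same result using only shifts,
+;; rotates and ORs that are available to Thumb code.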
+(define_expand "thumb_legacy_rev" + [(set (match_operand:SI 2 "s_register_operand" "") + (ashift:SI (match_operand:SI 1 "s_register_operand" "") + (const_int 24))) + (set (match_operand:SI 3 "s_register_operand" "") + (lshiftrt:SI (match_dup 1) + (const_int 24))) + (set (match_dup 3) + (ior:SI (match_dup 3) + (match_dup 2))) + (set (match_operand:SI 4 "s_register_operand" "") + (const_int 16)) + (set (match_operand:SI 5 "s_register_operand" "") + (rotatert:SI (match_dup 1) + (match_dup 4))) + (set (match_dup 2) + (ashift:SI (match_dup 5) + (const_int 24))) + (set (match_dup 5) + (lshiftrt:SI (match_dup 5) + (const_int 24))) + (set (match_dup 5) + (ior:SI (match_dup 5) + (match_dup 2))) + (set (match_dup 5) + (rotatert:SI (match_dup 5) + (match_dup 4))) + (set (match_operand:SI 0 "s_register_operand" "") + (ior:SI (match_dup 5) + (match_dup 3)))] + "TARGET_THUMB" + "" +) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))] +"TARGET_EITHER && (arm_arch6 || !optimize_size)" +" + if (!arm_arch6) + { + rtx op2 = gen_reg_rtx (SImode); + rtx op3 = gen_reg_rtx (SImode); + + if (TARGET_THUMB) + { + rtx op4 = gen_reg_rtx (SImode); + rtx op5 = gen_reg_rtx (SImode); + + emit_insn (gen_thumb_legacy_rev (operands[0], operands[1], + op2, op3, op4, op5)); + } + else + { + emit_insn (gen_arm_legacy_rev (operands[0], operands[1], + op2, op3)); + } + + DONE; + } + " +) + +;; bswap16 patterns: use revsh and rev16 instructions for the signed +;; and unsigned variants, respectively. For rev16, expose +;; byte-swapping in the lower 16 bits only. +(define_insn "*arm_revsh" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (sign_extend:SI (bswap:HI (match_operand:HI 1 "s_register_operand" "l,l,r"))))] + "arm_arch6" + "@ + revsh\t%0, %1 + revsh%?\t%0, %1 + revsh%?\t%0, %1" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] +) + +(define_insn "*arm_rev16" + [(set (match_operand:HI 0 "s_register_operand" "=l,l,r") + (bswap:HI (match_operand:HI 1 "s_register_operand" "l,l,r")))] + "arm_arch6" + "@ + rev16\t%0, %1 + rev16%?\t%0, %1 + rev16%?\t%0, %1" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] +) + +(define_expand "bswaphi2" + [(set (match_operand:HI 0 "s_register_operand" "=r") + (bswap:HI (match_operand:HI 1 "s_register_operand" "r")))] +"arm_arch6" +"" +) + +;; Patterns for LDRD/STRD in Thumb2 mode + +(define_insn "*thumb2_ldrd" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk") + (match_operand:SI 2 "ldrd_strd_offset_operand" "Do")))) + (set (match_operand:SI 3 "s_register_operand" "=r") + (mem:SI (plus:SI (match_dup 1) + (match_operand:SI 4 "const_int_operand" ""))))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && ((INTVAL (operands[2]) + 4) == INTVAL (operands[4])) + && (operands_ok_ldrd_strd (operands[0], operands[3], + operands[1], INTVAL (operands[2]), + false, true))" + "ldrd%?\t%0, %3, [%1, %2]" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_ldrd_base" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "s_register_operand" "=r") + (mem:SI (plus:SI (match_dup 1) + (const_int 4))))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && 
current_tune->prefer_ldrd_strd + && (operands_ok_ldrd_strd (operands[0], operands[2], + operands[1], 0, false, true))" + "ldrd%?\t%0, %2, [%1]" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_ldrd_base_neg" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk") + (const_int -4)))) + (set (match_operand:SI 2 "s_register_operand" "=r") + (mem:SI (match_dup 1)))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && (operands_ok_ldrd_strd (operands[0], operands[2], + operands[1], -4, false, true))" + "ldrd%?\t%0, %2, [%1, #-4]" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_strd" + [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") + (match_operand:SI 1 "ldrd_strd_offset_operand" "Do"))) + (match_operand:SI 2 "s_register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + (match_operand:SI 3 "const_int_operand" ""))) + (match_operand:SI 4 "s_register_operand" "r"))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && ((INTVAL (operands[1]) + 4) == INTVAL (operands[3])) + && (operands_ok_ldrd_strd (operands[2], operands[4], + operands[0], INTVAL (operands[1]), + false, false))" + "strd%?\t%2, %4, [%0, %1]" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_strd_base" + [(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk")) + (match_operand:SI 1 "s_register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + (const_int 4))) + (match_operand:SI 2 "s_register_operand" "r"))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && (operands_ok_ldrd_strd (operands[1], operands[2], + operands[0], 0, false, false))" + "strd%?\t%1, %2, [%0]" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb2_strd_base_neg" + [(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk") + (const_int -4))) + (match_operand:SI 1 "s_register_operand" "r")) + (set (mem:SI (match_dup 0)) + (match_operand:SI 2 "s_register_operand" "r"))] + "TARGET_LDRD && TARGET_THUMB2 && reload_completed + && current_tune->prefer_ldrd_strd + && (operands_ok_ldrd_strd (operands[1], operands[2], + operands[0], -4, false, false))" + "strd%?\t%1, %2, [%0, #-4]" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +;; ARMv8 CRC32 instructions. +(define_insn "" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "r") + (match_operand: 2 "s_register_operand" "r")] + CRC))] + "TARGET_CRC32" + "\\t%0, %1, %2" + [(set_attr "type" "crc") + (set_attr "conds" "unconditional")] +) + +;; Load the load/store double peephole optimizations. +(include "ldrdstrd.md") + +;; Load the load/store multiple patterns +(include "ldmstm.md") + +;; Patterns in ldmstm.md don't cover more than 4 registers. This pattern covers +;; large lists without explicit writeback generated for APCS_FRAME epilogue. 
+(define_insn "*load_multiple" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 2 "s_register_operand" "=rk") + (mem:SI (match_operand:SI 1 "s_register_operand" "rk"))) + ])] + "TARGET_32BIT" + "* + { + arm_output_multireg_pop (operands, /*return_pc=*/false, + /*cond=*/const_true_rtx, + /*reverse=*/false, + /*update=*/false); + return \"\"; + } + " + [(set_attr "predicable" "yes")] +) + +;; Vector bits common to IWMMXT and Neon +(include "vec-common.md") +;; Load the Intel Wireless Multimedia Extension patterns +(include "iwmmxt.md") +;; Load the VFP co-processor patterns +(include "vfp.md") +;; Thumb-2 patterns +(include "thumb2.md") +;; Neon patterns +(include "neon.md") +;; Crypto patterns +(include "crypto.md") +;; Synchronization Primitives +(include "sync.md") +;; Fixed-point patterns +(include "arm-fixed.md") diff --git a/gcc-4.9/gcc/config/arm/arm.opt b/gcc-4.9/gcc/config/arm/arm.opt new file mode 100644 index 000000000..d80f1f13b --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm.opt @@ -0,0 +1,277 @@ +; Options for the ARM port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/arm/arm-opts.h + +Enum +Name(tls_type) Type(enum arm_tls_type) +TLS dialect to use: + +EnumValue +Enum(tls_type) String(gnu) Value(TLS_GNU) + +EnumValue +Enum(tls_type) String(gnu2) Value(TLS_GNU2) + +mabi= +Target RejectNegative Joined Enum(arm_abi_type) Var(arm_abi) Init(ARM_DEFAULT_ABI) +Specify an ABI + +Enum +Name(arm_abi_type) Type(enum arm_abi_type) +Known ARM ABIs (for use with the -mabi= option): + +EnumValue +Enum(arm_abi_type) String(apcs-gnu) Value(ARM_ABI_APCS) + +EnumValue +Enum(arm_abi_type) String(atpcs) Value(ARM_ABI_ATPCS) + +EnumValue +Enum(arm_abi_type) String(aapcs) Value(ARM_ABI_AAPCS) + +EnumValue +Enum(arm_abi_type) String(iwmmxt) Value(ARM_ABI_IWMMXT) + +EnumValue +Enum(arm_abi_type) String(aapcs-linux) Value(ARM_ABI_AAPCS_LINUX) + +mabort-on-noreturn +Target Report Mask(ABORT_NORETURN) +Generate a call to abort if a noreturn function returns + +mapcs +Target RejectNegative Mask(APCS_FRAME) Undocumented + +mapcs-float +Target Report Mask(APCS_FLOAT) +Pass FP arguments in FP registers + +mapcs-frame +Target Report Mask(APCS_FRAME) +Generate APCS conformant stack frames + +mapcs-reentrant +Target Report Mask(APCS_REENT) +Generate re-entrant, PIC code + +mapcs-stack-check +Target Report Mask(APCS_STACK) Undocumented + +march= +Target RejectNegative ToLower Joined Enum(arm_arch) Var(arm_arch_option) +Specify the name of the target architecture + +; Other arm_arch values are loaded from arm-tables.opt +; but that is a generated file and this is an odd-one-out. +EnumValue +Enum(arm_arch) String(native) Value(-1) DriverOnly + +marm +Target Report RejectNegative InverseMask(THUMB) +Generate code in 32 bit ARM state. 
+ +mbig-endian +Target Report RejectNegative Mask(BIG_END) +Assume target CPU is configured as big endian + +mcallee-super-interworking +Target Report Mask(CALLEE_INTERWORKING) +Thumb: Assume non-static functions may be called from ARM code + +mcaller-super-interworking +Target Report Mask(CALLER_INTERWORKING) +Thumb: Assume function pointers may go to non-Thumb aware code + +mcpu= +Target RejectNegative ToLower Joined Enum(processor_type) Var(arm_cpu_option) Init(arm_none) +Specify the name of the target CPU + +mfloat-abi= +Target RejectNegative Joined Enum(float_abi_type) Var(arm_float_abi) Init(TARGET_DEFAULT_FLOAT_ABI) +Specify if floating point hardware should be used + +Enum +Name(float_abi_type) Type(enum float_abi_type) +Known floating-point ABIs (for use with the -mfloat-abi= option): + +EnumValue +Enum(float_abi_type) String(soft) Value(ARM_FLOAT_ABI_SOFT) + +EnumValue +Enum(float_abi_type) String(softfp) Value(ARM_FLOAT_ABI_SOFTFP) + +EnumValue +Enum(float_abi_type) String(hard) Value(ARM_FLOAT_ABI_HARD) + +mfp16-format= +Target RejectNegative Joined Enum(arm_fp16_format_type) Var(arm_fp16_format) Init(ARM_FP16_FORMAT_NONE) +Specify the __fp16 floating-point format + +Enum +Name(arm_fp16_format_type) Type(enum arm_fp16_format_type) +Known __fp16 formats (for use with the -mfp16-format= option): + +EnumValue +Enum(arm_fp16_format_type) String(none) Value(ARM_FP16_FORMAT_NONE) + +EnumValue +Enum(arm_fp16_format_type) String(ieee) Value(ARM_FP16_FORMAT_IEEE) + +EnumValue +Enum(arm_fp16_format_type) String(alternative) Value(ARM_FP16_FORMAT_ALTERNATIVE) + +mfpu= +Target RejectNegative Joined Enum(arm_fpu) Var(arm_fpu_index) +Specify the name of the target floating point hardware/format + +mlra +Target Report Var(arm_lra_flag) Init(1) Save +Use LRA instead of reload (transitional) + +mhard-float +Target RejectNegative Alias(mfloat-abi=, hard) Undocumented + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_END) +Assume target CPU is configured as little endian + +mlong-calls +Target Report Mask(LONG_CALLS) +Generate call insns as indirect calls, if necessary + +mpic-data-is-text-relative +Target Report Var(arm_pic_data_is_text_relative) Init(TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE) +Assume data segments are relative to text segment. 
+ +mpic-register= +Target RejectNegative Joined Var(arm_pic_register_string) +Specify the register to be used for PIC addressing + +mpoke-function-name +Target Report Mask(POKE_FUNCTION_NAME) +Store function names in object code + +msched-prolog +Target Report Mask(SCHED_PROLOG) +Permit scheduling of a function's prologue sequence + +msingle-pic-base +Target Report Mask(SINGLE_PIC_BASE) +Do not load the PIC register in function prologues + +msoft-float +Target RejectNegative Alias(mfloat-abi=, soft) Undocumented + +mstructure-size-boundary= +Target RejectNegative Joined UInteger Var(arm_structure_size_boundary) Init(DEFAULT_STRUCTURE_SIZE_BOUNDARY) +Specify the minimum bit alignment of structures + +mthumb +Target Report RejectNegative Mask(THUMB) +Generate code for Thumb state + +mthumb-interwork +Target Report Mask(INTERWORK) +Support calls between Thumb and ARM instruction sets + +mtls-dialect= +Target RejectNegative Joined Enum(tls_type) Var(target_tls_dialect) Init(TLS_GNU) +Specify thread local storage scheme + +mtp= +Target RejectNegative Joined Enum(arm_tp_type) Var(target_thread_pointer) Init(TP_AUTO) +Specify how to access the thread pointer + +Enum +Name(arm_tp_type) Type(enum arm_tp_type) +Valid arguments to -mtp=: + +EnumValue +Enum(arm_tp_type) String(soft) Value(TP_SOFT) + +EnumValue +Enum(arm_tp_type) String(auto) Value(TP_AUTO) + +EnumValue +Enum(arm_tp_type) String(cp15) Value(TP_CP15) + +mtpcs-frame +Target Report Mask(TPCS_FRAME) +Thumb: Generate (non-leaf) stack frames even if not needed + +mtpcs-leaf-frame +Target Report Mask(TPCS_LEAF_FRAME) +Thumb: Generate (leaf) stack frames even if not needed + +mtune= +Target RejectNegative ToLower Joined Enum(processor_type) Var(arm_tune_option) Init(arm_none) +Tune code for the given processor + +; Other processor_type values are loaded from arm-tables.opt +; but that is a generated file and this is an odd-one-out. +EnumValue +Enum(processor_type) String(native) Value(-1) DriverOnly + +mwords-little-endian +Target Report RejectNegative Mask(LITTLE_WORDS) +Assume big endian bytes, little endian words. This option is deprecated. + +mvectorize-with-neon-quad +Target Report RejectNegative InverseMask(NEON_VECTORIZE_DOUBLE) +Use Neon quad-word (rather than double-word) registers for vectorization + +mvectorize-with-neon-double +Target Report RejectNegative Mask(NEON_VECTORIZE_DOUBLE) +Use Neon double-word (rather than quad-word) registers for vectorization + +mword-relocations +Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS) +Only generate absolute relocations on word sized values. + +mrestrict-it +Target Report Var(arm_restrict_it) Init(2) +Generate IT blocks appropriate for ARMv8. + +mold-rtx-costs +Target Report Mask(OLD_RTX_COSTS) +Use the old RTX costing tables (transitional). + +mnew-generic-costs +Target Report Mask(NEW_GENERIC_COSTS) +Use the new generic RTX cost tables if new core-specific cost table not available (transitional). + +mfix-cortex-m3-ldrd +Target Report Var(fix_cm3_ldrd) Init(2) +Avoid overlapping destination and address registers on LDRD instructions +that may trigger Cortex-M3 errata. + +munaligned-access +Target Report Var(unaligned_access) Init(2) +Enable unaligned word and halfword accesses to packed data. + +mneon-for-64bits +Target Report RejectNegative Var(use_neon_for_64bits) Init(0) +Use Neon to perform 64-bits operations rather than core registers. 
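+; Illustration only: these options are normally combined on the driver
+; command line, for example a Thumb-2 hard-float configuration might use
+;   -mthumb -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=hard -mtp=cp15
+; (the cpu and fpu names themselves come from the generated arm-tables.opt
+; and the FPU list, not from this file).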
+ +mslow-flash-data +Target Report Var(target_slow_flash_data) Init(0) +Assume loading data from flash is slower than fetching instructions. diff --git a/gcc-4.9/gcc/config/arm/arm1020e.md b/gcc-4.9/gcc/config/arm/arm1020e.md new file mode 100644 index 000000000..0206ea2af --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm1020e.md @@ -0,0 +1,385 @@ +;; ARM 1020E & ARM 1022E Pipeline Description +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; Contributed by Richard Earnshaw (richard.earnshaw@arm.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1020E Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 1020E core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1020e") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two pipelines: +;; +;; - An Arithmetic Logic Unit (ALU) pipeline. +;; +;; The ALU pipeline has fetch, issue, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. +;; +;; - A Load-Store Unit (LSU) pipeline. +;; +;; The LSU pipeline has decode, execute, memory, and write stages. +;; We only model the execute, memory and write stages. + +(define_cpu_unit "1020a_e,1020a_m,1020a_w" "arm1020e") +(define_cpu_unit "1020l_e,1020l_m,1020l_w" "arm1020e") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. 
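+;; A note on reading the reservations that follow: the number after each
+;; define_insn_reservation name is the result latency in cycles, and the
+;; reservation string lists the unit usage cycle by cycle, where "," moves
+;; to the next cycle, "unit*N" holds a unit for N consecutive cycles, and
+;; "a+b" uses both units in the same cycle.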
+ +;; ALU operations with no shifted operand +(define_insn_reservation "1020alu_op" 1 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) + "1020a_e,1020a_m,1020a_w") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "1020alu_shift_op" 1 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) + "1020a_e,1020a_m,1020a_w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. +(define_insn_reservation "1020alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "1020a_e*2,1020a_m,1020a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. + +;; The result of the "smul" and "smulw" instructions is not available +;; until after the memory stage. +(define_insn_reservation "1020mult1" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "smulxy,smulwy")) + "1020a_e,1020a_m,1020a_w") + +;; The "smlaxy" and "smlawx" instructions require two iterations through +;; the execute stage; the result is available immediately following +;; the execute stage. +(define_insn_reservation "1020mult2" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "smlaxy,smlalxy,smlawx")) + "1020a_e*2,1020a_m,1020a_w") + +;; The "smlalxy", "mul", and "mla" instructions require two iterations +;; through the execute stage; the result is not available until after +;; the memory stage. +(define_insn_reservation "1020mult3" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "smlalxy,mul,mla")) + "1020a_e*2,1020a_m,1020a_w") + +;; The "muls" and "mlas" instructions loop in the execute stage for +;; four iterations in order to set the flags. The value result is +;; available after three iterations. +(define_insn_reservation "1020mult4" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "muls,mlas")) + "1020a_e*4,1020a_m,1020a_w") + +;; Long multiply instructions that produce two registers of +;; output (such as umull) make their results available in two cycles; +;; the least significant word is available before the most significant +;; word. That fact is not modeled; instead, the instructions are +;; described.as if the entire result was available at the end of the +;; cycle in which both words are available. + +;; The "umull", "umlal", "smull", and "smlal" instructions all take +;; three iterations through the execute cycle, and make their results +;; available after the memory cycle. 
+(define_insn_reservation "1020mult5" 4 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "umull,umlal,smull,smlal")) + "1020a_e*3,1020a_m,1020a_w") + +;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in +;; the execute stage for five iterations in order to set the flags. +;; The value result is available after four iterations. +(define_insn_reservation "1020mult6" 4 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "umulls,umlals,smulls,smlals")) + "1020a_e*5,1020a_m,1020a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; LSU instructions require six cycles to execute. They use the ALU +;; pipeline in all but the 5th cycle, and the LSU pipeline in cycles +;; three through six. +;; Loads and stores which use a scaled register offset or scaled +;; register pre-indexed addressing mode take three cycles EXCEPT for +;; those that are base + offset with LSL of 0 or 2, or base - offset +;; with LSL of zero. The remainder take 1 cycle to execute. +;; For 4byte loads there is a bypass from the load stage + +(define_insn_reservation "1020load1_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load_byte,load1")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020store1_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store1")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +;; A load's result can be stored by an immediately following store +(define_bypass 1 "1020load1_op" "1020store1_op" "arm_no_early_store_addr_dep") + +;; On a LDM/STM operation, the LSU pipeline iterates until all of the +;; registers have been processed. +;; +;; The time it takes to load the data depends on whether or not the +;; base address is 64-bit aligned; if it is not, an additional cycle +;; is required. This model assumes that the address is always 64-bit +;; aligned. Because the processor can load two registers per cycle, +;; that assumption means that we use the same instruction reservations +;; for loading 2k and 2k - 1 registers. +;; +;; The ALU pipeline is decoupled after the first cycle unless there is +;; a register dependency; the dependency is cleared as soon as the LDM/STM +;; has dealt with the corresponding register. So for example, +;; stmia sp, {r0-r3} +;; add r0, r0, #4 +;; will have one fewer stalls than +;; stmia sp, {r0-r3} +;; add r3, r3, #4 +;; +;; As with ALU operations, if one of the destination registers is the +;; PC, there are additional stalls; that is not modeled. 
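+;; (Sharing reservations for 2k and 2k - 1 registers is why load3 and load4,
+;; and likewise store3 and store4, each map onto a single reservation below.)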
+ +(define_insn_reservation "1020load2_op" 2 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load2")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020store2_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store2")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "1020load34_op" 3 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "load3,load4")) + "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w") + +(define_insn_reservation "1020store34_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "store3,store4")) + "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "1020branch_op" 0 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "branch")) + "1020a_e") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "1020call_op" 32 + (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "type" "call")) + "1020a_e*32") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_cpu_unit "v10_fmac" "arm1020e") + +(define_cpu_unit "v10_ds" "arm1020e") + +(define_cpu_unit "v10_fmstat" "arm1020e") + +(define_cpu_unit "v10_ls1,v10_ls2,v10_ls3" "arm1020e") + +;; fmstat is a serializing instruction. It will stall the core until +;; the mac and ds units have completed. +(exclusion_set "v10_fmac,v10_ds" "v10_fmstat") + +(define_attr "vfp10" "yes,no" + (const (if_then_else (and (eq_attr "tune" "arm1020e,arm1022e") + (eq_attr "fpu" "vfp")) + (const_string "yes") (const_string "no")))) + +;; Note, no instruction can issue to the VFP if the core is stalled in the +;; first execute state. We model this by using 1020a_e in the first cycle. 
+(define_insn_reservation "v10_ffarith" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_farith" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "faddd,fadds")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_cvt" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_cvt,f_cvti2f,f_cvtf2i")) + "1020a_e+v10_fmac") + +(define_insn_reservation "v10_fmul" 6 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fmuls,fmacs,ffmas,fmuld,fmacd,ffmad")) + "1020a_e+v10_fmac*2") + +(define_insn_reservation "v10_fdivs" 18 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fdivs, fsqrts")) + "1020a_e+v10_ds*14") + +(define_insn_reservation "v10_fdivd" 32 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "fdivd, fsqrtd")) + "1020a_e+v10_fmac+v10_ds*28") + +(define_insn_reservation "v10_floads" 4 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_loads")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +;; We model a load of a double as needing all the vfp ls* stage in cycle 1. +;; This gives the correct mix between single-and double loads where a flds +;; followed by and fldd will stall for one cycle, but two back-to-back fldd +;; insns stall for two cycles. +(define_insn_reservation "v10_floadd" 5 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_loadd")) + "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3") + +;; Moves to/from arm regs also use the load/store pipeline. + +(define_insn_reservation "v10_c2v" 4 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_mcr,f_mcrr")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +(define_insn_reservation "v10_fstores" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_stores")) + "1020a_e+1020l_e+v10_ls1,v10_ls2") + +(define_insn_reservation "v10_fstored" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_stored")) + "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3") + +(define_insn_reservation "v10_v2c" 1 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_mrc,f_mrrc")) + "1020a_e+1020l_e,1020l_m,1020l_w") + +(define_insn_reservation "v10_to_cpsr" 2 + (and (eq_attr "vfp10" "yes") + (eq_attr "type" "f_flag")) + "1020a_e+v10_fmstat,1020a_e+1020l_e,1020l_m,1020l_w") + +;; VFP bypasses + +;; There are bypasses for most operations other than store + +(define_bypass 3 + "v10_c2v,v10_floads" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd,v10_cvt") + +(define_bypass 4 + "v10_floadd" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +;; Arithmetic to other arithmetic saves a cycle due to forwarding +(define_bypass 4 + "v10_ffarith,v10_farith" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 5 + "v10_fmul" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 17 + "v10_fdivs" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +(define_bypass 31 + "v10_fdivd" + "v10_ffarith,v10_farith,v10_fmul,v10_fdivs,v10_fdivd") + +;; VFP anti-dependencies. + +;; There is one anti-dependence in the following case (not yet modelled): +;; - After a store: one extra cycle for both fsts and fstd +;; Note, back-to-back fstd instructions will overload the load/store datapath +;; causing a two-cycle stall. 
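+;; For reference: a define_bypass latency replaces the producer reservation's
+;; default latency for the listed consumers only.  For example, the "3" in
+;; the v10_c2v,v10_floads bypass above means a value loaded by the 4-cycle
+;; v10_floads reservation is ready for a dependent VFP arithmetic operation
+;; one cycle earlier than its default latency.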
diff --git a/gcc-4.9/gcc/config/arm/arm1026ejs.md b/gcc-4.9/gcc/config/arm/arm1026ejs.md new file mode 100644 index 000000000..3f290b475 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm1026ejs.md @@ -0,0 +1,250 @@ +;; ARM 1026EJ-S Pipeline Description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1026EJ-S Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 1026EJ-S core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1026ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two pipelines: +;; +;; - An Arithmetic Logic Unit (ALU) pipeline. +;; +;; The ALU pipeline has fetch, issue, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. +;; +;; - A Load-Store Unit (LSU) pipeline. +;; +;; The LSU pipeline has decode, execute, memory, and write stages. +;; We only model the execute, memory and write stages. + +(define_cpu_unit "a_e,a_m,a_w" "arm1026ejs") +(define_cpu_unit "l_e,l_m,l_w" "arm1026ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "alu_op" 1 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) + "a_e,a_m,a_w") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "alu_shift_op" 1 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) + "a_e,a_m,a_w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. 
+(define_insn_reservation "alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "a_e*2,a_m,a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. + +;; The result of the "smul" and "smulw" instructions is not available +;; until after the memory stage. +(define_insn_reservation "mult1" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "smulxy,smulwy")) + "a_e,a_m,a_w") + +;; The "smlaxy" and "smlawx" instructions require two iterations through +;; the execute stage; the result is available immediately following +;; the execute stage. +(define_insn_reservation "mult2" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "smlaxy,smlalxy,smlawx")) + "a_e*2,a_m,a_w") + +;; The "smlalxy", "mul", and "mla" instructions require two iterations +;; through the execute stage; the result is not available until after +;; the memory stage. +(define_insn_reservation "mult3" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "smlalxy,mul,mla")) + "a_e*2,a_m,a_w") + +;; The "muls" and "mlas" instructions loop in the execute stage for +;; four iterations in order to set the flags. The value result is +;; available after three iterations. +(define_insn_reservation "mult4" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "muls,mlas")) + "a_e*4,a_m,a_w") + +;; Long multiply instructions that produce two registers of +;; output (such as umull) make their results available in two cycles; +;; the least significant word is available before the most significant +;; word. That fact is not modeled; instead, the instructions are +;; described as if the entire result was available at the end of the +;; cycle in which both words are available. + +;; The "umull", "umlal", "smull", and "smlal" instructions all take +;; three iterations through the execute cycle, and make their results +;; available after the memory cycle. +(define_insn_reservation "mult5" 4 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "umull,umlal,smull,smlal")) + "a_e*3,a_m,a_w") + +;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in +;; the execute stage for five iterations in order to set the flags. +;; The value result is available after four iterations. +(define_insn_reservation "mult6" 4 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "umulls,umlals,smulls,smlals")) + "a_e*5,a_m,a_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; LSU instructions require six cycles to execute. They use the ALU +;; pipeline in all but the 5th cycle, and the LSU pipeline in cycles +;; three through six. +;; Loads and stores which use a scaled register offset or scaled +;; register pre-indexed addressing mode take three cycles EXCEPT for +;; those that are base + offset with LSL of 0 or 2, or base - offset +;; with LSL of zero. 
The remainder take 1 cycle to execute. +;; For 4byte loads there is a bypass from the load stage + +(define_insn_reservation "load1_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load_byte,load1")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "store1_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store1")) + "a_e+l_e,l_m,a_w+l_w") + +;; A load's result can be stored by an immediately following store +(define_bypass 1 "load1_op" "store1_op" "arm_no_early_store_addr_dep") + +;; On a LDM/STM operation, the LSU pipeline iterates until all of the +;; registers have been processed. +;; +;; The time it takes to load the data depends on whether or not the +;; base address is 64-bit aligned; if it is not, an additional cycle +;; is required. This model assumes that the address is always 64-bit +;; aligned. Because the processor can load two registers per cycle, +;; that assumption means that we use the same instruction reservations +;; for loading 2k and 2k - 1 registers. +;; +;; The ALU pipeline is stalled until the completion of the last memory +;; stage in the LSU pipeline. That is modeled by keeping the ALU +;; execute stage busy until that point. +;; +;; As with ALU operations, if one of the destination registers is the +;; PC, there are additional stalls; that is not modeled. + +(define_insn_reservation "load2_op" 2 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load2")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "store2_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store2")) + "a_e+l_e,l_m,a_w+l_w") + +(define_insn_reservation "load34_op" 3 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "load3,load4")) + "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w") + +(define_insn_reservation "store34_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "store3,store4")) + "a_e+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "branch_op" 0 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "branch")) + "nothing") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "call_op" 32 + (and (eq_attr "tune" "arm1026ejs") + (eq_attr "type" "call")) + "nothing") diff --git a/gcc-4.9/gcc/config/arm/arm1136jfs.md b/gcc-4.9/gcc/config/arm/arm1136jfs.md new file mode 100644 index 000000000..9e941da76 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm1136jfs.md @@ -0,0 +1,387 @@ +;; ARM 1136J[F]-S Pipeline Description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM1136JF-S Technical Reference Manual, Copyright (c) 2003 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 1136J-S and 1136JF-S cores. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm1136jfs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are three distinct pipelines (page 1-26 and following): +;; +;; - A 4-stage decode pipeline, shared by all three. It has fetch (1), +;; fetch (2), decode, and issue stages. Since this is always involved, +;; we do not model it in the scheduler. +;; +;; - A 4-stage ALU pipeline. It has shifter, ALU (main integer operations), +;; and saturation stages. The fourth stage is writeback; see below. +;; +;; - A 4-stage multiply-accumulate pipeline. It has three stages, called +;; MAC1 through MAC3, and a fourth writeback stage. +;; +;; The 4th-stage writeback is shared between the ALU and MAC pipelines, +;; which operate in lockstep. Results from either pipeline will be +;; moved into the writeback stage. Because the two pipelines operate +;; in lockstep, we schedule them as a single "execute" pipeline. +;; +;; - A 4-stage LSU pipeline. It has address generation, data cache (1), +;; data cache (2), and writeback stages. (Note that this pipeline, +;; including the writeback stage, is independent from the ALU & LSU pipes.) + +(define_cpu_unit "e_1,e_2,e_3,e_wb" "arm1136jfs") ; ALU and MAC +; e_1 = Sh/Mac1, e_2 = ALU/Mac2, e_3 = SAT/Mac3 +(define_cpu_unit "l_a,l_dc1,l_dc2,l_wb" "arm1136jfs") ; Load/Store + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require eight cycles to execute, and use the ALU +;; pipeline in each of the eight stages. The results are available +;; after the alu stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modelled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "11_alu_op" 2 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + multiple,no_insn")) + "e_1,e_2,e_3,e_wb") + +;; ALU operations with a shift-by-constant operand +(define_insn_reservation "11_alu_shift_op" 2 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) + "e_1,e_2,e_3,e_wb") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. 
Pretend we take two cycles in +;; the shift stage. +(define_insn_reservation "11_alu_shift_reg_op" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "e_1*2,e_2,e_3,e_wb") + +;; alu_ops can start sooner, if there is no shifter dependency +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_op") +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_op") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the first two execute stages until +;; the instruction has been passed through the multiplier array enough +;; times. + +;; Multiply and multiply-accumulate results are available after four stages. +(define_insn_reservation "11_mult1" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "mul,mla")) + "e_1*2,e_2,e_3,e_wb") + +;; The *S variants set the condition flags, which requires three more cycles. +(define_insn_reservation "11_mult2" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "muls,mlas")) + "e_1*2,e_2,e_3,e_wb") + +(define_bypass 3 "11_mult1,11_mult2" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_op") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "11_mult1,11_mult2" + "11_store1" + "arm_no_early_store_addr_dep") + +;; Signed and unsigned multiply long results are available across two cycles; +;; the less significant word is available one cycle before the more significant +;; word. Here we conservatively wait until both are available, which is +;; after three iterations and the memory cycle. The same is also true of +;; the two multiply-accumulate instructions. +(define_insn_reservation "11_mult3" 5 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smull,umull,smlal,umlal")) + "e_1*3,e_2,e_3,e_wb*2") + +;; The *S variants set the condition flags, which requires three more cycles. 
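+;;
+;; For illustration only (not taken from the TRM): both "smull r0, r1, r2, r3"
+;; and its flag-setting form are modelled with a result latency of 5 for the
+;; two destination registers; the bypasses that follow let a plain ALU
+;; consumer of either half start one cycle earlier, after 4 cycles.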
+(define_insn_reservation "11_mult4" 5 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smulls,umulls,smlals,umlals")) + "e_1*3,e_2,e_3,e_wb*2") + +(define_bypass 4 "11_mult3,11_mult4" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_op") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "11_mult3,11_mult4" + "11_store1" + "arm_no_early_store_addr_dep") + +;; Various 16x16->32 multiplies and multiply-accumulates, using combinations +;; of high and low halves of the argument registers. They take a single +;; pass through the pipeline and make the result available after three +;; cycles. +(define_insn_reservation "11_mult5" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,\ + smusd,smusdx,smlsd,smlsdx")) + "e_1,e_2,e_3,e_wb") + +(define_bypass 2 "11_mult5" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 2 "11_mult5" + "11_alu_op") +(define_bypass 2 "11_mult5" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_mult5" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "11_mult5" + "11_store1" + "arm_no_early_store_addr_dep") + +;; The same idea, then the 32-bit result is added to a 64-bit quantity. +(define_insn_reservation "11_mult6" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smlalxy")) + "e_1*2,e_2,e_3,e_wb*2") + +;; Signed 32x32 multiply, then the most significant 32 bits are extracted +;; and are available after the memory stage. +(define_insn_reservation "11_mult7" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "smmul,smmulr")) + "e_1*2,e_2,e_3,e_wb") + +(define_bypass 3 "11_mult6,11_mult7" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_op") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "11_mult6,11_mult7" + "11_store1" + "arm_no_early_store_addr_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; These vary greatly depending on their arguments and the results of +;; stat prediction. Cycle count ranges from zero (unconditional branch, +;; folded dynamic prediction) to seven (incorrect predictions, etc). We +;; assume an optimal case for now, because the cost of a cache miss +;; overwhelms the cost of everything else anyhow. + +(define_insn_reservation "11_branches" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "branch")) + "nothing") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "11_call" 32 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "call")) + "nothing") + +;; Branches are predicted. A correctly predicted branch will be no +;; cost, but we're conservative here, and use the timings a +;; late-register would give us. 
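+;;
+;; For illustration only (not taken from the TRM): a branch that depends on
+;; the result of the immediately preceding ALU operation is modelled as
+;; seeing that result after one cycle; after two cycles if the producer
+;; used a register-specified shift or was a single-word or two-register
+;; load, and after three cycles for a load of three or more registers.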
+(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_branches") +(define_bypass 2 "11_alu_shift_reg_op" + "11_branches") +(define_bypass 2 "11_load1,11_load2" + "11_branches") +(define_bypass 3 "11_load34" + "11_branches") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback. +;; These models assume that all memory references hit in dcache. Also, +;; if the PC is one of the registers involved, there are additional stalls +;; not modelled here. Addressing modes are also not modelled. + +(define_insn_reservation "11_load1" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load1")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load byte results are not available until the writeback stage, where +;; the correct byte is extracted. + +(define_insn_reservation "11_loadb" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load_byte")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +(define_insn_reservation "11_store1" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store1")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load/store double words into adjacent registers. The timing and +;; latencies are different depending on whether the address is 64-bit +;; aligned. This model assumes that it is. +(define_insn_reservation "11_load2" 3 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load2")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +(define_insn_reservation "11_store2" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store2")) + "l_a+e_1,l_dc1,l_dc2,l_wb") + +;; Load/store multiple registers. Two registers are stored per cycle. +;; Actual timing depends on how many registers are affected, so we +;; optimistically schedule a low latency. +(define_insn_reservation "11_load34" 4 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "load3,load4")) + "l_a+e_1,l_dc1*2,l_dc2,l_wb") + +(define_insn_reservation "11_store34" 0 + (and (eq_attr "tune" "arm1136js,arm1136jfs") + (eq_attr "type" "store3,store4")) + "l_a+e_1,l_dc1*2,l_dc2,l_wb") + +;; A store can start immediately after an alu op, if that alu op does +;; not provide part of the address to access. 
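+;;
+;; For illustration only (not taken from the TRM): in "add r0, r0, #1"
+;; followed by "str r0, [r1]" the store depends on r0 only as the data to
+;; be stored, so the bypass below lets the store start on the next cycle;
+;; had the add produced the base address r1 instead, the full 2-cycle ALU
+;; latency (3 cycles for a register-shifted operation) would apply.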
+(define_bypass 1 "11_alu_op,11_alu_shift_op" + "11_store1" + "arm_no_early_store_addr_dep") +(define_bypass 2 "11_alu_shift_reg_op" + "11_store1" + "arm_no_early_store_addr_dep") + +;; An alu op can start sooner after a load, if that alu op does not +;; have an early register dependency on the load +(define_bypass 2 "11_load1" + "11_alu_op") +(define_bypass 2 "11_load1" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 2 "11_load1" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +(define_bypass 3 "11_loadb" + "11_alu_op") +(define_bypass 3 "11_loadb" + "11_alu_shift_op" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "11_loadb" + "11_alu_shift_reg_op" + "arm_no_early_alu_shift_dep") + +;; A mul op can start sooner after a load, if that mul op does not +;; have an early multiply dependency +(define_bypass 2 "11_load1" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_load34" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") +(define_bypass 3 "11_loadb" + "11_mult1,11_mult2,11_mult3,11_mult4,11_mult5,11_mult6,11_mult7" + "arm_no_early_mul_dep") + +;; A store can start sooner after a load, if that load does not +;; produce part of the address to access +(define_bypass 2 "11_load1" + "11_store1" + "arm_no_early_store_addr_dep") +(define_bypass 3 "11_loadb" + "11_store1" + "arm_no_early_store_addr_dep") diff --git a/gcc-4.9/gcc/config/arm/arm926ejs.md b/gcc-4.9/gcc/config/arm/arm926ejs.md new file mode 100644 index 000000000..883935dcf --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm926ejs.md @@ -0,0 +1,198 @@ +;; ARM 926EJ-S Pipeline Description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM +;; Limited. +;; + +;; This automaton provides a pipeline description for the ARM +;; 926EJ-S core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "arm926ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. 
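+;;
+;; Note (an observation about this description, not taken from the TRM):
+;; unlike the 1026EJ-S and 1136J(F)-S models, no bypasses are defined for
+;; this core, so the scheduler uses the result latencies of the
+;; reservations below directly.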
+ +(define_cpu_unit "e,m,w" "arm926ejs") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations with no shifted operand +(define_insn_reservation "9_alu_op" 1 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + shift_imm,shift_reg,extend,\ + mov_imm,mov_reg,mov_shift,\ + mvn_imm,mvn_reg,mvn_shift,\ + multiple,no_insn")) + "e,m,w") + +;; ALU operations with a shift-by-register operand +;; These really stall in the decoder, in order to read +;; the shift value in a second cycle. Pretend we take two cycles in +;; the execute stage. +(define_insn_reservation "9_alu_shift_reg_op" 2 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "e*2,m,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. Multiply operations occur in both the execute and memory +;; stages of the pipeline + +(define_insn_reservation "9_mult1" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "smlalxy,mul,mla")) + "e*2,m,w") + +(define_insn_reservation "9_mult2" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "muls,mlas")) + "e*3,m,w") + +(define_insn_reservation "9_mult3" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "umull,umlal,smull,smlal")) + "e*3,m,w") + +(define_insn_reservation "9_mult4" 5 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "umulls,umlals,smulls,smlals")) + "e*4,m,w") + +(define_insn_reservation "9_mult5" 2 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "smulxy,smlaxy,smlawx")) + "e,m,w") + +(define_insn_reservation "9_mult6" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "smlalxy")) + "e*2,m,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. + +;; Loads with a shifted offset take 3 cycles, and are (a) probably the +;; most common and (b) the pessimistic assumption will lead to fewer stalls. 
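+;;
+;; For illustration only (not taken from the TRM): a shifted-offset load such
+;; as "ldr r0, [r1, r2, lsl #2]" is therefore modelled with a result latency
+;; of 3 cycles, so the scheduler tries to place instructions that read r0 at
+;; least three cycles later.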
+(define_insn_reservation "9_load1_op" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load1,load_byte")) + "e*2,m,w") + +(define_insn_reservation "9_store1_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store1")) + "e,m,w") + +;; multiple word loads and stores +(define_insn_reservation "9_load2_op" 3 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load2")) + "e,m*2,w") + +(define_insn_reservation "9_load3_op" 4 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load3")) + "e,m*3,w") + +(define_insn_reservation "9_load4_op" 5 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "load4")) + "e,m*4,w") + +(define_insn_reservation "9_store2_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store2")) + "e,m*2,w") + +(define_insn_reservation "9_store3_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store3")) + "e,m*3,w") + +(define_insn_reservation "9_store4_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "store4")) + "e,m*4,w") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The ARM +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "9_branch_op" 0 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "branch")) + "nothing") + +;; The latency for a call is not predictable. Therefore, we use 32 as +;; roughly equivalent to positive infinity. + +(define_insn_reservation "9_call_op" 32 + (and (eq_attr "tune" "arm926ejs") + (eq_attr "type" "call")) + "nothing") diff --git a/gcc-4.9/gcc/config/arm/arm_acle.h b/gcc-4.9/gcc/config/arm/arm_acle.h new file mode 100644 index 000000000..aaa7affee --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm_acle.h @@ -0,0 +1,100 @@ +/* ARM Non-NEON ACLE intrinsics include file. + + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#ifndef _GCC_ARM_ACLE_H +#define _GCC_ARM_ACLE_H + +#include +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __ARM_FEATURE_CRC32 +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32b (uint32_t __a, uint8_t __b) +{ + return __builtin_arm_crc32b (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32h (uint32_t __a, uint16_t __b) +{ + return __builtin_arm_crc32h (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32w (uint32_t __a, uint32_t __b) +{ + return __builtin_arm_crc32w (__a, __b); +} + +#ifdef __ARM_32BIT_STATE +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32d (uint32_t __a, uint64_t __b) +{ + uint32_t __d; + + __d = __crc32w (__crc32w (__a, __b & 0xffffffffULL), __b >> 32); + return __d; +} +#endif + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cb (uint32_t __a, uint8_t __b) +{ + return __builtin_arm_crc32cb (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32ch (uint32_t __a, uint16_t __b) +{ + return __builtin_arm_crc32ch (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cw (uint32_t __a, uint32_t __b) +{ + return __builtin_arm_crc32cw (__a, __b); +} + +#ifdef __ARM_32BIT_STATE +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cd (uint32_t __a, uint64_t __b) +{ + uint32_t __d; + + __d = __crc32cw (__crc32cw (__a, __b & 0xffffffffULL), __b >> 32); + return __d; +} +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gcc-4.9/gcc/config/arm/arm_neon.h b/gcc-4.9/gcc/config/arm/arm_neon.h new file mode 100644 index 000000000..37a6e611b --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm_neon.h @@ -0,0 +1,13429 @@ +/* ARM NEON intrinsics include file. This file is generated automatically + using neon-gen.ml. Please do not edit manually. + + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_ARM_NEON_H +#define _GCC_ARM_NEON_H 1 + +#ifndef __ARM_NEON__ +#error You must enable NEON instructions (e.g. 
-mfloat-abi=softfp -mfpu=neon) to use arm_neon.h +#else + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef __builtin_neon_qi int8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_di int64x1_t; +typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8))); +#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64x1_t; +#endif +typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_uhi uint16x4_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_usi uint32x2_t __attribute__ ((__vector_size__ (8))); +typedef __builtin_neon_udi uint64x1_t; +typedef __builtin_neon_qi int8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_hi int16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_si int32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_di int64x2_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_sf float32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_poly8 poly8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_poly16 poly16x8_t __attribute__ ((__vector_size__ (16))); +#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64x2_t __attribute__ ((__vector_size__ (16))); +#endif +typedef __builtin_neon_uqi uint8x16_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_uhi uint16x8_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_usi uint32x4_t __attribute__ ((__vector_size__ (16))); +typedef __builtin_neon_udi uint64x2_t __attribute__ ((__vector_size__ (16))); + +typedef float float32_t; +typedef __builtin_neon_poly8 poly8_t; +typedef __builtin_neon_poly16 poly16_t; +#ifdef __ARM_FEATURE_CRYPTO +typedef __builtin_neon_poly64 poly64_t; +typedef __builtin_neon_poly128 poly128_t; +#endif + +typedef struct int8x8x2_t +{ + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t +{ + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t +{ + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t +{ + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t +{ + int32x2_t val[2]; +} int32x2x2_t; + +typedef struct int32x4x2_t +{ + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t +{ + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t +{ + int64x2_t val[2]; +} int64x2x2_t; + +typedef struct uint8x8x2_t +{ + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t +{ + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t +{ + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t +{ + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t +{ + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct uint32x4x2_t +{ + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t +{ + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t +{ + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float32x2x2_t +{ + float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t +{ + float32x4_t val[2]; +} float32x4x2_t; + +typedef struct 
poly8x8x2_t +{ + poly8x8_t val[2]; +} poly8x8x2_t; + +typedef struct poly8x16x2_t +{ + poly8x16_t val[2]; +} poly8x16x2_t; + +typedef struct poly16x4x2_t +{ + poly16x4_t val[2]; +} poly16x4x2_t; + +typedef struct poly16x8x2_t +{ + poly16x8_t val[2]; +} poly16x8x2_t; + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x2_t +{ + poly64x1_t val[2]; +} poly64x1x2_t; +#endif + + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x2_t +{ + poly64x2_t val[2]; +} poly64x2x2_t; +#endif + + +typedef struct int8x8x3_t +{ + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t +{ + int8x16_t val[3]; +} int8x16x3_t; + +typedef struct int16x4x3_t +{ + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t +{ + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t +{ + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t +{ + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t +{ + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t +{ + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t +{ + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t +{ + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t +{ + uint16x4_t val[3]; +} uint16x4x3_t; + +typedef struct uint16x8x3_t +{ + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t +{ + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t +{ + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t +{ + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t +{ + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float32x2x3_t +{ + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t +{ + float32x4_t val[3]; +} float32x4x3_t; + +typedef struct poly8x8x3_t +{ + poly8x8_t val[3]; +} poly8x8x3_t; + +typedef struct poly8x16x3_t +{ + poly8x16_t val[3]; +} poly8x16x3_t; + +typedef struct poly16x4x3_t +{ + poly16x4_t val[3]; +} poly16x4x3_t; + +typedef struct poly16x8x3_t +{ + poly16x8_t val[3]; +} poly16x8x3_t; + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x3_t +{ + poly64x1_t val[3]; +} poly64x1x3_t; +#endif + + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x3_t +{ + poly64x2_t val[3]; +} poly64x2x3_t; +#endif + + +typedef struct int8x8x4_t +{ + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t +{ + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t +{ + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t +{ + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t +{ + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t +{ + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t +{ + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t +{ + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t +{ + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t +{ + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t +{ + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t +{ + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t +{ + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t +{ + uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t +{ + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef struct uint64x2x4_t +{ + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float32x2x4_t +{ + float32x2_t val[4]; +} float32x2x4_t; + +typedef struct float32x4x4_t +{ + float32x4_t val[4]; +} float32x4x4_t; + +typedef 
struct poly8x8x4_t +{ + poly8x8_t val[4]; +} poly8x8x4_t; + +typedef struct poly8x16x4_t +{ + poly8x16_t val[4]; +} poly8x16x4_t; + +typedef struct poly16x4x4_t +{ + poly16x4_t val[4]; +} poly16x4x4_t; + +typedef struct poly16x8x4_t +{ + poly16x8_t val[4]; +} poly16x8x4_t; + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x1x4_t +{ + poly64x1_t val[4]; +} poly64x1x4_t; +#endif + + +#ifdef __ARM_FEATURE_CRYPTO +typedef struct poly64x2x4_t +{ + poly64x2_t val[4]; +} poly64x2x4_t; +#endif + + + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vaddq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return 
(uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddlv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vaddlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddlv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vaddw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vaddw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vaddw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vaddwv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vaddw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vaddwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vaddw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vaddwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vaddw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vaddwv2si ((int64x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhadd_u8 
(uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrhadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrhadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrhadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrhaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrhaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrhaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 5); +} + +__extension__ static 
__inline uint8x16_t __attribute__ ((__always_inline__)) +vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqaddv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqadd_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqadddi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqadd_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqadddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqaddq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqaddq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqaddq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqaddv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqaddq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqaddv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return 
(uint32x4_t)__builtin_neon_vqaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vaddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vraddhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vraddhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vraddhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmul_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 
+vmul_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmul_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmulq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmulq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmulq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (poly8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 5); +} + +__extension__ static __inline 
int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmull_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vmullv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmullv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vmullv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmull_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmullv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vmullv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmull_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqdmullv2si (__a, __b, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vmlav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vmlav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_u32 (uint32x2_t 
__a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vmlav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vmlav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline int8x8_t __attribute__ 
((__always_inline__)) +vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vmlsv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vmlsv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vmlsv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vmlsv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlsv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlsv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) +vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vmlslv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlslv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlslv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1); +} + +#ifdef __ARM_FEATURE_FMA +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vfmav2sf (__a, __b, __c, 3); +} + +#endif +#ifdef __ARM_FEATURE_FMA +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vfmav4sf (__a, __b, __c, 3); +} + +#endif +#ifdef __ARM_FEATURE_FMA +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vfmsv2sf (__a, __b, __c, 3); +} + +#endif +#ifdef __ARM_FEATURE_FMA +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vfmsv4sf (__a, __b, __c, 3); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndn_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintnv2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndqn_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintnv4sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnda_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintav2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndqa_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintav4sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndp_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintpv2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndqp_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintpv4sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ 
static __inline float32x2_t __attribute__ ((__always_inline__)) +vrndm_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintmv2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndqm_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintmv4sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrnd_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrintzv2sf (__a); +} + +#endif +#if __ARM_ARCH >= 8 +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrndq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrintzv4sf (__a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vsub_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsubq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 
+vsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsublv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsublv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsublv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsublv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vsublv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsublv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsubw_s8 (int16x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsubw_s16 (int32x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsubw_s32 (int64x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vsubwv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsubw_u8 (uint16x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vsubwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsubw_u16 (uint32x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vsubwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsubw_u32 (uint64x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vsubwv2si ((int64x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vhsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vhsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 1); +} + 
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vhsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vhsubv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vhsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vhsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vhsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vhsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vhsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vhsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vhsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vhsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vhsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vhsubv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vhsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vhsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vhsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vhsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vhsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vhsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqsub_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqsub_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqsub_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqsubv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqsub_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqsubdi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqsub_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqsub_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqsub_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqsub_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return 
(uint64x1_t)__builtin_neon_vqsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqsubq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqsubq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqsubq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqsubv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqsubq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqsubv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsubhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsubhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsubhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsubhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsubhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsubhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsubhn_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsubhn_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsubhn_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 
+vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 4); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vceq_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vceqv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vceq_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vceqv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vceq_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vceqq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vceqv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vceqq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vceqv4si ((int32x4_t) __a, (int32x4_t) __b, 0); 
+} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vceqq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcge_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcge_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcge_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 1); 
+} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgev2sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcle_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcle_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcle_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __b, (int32x2_t) __a, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgev4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcleq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcleq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcleq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __b, (int32x4_t) __a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcgt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcgt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcgt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return 
(uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclt_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclt_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclt_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __b, (int32x2_t) __a, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_f32 (float32x4_t __a, float32x4_t __b) +{ 
+ return (uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcltq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcltq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcltq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __b, (int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcage_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagev2sf (__a, __b, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcageq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagev4sf (__a, __b, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcale_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagev2sf (__b, __a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaleq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagev4sf (__b, __a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcagt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcagtq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcalt_f32 (float32x2_t __a, float32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcaltq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_s8 (int8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_s16 (int16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_s32 (int32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtst_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtst_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vtstv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtst_p8 (poly8x8_t __a, 
poly8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtstq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtstq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtstq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vabdv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vabdv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vabdv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vabdv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vabd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vabdv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vabd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vabdv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vabd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vabdv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabdq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vabdv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vabdv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vabdv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabdq_f32 
(float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vabdv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabdq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vabdv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vabdv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vabdv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabdl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabdl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabdl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int64x2_t)__builtin_neon_vabdlv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabdl_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vabdlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabdl_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vabdlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabdl_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vabdlv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vabav8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vabav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vabav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vabav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vabav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vabav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vabav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vabalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vabalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vabalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmax_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmax_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmax_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vmaxv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmax_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vmaxv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmax_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmax_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return 
(uint16x4_t)__builtin_neon_vmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmax_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmaxq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmaxq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmaxq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vmaxv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vmaxv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vmaxv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vmaxv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vmaxv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmin_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vminv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmin_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vminv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmin_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vminv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmin_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vminv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmin_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmin_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmin_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vminv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vminq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vminv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vminq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vminv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vminq_s32 (int32x4_t __a, int32x4_t __b) 
+{ + return (int32x4_t)__builtin_neon_vminv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vminv4sf (__a, __b, 3); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vminq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vminv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vminq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vminv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vminq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vminv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpadd_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadd_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadd_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpaddv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpadd_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpaddv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpadd_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadd_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadd_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpaddl_s8 (int8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vpaddlv8qi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpaddl_s16 (int16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vpaddlv4hi (__a, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpaddl_s32 (int32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vpaddlv2si (__a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpaddl_u8 (uint8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vpaddlv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpaddl_u16 (uint16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vpaddlv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpaddl_u32 (uint32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vpaddlv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpaddlq_s8 (int8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vpaddlv16qi (__a, 1); +} + +__extension__ static __inline 
int32x4_t __attribute__ ((__always_inline__)) +vpaddlq_s16 (int16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vpaddlv8hi (__a, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpaddlq_s32 (int32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vpaddlv4si (__a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpaddlq_u8 (uint8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vpaddlv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpaddlq_u16 (uint16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vpaddlv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpaddlq_u32 (uint32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vpaddlv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpadal_s8 (int16x4_t __a, int8x8_t __b) +{ + return (int16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpadal_s16 (int32x2_t __a, int16x4_t __b) +{ + return (int32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vpadal_s32 (int64x1_t __a, int32x2_t __b) +{ + return (int64x1_t)__builtin_neon_vpadalv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpadal_u8 (uint16x4_t __a, uint8x8_t __b) +{ + return (uint16x4_t)__builtin_neon_vpadalv8qi ((int16x4_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpadal_u16 (uint32x2_t __a, uint16x4_t __b) +{ + return (uint32x2_t)__builtin_neon_vpadalv4hi ((int32x2_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vpadal_u32 (uint64x1_t __a, uint32x2_t __b) +{ + return (uint64x1_t)__builtin_neon_vpadalv2si ((int64x1_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vpadalq_s8 (int16x8_t __a, int8x16_t __b) +{ + return (int16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vpadalq_s16 (int32x4_t __a, int16x8_t __b) +{ + return (int32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vpadalq_s32 (int64x2_t __a, int32x4_t __b) +{ + return (int64x2_t)__builtin_neon_vpadalv4si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vpadalq_u8 (uint16x8_t __a, uint8x16_t __b) +{ + return (uint16x8_t)__builtin_neon_vpadalv16qi ((int16x8_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vpadalq_u16 (uint32x4_t __a, uint16x8_t __b) +{ + return (uint32x4_t)__builtin_neon_vpadalv8hi ((int32x4_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vpadalq_u32 (uint64x2_t __a, uint32x4_t __b) +{ + return (uint64x2_t)__builtin_neon_vpadalv4si ((int64x2_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmax_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ 
((__always_inline__)) +vpmax_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmax_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmax_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpmaxv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmax_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmax_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmax_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vpmin_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vpminv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vpmin_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vpminv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vpmin_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vpminv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vpmin_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vpminv2sf (__a, __b, 3); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vpmin_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vpminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vpmin_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vpminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vpmin_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vpminv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecps_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b, 3); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b, 3); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrts_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b, 3); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return (float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 1); +} + +__extension__ static __inline 
int16x4_t __attribute__ ((__always_inline__)) +vshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vshldi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vshldi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return 
(int64x1_t)__builtin_neon_vshldi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vshldi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqshldi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 0); +} + 
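As a point of reference for the register-shift intrinsics above, here is a minimal usage sketch (an editor's illustration, not part of the patch), assuming a NEON-enabled compile (e.g. -mfpu=neon): vshl_* shifts each lane of its first operand left by the signed per-lane count in the second operand, negative counts shift right, the 'r' forms round, and the 'q' forms saturate.

#include <arm_neon.h>

/* Per-lane variable shift: each lane of 'values' is shifted by the signed
   count in the matching lane of 'exponents'; negative counts shift right.
   The saturating 'q' form clamps on overflow instead of wrapping.  */
static inline int16x4_t
scale_by_exponents (int16x4_t values, int16x4_t exponents)
{
  return vqshl_s16 (values, exponents);
}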
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqshldi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshl_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshl_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshl_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqrshl_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vqshldi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshl_u8 (uint8x8_t __a, int8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshl_u16 (uint16x4_t __a, int16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshl_u32 (uint32x2_t __a, int32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqrshl_u64 (uint64x1_t __a, int64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vqshldi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline 
int8x16_t __attribute__ ((__always_inline__)) +vqrshlq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrshlq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrshlq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqrshlq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshr_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshr_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshr_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshr_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshr_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshr_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrshr_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshr_ndi (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshr_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshr_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshr_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrshr_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshr_ndi ((int64x1_t) __a, __b, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrshrq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) 
+vrshrq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrshrq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrshrq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrshrq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 1); +} + +__extension__ static __inline int32x2_t 
__attribute__ ((__always_inline__)) +vqshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqrshrn_n_s16 (int16x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_s32 (int32x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_s64 (int64x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshrn_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrn_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrn_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 4); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqrshrun_n_s16 (int16x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 5); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqrshrun_n_s32 (int32x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 5); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqrshrun_n_s64 (int64x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 5); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 1); +} + +__extension__ 
static __inline int32x2_t __attribute__ ((__always_inline__)) +vshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vshl_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vshl_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vshl_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vshl_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqshl_n_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqshl_n_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqshl_n_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqshl_n_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vqshl_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t 
__attribute__ ((__always_inline__)) +vqshl_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshl_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshl_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshl_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshl_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshl_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshl_n_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vqshl_ndi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_s8 (int8x16_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_s16 (int16x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_s32 (int32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_s64 (int64x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshlq_n_u8 (uint8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vqshl_nv16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshlq_n_u16 (uint16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vqshl_nv8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshlq_n_u32 (uint32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vqshl_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshlq_n_u64 (uint64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vqshl_nv2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqshlu_n_s8 (int8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqshlu_n_s16 (int16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqshlu_n_s32 (int32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vqshlu_n_s64 (int64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vqshlu_ndi (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vqshluq_n_s8 (int8x16_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vqshluq_n_s16 (int16x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b, 1); +} + +__extension__ 
static __inline uint32x4_t __attribute__ ((__always_inline__)) +vqshluq_n_s32 (int32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b, 1); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vqshluq_n_s64 (int64x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vshll_n_s8 (int8x8_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vshll_n_s16 (int16x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vshll_n_s32 (int32x2_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vshll_n_u8 (uint8x8_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vshll_nv8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vshll_n_u16 (uint16x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vshll_nv4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vshll_n_u32 (uint32x2_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vshll_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 1); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsra_ndi (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsra_ndi ((int64x1_t) __a, (int64x1_t) __b, __c, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 1); +} + 
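A similar sketch for the constant-shift forms defined above (again an editor's illustration, not part of the patch): vshl_n_*/vshr_n_* take a compile-time constant count, and vsra_n_* additionally accumulates the shifted value into its first operand, which is convenient for fixed-point averaging.

#include <arm_neon.h>

/* acc += (sample >> 4) per lane; the shift count must be a constant,
   in the range 1..16 for the 16-bit variant.  */
static inline uint16x4_t
accumulate_scaled (uint16x4_t acc, uint16x4_t sample)
{
  return vsra_n_u16 (acc, sample, 4);
}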
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 5); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsra_ndi (__a, __b, __c, 5); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 4); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 4); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 4); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsra_ndi ((int64x1_t) __a, (int64x1_t) __b, __c, 4); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, 
__c, 5); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 5); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 5); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 4); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 4); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 4); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 4); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsri_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsri_ndi 
((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsri_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vsli_n_s16 
(int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vsli_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vsli_ndi ((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) +vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vsli_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vabs_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vabsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vabs_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vabsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vabs_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vabsv2si (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vabs_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vabsv2sf (__a, 3); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vabsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vabsv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vabsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vabsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vabsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vabsv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vabsq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vabsv4sf (__a, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqabs_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqabsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqabs_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqabsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqabs_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqabsv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqabsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vqabsv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqabsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vqabsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqabsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vqabsv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vneg_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vnegv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vneg_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vnegv4hi (__a, 1); +} + 
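+/* Illustrative usage sketch: the shift-and-insert intrinsics defined above
+   (vsri_n_*/vsli_n_* and their q-forms) merge bit-fields from two vectors.
+   Assuming a NEON-enabled build, the helper below packs a 4-bit field from
+   __hi into the top of every byte of __lo, i.e. per lane
+   (__hi << 4) | (__lo & 0x0f).  The name pack_nibbles_example is a
+   hypothetical illustration, not a NEON intrinsic.  */
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+pack_nibbles_example (uint8x16_t __lo, uint8x16_t __hi)
+{
+  return vsliq_n_u8 (__lo, __hi, 4);
+}
+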
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vneg_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vnegv2si (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vneg_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vnegv2sf (__a, 3); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vnegq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vnegv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vnegq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vnegv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vnegq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vnegv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vnegq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vnegv4sf (__a, 3); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqneg_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqnegv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqneg_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqnegv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqneg_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqnegv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vqnegq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vqnegv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqnegq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vqnegv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqnegq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vqnegv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmvn_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vmvnv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmvn_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vmvnv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmvn_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vmvnv2si (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmvn_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmvn_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vmvnv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmvn_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vmvnv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmvn_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmvnq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vmvnv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmvnq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vmvnv8hi (__a, 1); +} + +__extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) +vmvnq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vmvnv4si (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmvnq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmvnq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vmvnv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmvnq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vmvnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmvnq_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcls_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vclsv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcls_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vclsv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcls_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vclsv2si (__a, 1); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclsq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vclsv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclsq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vclsv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclsq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vclsv4si (__a, 1); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vclz_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vclzv8qi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vclz_s16 (int16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vclzv4hi (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vclz_s32 (int32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vclzv2si (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vclz_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vclzv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vclz_u16 (uint16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vclzv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vclz_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vclzv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vclzq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vclzv16qi (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vclzq_s16 (int16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vclzv8hi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vclzq_s32 (int32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vclzv4si (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vclzq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vclzv16qi ((int8x16_t) 
__a, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vclzq_u16 (uint16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vclzv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vclzq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vclzv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcnt_s8 (int8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vcntv8qi (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcnt_u8 (uint8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcnt_p8 (poly8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 2); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcntq_s8 (int8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vcntv16qi (__a, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcntq_u8 (uint8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 0); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcntq_p8 (poly8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 2); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecpe_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrecpev2sf (__a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrecpe_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vrecpev2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpeq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrecpev4sf (__a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrecpeq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vrecpev4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrsqrte_f32 (float32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vrsqrtev2sf (__a, 3); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrsqrte_u32 (uint32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vrsqrtev2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_f32 (float32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vrsqrtev4sf (__a, 3); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrsqrteq_u32 (uint32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vrsqrtev4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vget_lane_s8 (int8x8_t __a, const int __b) +{ + return (int8_t)__builtin_neon_vget_lanev8qi (__a, __b, 1); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vget_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16_t)__builtin_neon_vget_lanev4hi (__a, __b, 1); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vget_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32_t)__builtin_neon_vget_lanev2si (__a, __b, 1); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vget_lane_f32 (float32x2_t __a, const int 
__b) +{ + return (float32_t)__builtin_neon_vget_lanev2sf (__a, __b, 3); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vget_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vget_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vget_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32_t)__builtin_neon_vget_lanev2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vget_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 2); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vget_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 2); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vget_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64_t)__builtin_neon_vget_lanedi (__a, __b, 1); +} + +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) +vget_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64_t)__builtin_neon_vget_lanedi ((int64x1_t) __a, __b, 0); +} + +__extension__ static __inline int8_t __attribute__ ((__always_inline__)) +vgetq_lane_s8 (int8x16_t __a, const int __b) +{ + return (int8_t)__builtin_neon_vget_lanev16qi (__a, __b, 1); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vgetq_lane_s16 (int16x8_t __a, const int __b) +{ + return (int16_t)__builtin_neon_vget_lanev8hi (__a, __b, 1); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vgetq_lane_s32 (int32x4_t __a, const int __b) +{ + return (int32_t)__builtin_neon_vget_lanev4si (__a, __b, 1); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vgetq_lane_f32 (float32x4_t __a, const int __b) +{ + return (float32_t)__builtin_neon_vget_lanev4sf (__a, __b, 3); +} + +__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) +vgetq_lane_u8 (uint8x16_t __a, const int __b) +{ + return (uint8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 0); +} + +__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) +vgetq_lane_u16 (uint16x8_t __a, const int __b) +{ + return (uint16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 0); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vgetq_lane_u32 (uint32x4_t __a, const int __b) +{ + return (uint32_t)__builtin_neon_vget_lanev4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) +vgetq_lane_p8 (poly8x16_t __a, const int __b) +{ + return (poly8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 2); +} + +__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) +vgetq_lane_p16 (poly16x8_t __a, const int __b) +{ + return (poly16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 2); +} + +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vgetq_lane_s64 (int64x2_t __a, const int __b) +{ + return (int64_t)__builtin_neon_vget_lanev2di (__a, __b, 1); +} + +__extension__ static __inline uint64_t __attribute__ 
((__always_inline__)) +vgetq_lane_u64 (uint64x2_t __a, const int __b) +{ + return (uint64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_f32 
(float32_t __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vcreate_p64 (uint64_t __a) +{ + return (poly64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vcreate_s8 (uint64_t __a) +{ + return (int8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vcreate_s16 (uint64_t __a) +{ + return (int16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcreate_s32 (uint64_t __a) +{ + return (int32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vcreate_s64 (uint64_t __a) +{ + return (int64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcreate_f32 (uint64_t __a) +{ + return (float32x2_t)__builtin_neon_vcreatev2sf ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vcreate_u8 (uint64_t __a) +{ + return (uint8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vcreate_u16 (uint64_t __a) +{ + return (uint16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcreate_u32 (uint64_t __a) +{ + return (uint32x2_t)__builtin_neon_vcreatev2si 
((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vcreate_u64 (uint64_t __a) +{ + return (uint64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vcreate_p8 (uint64_t __a) +{ + return (poly8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vcreate_p16 (uint64_t __a) +{ + return (poly16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_n_s8 (int8_t __a) +{ + return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_n_s16 (int16_t __a) +{ + return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_n_s32 (int32_t __a) +{ + return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_n_f32 (float32_t __a) +{ + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_n_u8 (uint8_t __a) +{ + return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_n_u16 (uint16_t __a) +{ + return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_n_u32 (uint32_t __a) +{ + return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vdup_n_p8 (poly8_t __a) +{ + return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_n_p16 (poly16_t __a) +{ + return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vdup_n_p64 (poly64_t __a) +{ + return (poly64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_n_s64 (int64_t __a) +{ + return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_n_u64 (uint64_t __a) +{ + return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vdupq_n_p64 (poly64_t __a) +{ + return (poly64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_n_s8 (int8_t __a) +{ + return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_n_s16 (int16_t __a) +{ + return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_n_s32 (int32_t __a) +{ + 
return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_n_f32 (float32_t __a) +{ + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_n_u8 (uint8_t __a) +{ + return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_n_u16 (uint16_t __a) +{ + return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_n_u32 (uint32_t __a) +{ + return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_n_p8 (poly8_t __a) +{ + return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_n_p16 (poly16_t __a) +{ + return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_n_s64 (int64_t __a) +{ + return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_n_u64 (uint64_t __a) +{ + return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmov_n_s8 (int8_t __a) +{ + return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmov_n_s16 (int16_t __a) +{ + return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmov_n_s32 (int32_t __a) +{ + return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmov_n_f32 (float32_t __a) +{ + return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmov_n_u8 (uint8_t __a) +{ + return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmov_n_u16 (uint16_t __a) +{ + return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmov_n_u32 (uint32_t __a) +{ + return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmov_n_p8 (poly8_t __a) +{ + return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vmov_n_p16 (poly16_t __a) +{ + return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vmov_n_s64 (int64_t __a) +{ + return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vmov_n_u64 (uint64_t __a) +{ + return 
(uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmovq_n_s8 (int8_t __a) +{ + return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovq_n_s16 (int16_t __a) +{ + return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovq_n_s32 (int32_t __a) +{ + return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmovq_n_f32 (float32_t __a) +{ + return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmovq_n_u8 (uint8_t __a) +{ + return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovq_n_u16 (uint16_t __a) +{ + return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovq_n_u32 (uint32_t __a) +{ + return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmovq_n_p8 (poly8_t __a) +{ + return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vmovq_n_p16 (poly16_t __a) +{ + return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovq_n_s64 (int64_t __a) +{ + return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovq_n_u64 (uint64_t __a) +{ + return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vdup_lane_s8 (int8x8_t __a, const int __b) +{ + return (int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vdup_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vdup_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vdup_lane_f32 (float32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vdup_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vdup_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vdup_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vdup_lanev2si ((int32x2_t) __a, __b); +} + +__extension__ static __inline poly8x8_t 
__attribute__ ((__always_inline__)) +vdup_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vdup_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vdup_lane_p64 (poly64x1_t __a, const int __b) +{ + return (poly64x1_t)__builtin_neon_vdup_lanedi (__a, __b); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vdup_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64x1_t)__builtin_neon_vdup_lanedi (__a, __b); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vdup_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x1_t)__builtin_neon_vdup_lanedi ((int64x1_t) __a, __b); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_s8 (int8x8_t __a, const int __b) +{ + return (int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_s16 (int16x4_t __a, const int __b) +{ + return (int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_s32 (int32x2_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_f32 (float32x2_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_u8 (uint8x8_t __a, const int __b) +{ + return (uint8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_u16 (uint16x4_t __a, const int __b) +{ + return (uint16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vdupq_lane_u32 (uint32x2_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vdup_lanev4si ((int32x2_t) __a, __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vdupq_lane_p8 (poly8x8_t __a, const int __b) +{ + return (poly8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_p16 (poly16x4_t __a, const int __b) +{ + return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_p64 (poly64x1_t __a, const int __b) +{ + return (poly64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_s64 (int64x1_t __a, const int __b) +{ + return (int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vdupq_lane_u64 (uint64x1_t __a, const int __b) +{ + return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) 
+vcombine_p64 (poly64x1_t __a, poly64x1_t __b) +{ + return (poly64x2_t)__builtin_neon_vcombinedi (__a, __b); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vcombine_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x16_t)__builtin_neon_vcombinev8qi (__a, __b); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vcombine_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x8_t)__builtin_neon_vcombinev4hi (__a, __b); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcombine_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x4_t)__builtin_neon_vcombinev2si (__a, __b); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vcombine_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x2_t)__builtin_neon_vcombinedi (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcombine_f32 (float32x2_t __a, float32x2_t __b) +{ + return (float32x4_t)__builtin_neon_vcombinev2sf (__a, __b); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vcombine_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vcombine_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcombine_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x4_t)__builtin_neon_vcombinev2si ((int32x2_t) __a, (int32x2_t) __b); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vcombine_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x2_t)__builtin_neon_vcombinedi ((int64x1_t) __a, (int64x1_t) __b); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vcombine_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vcombine_p16 (poly16x4_t __a, poly16x4_t __b) +{ + return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vget_high_p64 (poly64x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t __a) +{ + return (int8x8_t)__builtin_neon_vget_highv16qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t __a) +{ + return (int16x4_t)__builtin_neon_vget_highv8hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t __a) +{ + return (int32x2_t)__builtin_neon_vget_highv4si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t __a) +{ + return (int64x1_t)__builtin_neon_vget_highv2di (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t __a) +{ + return (float32x2_t)__builtin_neon_vget_highv4sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 
(uint8x16_t __a) +{ + return (uint8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vget_highv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t __a) +{ + return (poly8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_low_s8 (int8x16_t __a) +{ + return (int8x8_t)__builtin_neon_vget_lowv16qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_low_s16 (int16x8_t __a) +{ + return (int16x4_t)__builtin_neon_vget_lowv8hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_low_s32 (int32x4_t __a) +{ + return (int32x2_t)__builtin_neon_vget_lowv4si (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_low_f32 (float32x4_t __a) +{ + return (float32x2_t)__builtin_neon_vget_lowv4sf (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_low_u8 (uint8x16_t __a) +{ + return (uint8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_low_u16 (uint16x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_low_u32 (uint32x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vget_lowv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_low_p8 (poly8x16_t __a) +{ + return (poly8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_low_p16 (poly16x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vget_low_p64 (poly64x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_low_s64 (int64x2_t __a) +{ + return (int64x1_t)__builtin_neon_vget_lowv2di (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_low_u64 (uint64x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_s32_f32 (float32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vcvtv2sf (__a, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_s32 (int32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vcvtv2si (__a, 1); +} + 
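+/* Illustrative usage sketch: vget_low_*/vget_high_* split a 128-bit vector
+   into its 64-bit halves and vcombine_* joins two halves (first argument
+   becomes the low half).  Assuming a NEON-enabled build, the helper below
+   swaps the halves of a q-register operand.  The name swap_halves_example
+   is a hypothetical illustration, not a NEON intrinsic.  */
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+swap_halves_example (uint8x16_t __a)
+{
+  return vcombine_u8 (vget_high_u8 (__a), vget_low_u8 (__a));
+}
+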
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_f32_u32 (uint32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vcvtv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_u32_f32 (float32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vcvtv2sf (__a, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_s32_f32 (float32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vcvtv4sf (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4si (__a, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_u32_f32 (float32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0); +} + +#if ((__ARM_FP & 0x2) != 0) +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vcvt_f16_f32 (float32x4_t __a) +{ + return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a); +} + +#endif +#if ((__ARM_FP & 0x2) != 0) +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvt_f32_f16 (float16x4_t __a) +{ + return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a); +} + +#endif +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vcvt_n_s32_f32 (float32x2_t __a, const int __b) +{ + return (int32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_n_f32_s32 (int32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vcvt_n_f32_u32 (uint32x2_t __a, const int __b) +{ + return (float32x2_t)__builtin_neon_vcvt_nv2si ((int32x2_t) __a, __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vcvt_n_u32_f32 (float32x2_t __a, const int __b) +{ + return (uint32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_s32_f32 (float32x4_t __a, const int __b) +{ + return (int32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_f32_s32 (int32x4_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_f32_u32 (uint32x4_t __a, const int __b) +{ + return (float32x4_t)__builtin_neon_vcvt_nv4si ((int32x4_t) __a, __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vcvtq_n_u32_f32 (float32x4_t __a, const int __b) +{ + return (uint32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmovn_s16 (int16x8_t __a) +{ + return (int8x8_t)__builtin_neon_vmovnv8hi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmovn_s32 (int32x4_t __a) +{ + return (int16x4_t)__builtin_neon_vmovnv4si (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmovn_s64 (int64x2_t __a) +{ + return 
(int32x2_t)__builtin_neon_vmovnv2di (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmovn_u16 (uint16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vmovnv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmovn_u32 (uint32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vmovnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmovn_u64 (uint64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vmovnv2di ((int64x2_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vqmovn_s16 (int16x8_t __a) +{ + return (int8x8_t)__builtin_neon_vqmovnv8hi (__a, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqmovn_s32 (int32x4_t __a) +{ + return (int16x4_t)__builtin_neon_vqmovnv4si (__a, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqmovn_s64 (int64x2_t __a) +{ + return (int32x2_t)__builtin_neon_vqmovnv2di (__a, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovn_u16 (uint16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vqmovnv8hi ((int16x8_t) __a, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovn_u32 (uint32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vqmovnv4si ((int32x4_t) __a, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovn_u64 (uint64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vqmovnv2di ((int64x2_t) __a, 0); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vqmovun_s16 (int16x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vqmovunv8hi (__a, 1); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vqmovun_s32 (int32x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vqmovunv4si (__a, 1); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vqmovun_s64 (int64x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vqmovunv2di (__a, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmovl_s8 (int8x8_t __a) +{ + return (int16x8_t)__builtin_neon_vmovlv8qi (__a, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmovl_s16 (int16x4_t __a) +{ + return (int32x4_t)__builtin_neon_vmovlv4hi (__a, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmovl_s32 (int32x2_t __a) +{ + return (int64x2_t)__builtin_neon_vmovlv2si (__a, 1); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmovl_u8 (uint8x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vmovlv8qi ((int8x8_t) __a, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmovl_u16 (uint16x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vmovlv4hi ((int16x4_t) __a, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmovl_u32 (uint32x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vmovlv2si ((int32x2_t) __a, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl1_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl1_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vtbl1v8qi 
((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl1_p8 (poly8x8_t __a, uint8x8_t __b) +{ + return (poly8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl2_s8 (int8x8x2_t __a, int8x8_t __b) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl3_s8 (int8x8x3_t __a, int8x8_t __b) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbl4_s8 (int8x8x4_t __a, int8x8_t __b) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a }; + return (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) +{ + return (poly8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c); 
+} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return (uint8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (uint8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (uint8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + return (poly8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vmul_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vmul_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vmul_lanev8hi ((int16x8_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vmul_lanev4si ((int32x4_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d) +{ + return (float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x4_t)__builtin_neon_vmla_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x2_t)__builtin_neon_vmla_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return (int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d) +{ + return (float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x8_t)__builtin_neon_vmla_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d) +{ + return 
(uint32x4_t)__builtin_neon_vmla_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmlal_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint64x2_t)__builtin_neon_vmlal_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d) +{ + return (float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x4_t)__builtin_neon_vmls_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x2_t)__builtin_neon_vmls_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return (int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d) +{ + return 
(float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d) +{ + return (uint16x8_t)__builtin_neon_vmls_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmls_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d) +{ + return (uint32x4_t)__builtin_neon_vmlsl_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d) +{ + return (uint64x2_t)__builtin_neon_vmlsl_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return (int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return (int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vmull_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vmull_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t 
__attribute__ ((__always_inline__)) +vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmul_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vmul_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmul_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vmul_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmul_n_f32 (float32x2_t __a, float32_t __b) +{ + return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmul_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return (uint16x4_t)__builtin_neon_vmul_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmul_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return (uint32x2_t)__builtin_neon_vmul_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmulq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vmul_nv8hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmulq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vmul_nv4si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmulq_n_f32 (float32x4_t __a, float32_t __b) +{ + return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmulq_n_u16 (uint16x8_t __a, 
uint16_t __b) +{ + return (uint16x8_t)__builtin_neon_vmul_nv8hi ((int16x8_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmulq_n_u32 (uint32x4_t __a, uint32_t __b) +{ + return (uint32x4_t)__builtin_neon_vmul_nv4si ((int32x4_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int32x4_t)__builtin_neon_vmull_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int64x2_t)__builtin_neon_vmull_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmull_n_u16 (uint16x4_t __a, uint16_t __b) +{ + return (uint32x4_t)__builtin_neon_vmull_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmull_n_u32 (uint32x2_t __a, uint32_t __b) +{ + return (uint64x2_t)__builtin_neon_vmull_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmull_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmull_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int64x2_t)__builtin_neon_vqdmull_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmulhq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqdmulh_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqdmulh_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b) +{ + return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 5); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b) +{ + return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s16 (int16x4_t __a, int16_t __b) +{ + return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 5); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmulh_n_s32 (int32x2_t __a, int32_t __b) +{ + return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 5); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, 
(__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint16x4_t)__builtin_neon_vmla_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint32x2_t)__builtin_neon_vmla_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) +{ + return (int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) +{ + return (int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) +{ + return (uint16x8_t)__builtin_neon_vmla_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) +{ + return (uint32x4_t)__builtin_neon_vmla_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlal_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlal_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, 
int32_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) +{ + return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint16x4_t)__builtin_neon_vmls_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint32x2_t)__builtin_neon_vmls_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) +{ + return (int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) +{ + return (int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) +{ + return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c, 3); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) +{ + return (uint16x8_t)__builtin_neon_vmls_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) +{ + return (uint32x4_t)__builtin_neon_vmls_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) +{ + return (uint32x4_t)__builtin_neon_vmlsl_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) +{ + return (uint64x2_t)__builtin_neon_vmlsl_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0); +} + +__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) +vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) +{ + return (int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) +{ + return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vext_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vextdi (__a, __b, __c); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vext_s8 (int8x8_t __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vext_s16 (int16x4_t __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vext_s32 (int32x2_t __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vextv2si (__a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vext_s64 (int64x1_t __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vextdi (__a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vext_f32 (float32x2_t __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vextv2si ((int32x2_t) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vextdi ((int64x1_t) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vextq_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ 
((__always_inline__)) +vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vextv4si (__a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vextv2di (__a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vextv4si ((int32x4_t) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev64_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev64_s16 (int16x4_t __a) +{ + return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrev64_s32 (int32x2_t __a) +{ + return (int32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrev64_f32 (float32x2_t __a) +{ + return (float32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev64_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev64_u16 (uint16x4_t __a) +{ + return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrev64_u32 (uint32x2_t __a) +{ + return (uint32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) 
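+/* Reverse the eight poly8 lanes within the 64-bit vector; as with the other
+   vrev64 variants, the constant shuffle mask encodes the reversal of element
+   order inside each 64-bit doubleword.  */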
+vrev64_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev64_p16 (poly16x4_t __a) +{ + return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev64q_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev64q_s16 (int16x8_t __a) +{ + return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrev64q_s32 (int32x4_t __a) +{ + return (int32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrev64q_f32 (float32x4_t __a) +{ + return (float32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev64q_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev64q_u16 (uint16x8_t __a) +{ + return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrev64q_u32 (uint32x4_t __a) +{ + return (uint32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev64q_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev64q_p16 (poly16x8_t __a) +{ + return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev32_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev32_s16 (int16x4_t __a) +{ + return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev32_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev32_u16 (uint16x4_t __a) +{ + return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev32_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev32_p16 (poly16x4_t __a) +{ + return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev32q_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 
9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev32q_s16 (int16x8_t __a) +{ + return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev32q_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev32q_u16 (uint16x8_t __a) +{ + return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev32q_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev32q_p16 (poly16x8_t __a) +{ + return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev16_s8 (int8x8_t __a) +{ + return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev16_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev16_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev16q_s8 (int8x16_t __a) +{ + return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev16q_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev16q_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) +{ + return (poly64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) +{ + return (int8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, __b, __c); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) +{ + return (int64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) +vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) +{ + return (float32x2_t)__builtin_neon_vbslv2sf ((int32x2_t) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) +{ + return (uint8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) +{ + return (uint16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) +{ + return (uint32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) +{ + return (uint64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, (int64x1_t) __b, (int64x1_t) __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) +{ + return (poly8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) +{ + return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) +{ + return (poly64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) +{ + return (int8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, __b, __c); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) +{ + return (int64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) +{ + return (float32x4_t)__builtin_neon_vbslv4sf ((int32x4_t) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) +{ + return (uint8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) +{ + return (uint16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbslq_u32 
(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) +{ + return (uint32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) +{ + return (uint64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) +{ + return (poly8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) +{ + return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vtrn_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vtrn_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vtrn_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vtrn_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vtrn_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vtrn_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vtrn_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vtrn_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); 
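+  /* With only two lanes per vector the transpose reduces to pairing lane 0
+     of each input in val[0] and lane 1 of each input in val[1], as the
+     shuffle masks { 0, 2 } and { 1, 3 } spell out.  */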
+ __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vtrn_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_s8 (int8x16_t __a, int8x16_t __b) +{ + int8x16x2_t __rv; + __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 2, 6 }); + __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 5, 3, 7 }); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 }); + __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31 }); + return __rv; +} + +__extension__ static __inline 
poly16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + poly16x8x2_t __rv; + __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 9, 3, 11, 5, 13, 7, 15 }); + return __rv; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vzip_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vzip_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vzip_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vzip_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vzip_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vzip_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vzip_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vzip_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vzip_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vzipq_s8 (int8x16_t __a, 
int8x16_t __b) +{ + int8x16x2_t __rv; + __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vzipq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vzipq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vzipq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vzipq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vzipq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vzipq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 }); + __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 }); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vzipq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); + __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); + return __rv; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vzipq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + poly16x8x2_t __rv; + __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 4, 12, 5, 13, 6, 14, 7, 15 }); + return __rv; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vuzp_s8 (int8x8_t __a, int8x8_t __b) +{ + int8x8x2_t __rv; + __rv.val[0] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) 
{ 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (int8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vuzp_s16 (int16x4_t __a, int16x4_t __b) +{ + int16x4x2_t __rv; + __rv.val[0] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (int16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vuzp_s32 (int32x2_t __a, int32x2_t __b) +{ + int32x2x2_t __rv; + __rv.val[0] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (int32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vuzp_f32 (float32x2_t __a, float32x2_t __b) +{ + float32x2x2_t __rv; + __rv.val[0] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (float32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vuzp_u8 (uint8x8_t __a, uint8x8_t __b) +{ + uint8x8x2_t __rv; + __rv.val[0] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (uint8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vuzp_u16 (uint16x4_t __a, uint16x4_t __b) +{ + uint16x4x2_t __rv; + __rv.val[0] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (uint16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vuzp_u32 (uint32x2_t __a, uint32x2_t __b) +{ + uint32x2x2_t __rv; + __rv.val[0] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 0, 2 }); + __rv.val[1] = (uint32x2_t) __builtin_shuffle (__a, __b, (uint32x2_t) { 1, 3 }); + return __rv; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vuzp_p8 (poly8x8_t __a, poly8x8_t __b) +{ + poly8x8x2_t __rv; + __rv.val[0] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (poly8x8_t) __builtin_shuffle (__a, __b, (uint8x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vuzp_p16 (poly16x4_t __a, poly16x4_t __b) +{ + poly16x4x2_t __rv; + __rv.val[0] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (poly16x4_t) __builtin_shuffle (__a, __b, (uint16x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_s8 (int8x16_t __a, int8x16_t __b) +{ + int8x16x2_t __rv; + __rv.val[0] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = (int8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); + return __rv; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_s16 (int16x8_t __a, int16x8_t __b) +{ + int16x8x2_t __rv; + __rv.val[0] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 
8, 10, 12, 14 }); + __rv.val[1] = (int16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_s32 (int32x4_t __a, int32x4_t __b) +{ + int32x4x2_t __rv; + __rv.val[0] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (int32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_f32 (float32x4_t __a, float32x4_t __b) +{ + float32x4x2_t __rv; + __rv.val[0] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (float32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + uint8x16x2_t __rv; + __rv.val[0] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = (uint8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); + return __rv; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + uint16x8x2_t __rv; + __rv.val[0] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (uint16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vuzpq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + uint32x4x2_t __rv; + __rv.val[0] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 }); + __rv.val[1] = (uint32x4_t) __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 }); + return __rv; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vuzpq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + poly8x16x2_t __rv; + __rv.val[0] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }); + __rv.val[1] = (poly8x16_t) __builtin_shuffle (__a, __b, (uint8x16_t) { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); + return __rv; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + poly16x8x2_t __rv; + __rv.val[0] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = (poly16x8_t) __builtin_shuffle (__a, __b, (uint16x8_t) { 1, 3, 5, 7, 9, 11, 13, 15 }); + return __rv; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_p64 (const poly64_t * __a) +{ + return (poly64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_s8 (const int8_t * __a) +{ + return (int8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_s16 (const int16_t * __a) +{ + return (int16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_s32 (const int32_t * __a) +{ + return 
(int32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_s64 (const int64_t * __a) +{ + return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_f32 (const float32_t * __a) +{ + return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_u8 (const uint8_t * __a) +{ + return (uint8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_u16 (const uint16_t * __a) +{ + return (uint16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_u32 (const uint32_t * __a) +{ + return (uint32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_u64 (const uint64_t * __a) +{ + return (uint64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_p8 (const poly8_t * __a) +{ + return (poly8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_p16 (const poly16_t * __a) +{ + return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_p64 (const poly64_t * __a) +{ + return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_s8 (const int8_t * __a) +{ + return (int8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_s16 (const int16_t * __a) +{ + return (int16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_s32 (const int32_t * __a) +{ + return (int32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_s64 (const int64_t * __a) +{ + return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_f32 (const float32_t * __a) +{ + return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_u8 (const uint8_t * __a) +{ + return (uint8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_u16 (const uint16_t * __a) +{ + return (uint16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_u32 (const uint32_t * __a) +{ + return (uint32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ 
((__always_inline__)) +vld1q_u64 (const uint64_t * __a) +{ + return (uint64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_p8 (const poly8_t * __a) +{ + return (poly8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_p16 (const poly16_t * __a) +{ + return (poly16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) +{ + return (int8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_lane_s16 (const int16_t * __a, int16x4_t __b, const int __c) +{ + return (int16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c) +{ + return (int32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) +{ + return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b, const int __c) +{ + return (uint8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b, const int __c) +{ + return (uint16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b, const int __c) +{ + return (uint32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b, const int __c) +{ + return (poly8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c) +{ + return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_lane_p64 (const poly64_t * __a, poly64x1_t __b, const int __c) +{ + return (poly64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_lane_s64 (const int64_t * __a, int64x1_t __b, const int __c) +{ + return (int64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c) +{ + return (uint64x1_t)__builtin_neon_vld1_lanedi 
((const __builtin_neon_di *) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_s8 (const int8_t * __a, int8x16_t __b, const int __c) +{ + return (int8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_s16 (const int16_t * __a, int16x8_t __b, const int __c) +{ + return (int16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c) +{ + return (int32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) +{ + return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b, const int __c) +{ + return (uint8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b, const int __c) +{ + return (uint16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b, const int __c) +{ + return (uint32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b, const int __c) +{ + return (poly8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c) +{ + return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_lane_p64 (const poly64_t * __a, poly64x2_t __b, const int __c) +{ + return (poly64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c) +{ + return (int64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b, const int __c) +{ + return (uint64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vld1_dup_s8 (const int8_t * __a) +{ + return (int8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vld1_dup_s16 (const int16_t * __a) +{ + return (int16x4_t)__builtin_neon_vld1_dupv4hi ((const 
__builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vld1_dup_s32 (const int32_t * __a) +{ + return (int32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vld1_dup_f32 (const float32_t * __a) +{ + return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vld1_dup_u8 (const uint8_t * __a) +{ + return (uint8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vld1_dup_u16 (const uint16_t * __a) +{ + return (uint16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vld1_dup_u32 (const uint32_t * __a) +{ + return (uint32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vld1_dup_p8 (const poly8_t * __a) +{ + return (poly8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vld1_dup_p16 (const poly16_t * __a) +{ + return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vld1_dup_p64 (const poly64_t * __a) +{ + return (poly64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vld1_dup_s64 (const int64_t * __a) +{ + return (int64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vld1_dup_u64 (const uint64_t * __a) +{ + return (uint64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_s8 (const int8_t * __a) +{ + return (int8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_s16 (const int16_t * __a) +{ + return (int16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_s32 (const int32_t * __a) +{ + return (int32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_f32 (const float32_t * __a) +{ + return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_u8 (const uint8_t * __a) +{ + return (uint8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_u16 (const uint16_t * __a) +{ + return (uint16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vld1q_dup_u32 (const uint32_t * __a) +{ + return (uint32x4_t)__builtin_neon_vld1_dupv4si ((const 
__builtin_neon_si *) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vld1q_dup_p8 (const poly8_t * __a) +{ + return (poly8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_p16 (const poly16_t * __a) +{ + return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_p64 (const poly64_t * __a) +{ + return (poly64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_s64 (const int64_t * __a) +{ + return (int64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vld1q_dup_u64 (const uint64_t * __a) +{ + return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p64 (poly64_t * __a, poly64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s8 (int8_t * __a, int8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s16 (int16_t * __a, int16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s32 (int32_t * __a, int32x2_t __b) +{ + __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_s64 (int64_t * __a, int64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f32 (float32_t * __a, float32x2_t __b) +{ + __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u8 (uint8_t * __a, uint8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u16 (uint16_t * __a, uint16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u32 (uint32_t * __a, uint32x2_t __b) +{ + __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, (int32x2_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_u64 (uint64_t * __a, uint64x1_t __b) +{ + __builtin_neon_vst1di ((__builtin_neon_di *) __a, (int64x1_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p8 (poly8_t * __a, poly8x8_t __b) +{ + __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_p16 (poly16_t * __a, poly16x4_t __b) +{ + __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p64 (poly64_t * __a, poly64x2_t __b) +{ + 
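  /* Store the full 128-bit Q register to *__a.  The pointer is cast to the
     builtin's element type (__builtin_neon_di) and the poly64x2_t value to
     the signed vector type (int64x2_t) the builtin signature expects;
     neither cast changes the bits that get stored.  */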
__builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s8 (int8_t * __a, int8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s16 (int16_t * __a, int16x8_t __b) +{ + __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s32 (int32_t * __a, int32x4_t __b) +{ + __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_s64 (int64_t * __a, int64x2_t __b) +{ + __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f32 (float32_t * __a, float32x4_t __b) +{ + __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u8 (uint8_t * __a, uint8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u16 (uint16_t * __a, uint16x8_t __b) +{ + __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u32 (uint32_t * __a, uint32x4_t __b) +{ + __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, (int32x4_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_u64 (uint64_t * __a, uint64x2_t __b) +{ + __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p8 (poly8_t * __a, poly8x16_t __b) +{ + __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_p16 (poly16_t * __a, poly16x8_t __b) +{ + __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u32 
(uint32_t * __a, uint32x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, (int32x2_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_p64 (poly64_t * __a, poly64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c) +{ + __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, (int64x1_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, (int32x4_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c) +{ + __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_p64 (poly64_t * __a, poly64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +#endif +__extension__ static __inline void 
__attribute__ ((__always_inline__)) +vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c) +{ + __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_s8 (const int8_t * __a) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_s16 (const int16_t * __a) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_s32 (const int32_t * __a) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_f32 (const float32_t * __a) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_u8 (const uint8_t * __a) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_u16 (const uint16_t * __a) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_u32 (const uint32_t * __a) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_p8 (const poly8_t * __a) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_p16 (const poly16_t * __a) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__)) +vld2_p64 (const poly64_t * __a) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) +vld2_s64 (const int64_t * __a) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) +vld2_u64 (const uint64_t * __a) +{ + union { uint64x1x2_t __i; 
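 /* The second union member below, __builtin_neon_ti, is the opaque 128-bit
    integer mode in which the vld2 builtin returns its pair of 64-bit D
    registers; the union reinterprets that pair as the public
    uint64x1x2_t structure without any data movement.  */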
__builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) +vld2q_s8 (const int8_t * __a) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vld2q_s16 (const int16_t * __a) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vld2q_s32 (const int32_t * __a) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vld2q_f32 (const float32_t * __a) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) +vld2q_u8 (const uint8_t * __a) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vld2q_u16 (const uint16_t * __a) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vld2q_u32 (const uint32_t * __a) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) +vld2q_p8 (const poly8_t * __a) +{ + union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vld2q_p16 (const poly16_t * __a) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = 
{ __b }; + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c) +{ + union { float32x4x2_t __i; 
__builtin_neon_oi __o; } __bu = { __b }; + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) +vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_s8 (const int8_t * __a) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_s16 (const int16_t * __a) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_s32 (const int32_t * __a) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_f32 (const float32_t * __a) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) +vld2_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) +vld2_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = 
__builtin_neon_vld2_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_s64 (const int64_t * __a) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) +vld2_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s8 (int8_t * __a, int8x8x2_t __b) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s16 (int16_t * __a, int16x4x2_t __b) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s32 (int32_t * __a, int32x2x2_t __b) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_f32 (float32_t * __a, float32x2x2_t __b) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u8 (uint8_t * __a, uint8x8x2_t __b) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u16 (uint16_t * __a, uint16x4x2_t __b) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u32 (uint32_t * __a, uint32x2x2_t __b) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p8 (poly8_t * __a, poly8x8x2_t __b) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p16 (poly16_t * __a, poly16x4x2_t __b) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +#ifdef 
__ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_p64 (poly64_t * __a, poly64x1x2_t __b) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_s64 (int64_t * __a, int64x1x2_t __b) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_u64 (uint64_t * __a, uint64x1x2_t __b) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s8 (int8_t * __a, int8x16x2_t __b) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s16 (int16_t * __a, int16x8x2_t __b) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_s32 (int32_t * __a, int32x4x2_t __b) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f32 (float32_t * __a, float32x4x2_t __b) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u8 (uint8_t * __a, uint8x16x2_t __b) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u16 (uint16_t * __a, uint16x8x2_t __b) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_u32 (uint32_t * __a, uint32x4x2_t __b) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p8 (poly8_t * __a, poly8x16x2_t __b) +{ + union { poly8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_p16 (poly16_t * __a, poly16x8x2_t __b) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c) +{ + 
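  /* Marshal the two-vector argument through a union: the builtin takes its
     data operand in the opaque __builtin_neon_ti mode (a pair of D
     registers), so __b is reinterpreted rather than converted.  Lane __c is
     then stored from each of the two vectors, writing one interleaved
     element pair to *__a.  */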
union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + 
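  /* For the q forms the pair of Q registers travels through the opaque
     256-bit __builtin_neon_oi mode instead.  The call below stores lane __c
     of each of the two uint32x4_t vectors; __c must be a constant
     expression in the range 0..3.  */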
__builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_s8 (const int8_t * __a) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_s16 (const int16_t * __a) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_s32 (const int32_t * __a) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_f32 (const float32_t * __a) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_u8 (const uint8_t * __a) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_u16 (const uint16_t * __a) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_u32 (const uint32_t * __a) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_p8 (const poly8_t * __a) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_p16 (const poly16_t * __a) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__)) +vld3_p64 (const poly64_t * __a) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) +vld3_s64 (const int64_t * __a) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) +vld3_u64 (const uint64_t * __a) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = 
__builtin_neon_vld3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) +vld3q_s8 (const int8_t * __a) +{ + union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) +vld3q_s16 (const int16_t * __a) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) +vld3q_s32 (const int32_t * __a) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) +vld3q_f32 (const float32_t * __a) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) +vld3q_u8 (const uint8_t * __a) +{ + union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) +vld3q_u16 (const uint16_t * __a) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) +vld3q_u32 (const uint32_t * __a) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) +vld3q_p8 (const poly8_t * __a) +{ + union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) +vld3q_p16 (const poly16_t * __a) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { int32x2x3_t __i; 
__builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { 
float32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) +vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_s8 (const int8_t * __a) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_s16 (const int16_t * __a) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_s32 (const int32_t * __a) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_f32 (const float32_t * __a) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) +vld3_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) +vld3_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv8qi ((const __builtin_neon_qi *) __a); + 
return __rv.__i; +} + +__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_s64 (const int64_t * __a) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) +vld3_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s8 (int8_t * __a, int8x8x3_t __b) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s16 (int16_t * __a, int16x4x3_t __b) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s32 (int32_t * __a, int32x2x3_t __b) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_f32 (float32_t * __a, float32x2x3_t __b) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u8 (uint8_t * __a, uint8x8x3_t __b) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u16 (uint16_t * __a, uint16x4x3_t __b) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u32 (uint32_t * __a, uint32x2x3_t __b) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p8 (poly8_t * __a, poly8x8x3_t __b) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_p16 (poly16_t * __a, poly16x4x3_t __b) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void 
__attribute__ ((__always_inline__)) +vst3_p64 (poly64_t * __a, poly64x1x3_t __b) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_s64 (int64_t * __a, int64x1x3_t __b) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_u64 (uint64_t * __a, uint64x1x3_t __b) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s8 (int8_t * __a, int8x16x3_t __b) +{ + union { int8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s16 (int16_t * __a, int16x8x3_t __b) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_s32 (int32_t * __a, int32x4x3_t __b) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f32 (float32_t * __a, float32x4x3_t __b) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u8 (uint8_t * __a, uint8x16x3_t __b) +{ + union { uint8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u16 (uint16_t * __a, uint16x8x3_t __b) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_u32 (uint32_t * __a, uint32x4x3_t __b) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p8 (poly8_t * __a, poly8x16x3_t __b) +{ + union { poly8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_p16 (poly16_t * __a, poly16x8x3_t __b) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { 
__b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + 
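The vld3*/vst3* intrinsics defined above load and store three-vector structures, deinterleaving element triplets from memory into the .val[0..2] members and re-interleaving them on the way back out. A minimal usage sketch follows (editor's illustration, not part of this patch; swap_rb_rgb48 is a hypothetical helper name, and the buffer is assumed to hold at least 48 bytes of interleaved R,G,B data):

#include <arm_neon.h>

/* Swap the R and B channels of 16 interleaved RGB pixels.
   vld3q_u8 deinterleaves 48 bytes into three 16-lane vectors
   (one per channel); vst3q_u8 re-interleaves them when storing.  */
void swap_rb_rgb48 (uint8_t *rgb)
{
  uint8x16x3_t px = vld3q_u8 (rgb);   /* px.val[0]=R, val[1]=G, val[2]=B */
  uint8x16_t tmp = px.val[0];
  px.val[0] = px.val[2];
  px.val[2] = tmp;
  vst3q_u8 (rgb, px);
}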
+__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_s8 (const int8_t * __a) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_s16 (const int16_t * __a) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_s32 (const int32_t * __a) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_f32 (const float32_t * __a) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_u8 (const uint8_t * __a) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_u16 (const uint16_t * __a) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_u32 (const uint32_t * __a) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_p8 (const poly8_t * __a) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_p16 (const poly16_t * __a) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) +vld4_p64 (const poly64_t * __a) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) +vld4_s64 (const int64_t * __a) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) +vld4_u64 (const uint64_t * __a) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + 
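The vld4*/vst4* intrinsics that begin here work the same way for four-element structures such as interleaved RGBA pixels. A small sketch of typical use (illustrative only, not part of this patch; set_opaque_rgba8 is a hypothetical name, the buffer is assumed to hold 32 bytes of interleaved R,G,B,A data, and vdup_n_u8/vst4_u8 are taken from elsewhere in this header):

#include <arm_neon.h>

/* Force the alpha channel of 8 interleaved RGBA pixels to 0xFF.
   vld4_u8 splits 32 bytes into four 8-lane vectors, one per channel.  */
void set_opaque_rgba8 (uint8_t *rgba)
{
  uint8x8x4_t px = vld4_u8 (rgba);   /* px.val[0..3] = R, G, B, A planes */
  px.val[3] = vdup_n_u8 (0xFF);      /* replace the alpha plane */
  vst4_u8 (rgba, px);                /* re-interleave and store */
}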
+__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) +vld4q_s8 (const int8_t * __a) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) +vld4q_s16 (const int16_t * __a) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) +vld4q_s32 (const int32_t * __a) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) +vld4q_f32 (const float32_t * __a) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) +vld4q_u8 (const uint8_t * __a) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) +vld4q_u16 (const uint16_t * __a) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) +vld4q_u32 (const uint32_t * __a) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) +vld4q_p8 (const poly8_t * __a) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) +vld4q_p16 (const poly16_t * __a) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2si ((const 
__builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev2si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8qi ((const __builtin_neon_qi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = 
__builtin_neon_vld4_lanev4sf ((const __builtin_neon_sf *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) +vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4si ((const __builtin_neon_si *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hi ((const __builtin_neon_hi *) __a, __bu.__o, __c); + return __rv.__i; +} + +__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_s8 (const int8_t * __a) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_s16 (const int16_t * __a) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_s32 (const int32_t * __a) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_f32 (const float32_t * __a) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + +__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_u8 (const uint8_t * __a) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_u16 (const uint16_t * __a) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) +vld4_dup_u32 (const uint32_t * __a) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) +vld4_dup_p8 (const poly8_t * __a) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ static __inline 
poly16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_p16 (const poly16_t * __a) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_p64 (const poly64_t * __a) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +#endif +__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_s64 (const int64_t * __a) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) +vld4_dup_u64 (const uint64_t * __a) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupdi ((const __builtin_neon_di *) __a); + return __rv.__i; +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s8 (int8_t * __a, int8x8x4_t __b) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s16 (int16_t * __a, int16x4x4_t __b) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s32 (int32_t * __a, int32x2x4_t __b) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_f32 (float32_t * __a, float32x2x4_t __b) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u8 (uint8_t * __a, uint8x8x4_t __b) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u16 (uint16_t * __a, uint16x4x4_t __b) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u32 (uint32_t * __a, uint32x2x4_t __b) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p8 (poly8_t * __a, poly8x8x4_t __b) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p16 (poly16_t * __a, poly16x4x4_t __b) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_p64 (poly64_t * __a, 
poly64x1x4_t __b) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +#endif +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_s64 (int64_t * __a, int64x1x4_t __b) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_u64 (uint64_t * __a, uint64x1x4_t __b) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s8 (int8_t * __a, int8x16x4_t __b) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s16 (int16_t * __a, int16x8x4_t __b) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_s32 (int32_t * __a, int32x4x4_t __b) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f32 (float32_t * __a, float32x4x4_t __b) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u8 (uint8_t * __a, uint8x16x4_t __b) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u16 (uint16_t * __a, uint16x8x4_t __b) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_u32 (uint32_t * __a, uint32x4x4_t __b) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p8 (poly8_t * __a, poly8x16x4_t __b) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_p16 (poly16_t * __a, poly16x8x4_t __b) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, 
__bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) 
+vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vand_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vand_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vand_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vand_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vand_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vand_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vand_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vand_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vandq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vandq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vandq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vandq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vandq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vandq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vandq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vandq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorr_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vorrv8qi 
(__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorr_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorr_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorr_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorr_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorr_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vorr_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorr_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vorrq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vorrq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vorrq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vorrq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vorrq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vorrq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vorrq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vorrq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +veor_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +veor_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +veor_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_veorv2si (__a, 
__b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +veor_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +veor_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +veor_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +veor_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_veordi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +veor_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +veorq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +veorq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +veorq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +veorq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +veorq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +veorq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +veorq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +veorq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vbic_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vbic_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vbic_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vbic_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vbic_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return 
(uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vbic_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vbic_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vbic_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vbicq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vbicq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vbicq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vbicq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vbicq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vbicq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vbicq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vbicq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vorn_s8 (int8x8_t __a, int8x8_t __b) +{ + return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vorn_s16 (int16x4_t __a, int16x4_t __b) +{ + return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vorn_s32 (int32x2_t __a, int32x2_t __b) +{ + return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vorn_u8 (uint8x8_t __a, uint8x8_t __b) +{ + return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vorn_u16 (uint16x4_t __a, uint16x4_t __b) +{ + return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vorn_u32 (uint32x2_t __a, uint32x2_t __b) +{ + return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) 
+vorn_s64 (int64x1_t __a, int64x1_t __b) +{ + return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vorn_u64 (uint64x1_t __a, uint64x1_t __b) +{ + return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vornq_s8 (int8x16_t __a, int8x16_t __b) +{ + return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vornq_s16 (int16x8_t __a, int16x8_t __b) +{ + return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vornq_s32 (int32x4_t __a, int32x4_t __b) +{ + return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vornq_s64 (int64x2_t __a, int64x2_t __b) +{ + return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vornq_u8 (uint8x16_t __a, uint8x16_t __b) +{ + return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vornq_u16 (uint16x8_t __a, uint16x8_t __b) +{ + return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vornq_u32 (uint32x4_t __a, uint32x4_t __b) +{ + return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vornq_u64 (uint64x2_t __a, uint64x2_t __b) +{ + return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0); +} + + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_p16 (poly16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_f32 (float32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_p64 (poly64x1_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +#endif +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s64 (int64x1_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u64 (uint64x1_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s8 (int8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s16 (int16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_s32 (int32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u8 
(uint8x8_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u16 (uint16x4_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_u32 (uint32x2_t __a) +{ + return (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p8 (poly8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_f32 (float32x2_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_p64 (poly64x1_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +#endif +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s64 (int64x1_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u64 (uint64x1_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s8 (int8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s16 (int16x4_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_s32 (int32x2_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u8 (uint8x8_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u16 (uint16x4_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_u32 (uint32x2_t __a) +{ + return (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p8 (poly8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p16 (poly16x4_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_p64 (poly64x1_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); +} + +#endif +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s64 (int64x1_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 
+vreinterpret_f32_u64 (uint64x1_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfdi ((int64x1_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s8 (int8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s16 (int16x4_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_s32 (int32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u8 (uint8x8_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u16 (uint16x4_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_u32 (uint32x2_t __a) +{ + return (float32x2_t)__builtin_neon_vreinterpretv2sfv2si ((int32x2_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_p8 (poly8x8_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_p16 (poly16x4_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_f32 (float32x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s64 (int64x1_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u64 (uint64x1_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s8 (int8x8_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s16 (int16x4_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_s32 (int32x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv2si (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u8 (uint8x8_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u16 (uint16x4_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv4hi 
((int16x4_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) +vreinterpret_p64_u32 (uint32x2_t __a) +{ + return (poly64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p8 (poly8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p16 (poly16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_f32 (float32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_p64 (poly64x1_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +#endif +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u64 (uint64x1_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdidi ((int64x1_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s8 (int8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s16 (int16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_s32 (int32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2si (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u8 (uint8x8_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u16 (uint16x4_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_u32 (uint32x2_t __a) +{ + return (int64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p8 (poly8x8_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p16 (poly16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_f32 (float32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_p64 (poly64x1_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +#endif +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s64 (int64x1_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdidi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s8 (int8x8_t __a) +{ + return 
(uint64x1_t)__builtin_neon_vreinterpretdiv8qi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s16 (int16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_s32 (int32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2si (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u8 (uint8x8_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u16 (uint16x4_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_u32 (uint32x2_t __a) +{ + return (uint64x1_t)__builtin_neon_vreinterpretdiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p8 (poly8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p16 (poly16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_f32 (float32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_p64 (poly64x1_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +#endif +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s64 (int64x1_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u64 (uint64x1_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s16 (int16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_s32 (int32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u8 (uint8x8_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u16 (uint16x4_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_u32 (uint32x2_t __a) +{ + return (int8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p8 (poly8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p16 (poly16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ 
static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_f32 (float32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_p64 (poly64x1_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +#endif +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s64 (int64x1_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u64 (uint64x1_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s8 (int8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_s32 (int32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u8 (uint8x8_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u16 (uint16x4_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_u32 (uint32x2_t __a) +{ + return (int16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p8 (poly8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p16 (poly16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_f32 (float32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_p64 (poly64x1_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +#endif +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s64 (int64x1_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u64 (uint64x1_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s8 (int8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_s16 (int16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u8 (uint8x8_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ 
((__always_inline__)) +vreinterpret_s32_u16 (uint16x4_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_u32 (uint32x2_t __a) +{ + return (int32x2_t)__builtin_neon_vreinterpretv2siv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p8 (poly8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p16 (poly16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_f32 (float32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_p64 (poly64x1_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +#endif +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s64 (int64x1_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u64 (uint64x1_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qidi ((int64x1_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s8 (int8x8_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s16 (int16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_s32 (int32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u16 (uint16x4_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_u32 (uint32x2_t __a) +{ + return (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p8 (poly8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p16 (poly16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_f32 (float32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_p64 (poly64x1_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +#endif +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s64 (int64x1_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 
+vreinterpret_u16_u64 (uint64x1_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hidi ((int64x1_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s8 (int8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s16 (int16x4_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_s32 (int32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u8 (uint8x8_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_u32 (uint32x2_t __a) +{ + return (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p8 (poly8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p16 (poly16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_f32 (float32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_p64 (poly64x1_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +#endif +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s64 (int64x1_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u64 (uint64x1_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2sidi ((int64x1_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s8 (int8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s16 (int16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_s32 (int32x2_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv2si (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u8 (uint8x8_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_u16 (uint16x4_t __a) +{ + return (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p16 (poly16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_f32 
(float32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p64 (poly64x2_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_p128 (poly128_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s64 (int64x2_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u64 (uint64x2_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s8 (int8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s16 (int16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_s32 (int32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u8 (uint8x16_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u16 (uint16x8_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_u32 (uint32x4_t __a) +{ + return (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p8 (poly8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_f32 (float32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p64 (poly64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_p128 (poly128_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s64 (int64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u64 (uint64x2_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s8 (int8x16_t __a) +{ + return 
(poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s16 (int16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_s32 (int32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u8 (uint8x16_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u16 (uint16x8_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_u32 (uint32x4_t __a) +{ + return (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p8 (poly8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p16 (poly16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p64 (poly64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_p128 (poly128_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s64 (int64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u64 (uint64x2_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s8 (int8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s16 (int16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_s32 (int32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u8 (uint8x16_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u16 (uint16x8_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_u32 (uint32x4_t __a) +{ + return (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a); +} + +#ifdef __ARM_FEATURE_CRYPTO 
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p8 (poly8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p16 (poly16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_f32 (float32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_p128 (poly128_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s64 (int64x2_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div2di (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u64 (uint64x2_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s8 (int8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s16 (int16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_s32 (int32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u8 (uint8x16_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u16 (uint16x8_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_p64_u32 (uint32x4_t __a) +{ + return (poly64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_p8 (poly8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_p16 (poly16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_f32 (float32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4sf (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t 
__attribute__ ((__always_inline__)) +vreinterpretq_p128_p64 (poly64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s64 (int64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u64 (uint64x2_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s8 (int8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s16 (int16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_s32 (int32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4si (__a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u8 (uint8x16_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv16qi ((int8x16_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u16 (uint16x8_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv8hi ((int16x8_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vreinterpretq_p128_u32 (uint32x4_t __a) +{ + return (poly128_t)__builtin_neon_vreinterprettiv4si ((int32x4_t) __a); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p8 (poly8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p16 (poly16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_f32 (float32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p64 (poly64x2_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_p128 (poly128_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u64 (uint64x2_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s8 (int8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s64_s16 (int16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_s32 (int32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u8 (uint8x16_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u16 (uint16x8_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_u32 (uint32x4_t __a) +{ + return (int64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p8 (poly8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p16 (poly16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_f32 (float32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p64 (poly64x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_p128 (poly128_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2diti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s64 (int64x2_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s8 (int8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s16 (int16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_s32 (int32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u8 (uint8x16_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u16 (uint16x8_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_u32 (uint32x4_t __a) +{ + return (uint64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p8 (poly8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ 
static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p16 (poly16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_f32 (float32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p64 (poly64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_p128 (poly128_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s64 (int64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u64 (uint64x2_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s16 (int16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_s32 (int32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u8 (uint8x16_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u16 (uint16x8_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_u32 (uint32x4_t __a) +{ + return (int8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p8 (poly8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p16 (poly16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_f32 (float32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p64 (poly64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_p128 (poly128_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s64 (int64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) 
+vreinterpretq_s16_u64 (uint64x2_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s8 (int8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_s32 (int32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u8 (uint8x16_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u16 (uint16x8_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_u32 (uint32x4_t __a) +{ + return (int16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p8 (poly8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p16 (poly16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_f32 (float32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p64 (poly64x2_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_p128 (poly128_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s64 (int64x2_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u64 (uint64x2_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s8 (int8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_s16 (int16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u8 (uint8x16_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u16 (uint16x8_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_u32 (uint32x4_t __a) +{ + return (int32x4_t)__builtin_neon_vreinterpretv4siv4si ((int32x4_t) __a); +} + +__extension__ static 
__inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p8 (poly8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p16 (poly16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_f32 (float32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p64 (poly64x2_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_p128 (poly128_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s64 (int64x2_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u64 (uint64x2_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s8 (int8x16_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s16 (int16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_s32 (int32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u16 (uint16x8_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_u32 (uint32x4_t __a) +{ + return (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p8 (poly8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p16 (poly16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_f32 (float32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p64 (poly64x2_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_p128 (poly128_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) +vreinterpretq_u16_s64 (int64x2_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u64 (uint64x2_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s8 (int8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s16 (int16x8_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_s32 (int32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u8 (uint8x16_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_u32 (uint32x4_t __a) +{ + return (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p8 (poly8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p16 (poly16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_f32 (float32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a); +} + +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p64 (poly64x2_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +#endif +#ifdef __ARM_FEATURE_CRYPTO +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_p128 (poly128_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siti ((__builtin_neon_ti) __a); +} + +#endif +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s64 (int64x2_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u64 (uint64x2_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s8 (int8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s16 (int16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_s32 (int32x4_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv4si (__a); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u8 (uint8x16_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a); +} + 
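[Editorial aside, not part of the GCC header: the vreinterpret/vreinterpretq intrinsics defined throughout this section only relabel a 64-bit or 128-bit register value under a different element type; no lanes are converted, widened, or reordered. A minimal usage sketch follows, assuming a translation unit that includes <arm_neon.h> and is built with NEON enabled; the helper name bswap32_lanes is hypothetical.]

#include <arm_neon.h>

/* Byte-swap every 32-bit lane of a vector: view the 128 bits as 16 bytes,
   reverse the bytes within each 32-bit group, then view the result as four
   32-bit lanes again.  Only the vrev32q_u8 step touches the data; the two
   vreinterpretq calls cost nothing at run time.  */
static inline uint32x4_t
bswap32_lanes (uint32x4_t x)
{
  uint8x16_t bytes = vreinterpretq_u8_u32 (x);  /* same bits, new element type */
  bytes = vrev32q_u8 (bytes);                   /* reverse bytes per 32-bit lane */
  return vreinterpretq_u32_u8 (bytes);          /* back to four uint32_t lanes */
}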
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_u16 (uint16x8_t __a) +{ + return (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a); +} + + +#ifdef __ARM_FEATURE_CRYPTO + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vldrq_p128 (poly128_t const * __ptr) +{ +#ifdef __ARM_BIG_ENDIAN + poly64_t* __ptmp = (poly64_t*) __ptr; + poly64_t __d0 = vld1_p64 (__ptmp); + poly64_t __d1 = vld1_p64 (__ptmp + 1); + return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0)); +#else + return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr)); +#endif +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) +{ +#ifdef __ARM_BIG_ENDIAN + poly64x2_t __tmp = vreinterpretq_p64_p128 (__val); + poly64_t __d0 = vget_high_p64 (__tmp); + poly64_t __d1 = vget_low_p64 (__tmp); + vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1)); +#else + vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val)); +#endif +} + +/* The vceq_p64 intrinsic does not map to a single instruction. + Instead we emulate it by performing a 32-bit variant of vceq + and applying a pairwise min reduction to the result. + vceq_u32 will produce two 32-bit halves, each of which will contain either + all ones or all zeros depending on whether the corresponding 32-bit + halves of the poly64_t were equal. The whole poly64_t values are equal + if and only if both halves are equal, i.e. vceq_u32 returns all ones. + If either half of the result is all zeros, then the whole result is all zeros. + This is what the pairwise min reduction achieves. */ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vceq_u32 (__t_a, __t_b); + uint32x2_t __m = vpmin_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +/* The vtst_p64 intrinsic does not map to a single instruction. + We emulate it in a way similar to vceq_p64 above, but here we do + a reduction with max, since if any pair of corresponding bits + in the two poly64_t values are both set, then the whole result must be all ones.
*/ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vtst_u32 (__t_a, __t_b); + uint32x2_t __m = vpmax_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaeseq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aese (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesdq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aesd (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesmcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesmc (__data); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesimcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesimc (__data); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsha1h_u32 (uint32_t __hash_e) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + __t = __builtin_arm_crypto_sha1h (__t); + return vgetq_lane_u32 (__t, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) +{ + return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) +{ + return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha256su1 (__tw0_3, 
__w8_11, __w12_15); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_p64 (poly64_t __a, poly64_t __b) +{ + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) +{ + poly64_t __t1 = vget_high_p64 (__a); + poly64_t __t2 = vget_high_p64 (__b); + + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2); +} + +#endif +#ifdef __cplusplus +} +#endif +#endif +#endif diff --git a/gcc-4.9/gcc/config/arm/arm_neon_builtins.def b/gcc-4.9/gcc/config/arm/arm_neon_builtins.def new file mode 100644 index 000000000..a00951ab6 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/arm_neon_builtins.def @@ -0,0 +1,212 @@ +/* NEON builtin definitions for ARM. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +VAR10 (BINOP, vadd, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), +VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), +VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (BINOP, vaddhn, v8hi, v4si, v2di), +VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si), +VAR2 (TERNOP, vfma, v2sf, v4sf), +VAR2 (TERNOP, vfms, v2sf, v4sf), +VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si), +VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si), +VAR2 (TERNOP, vqdmlal, v4hi, v2si), +VAR2 (TERNOP, vqdmlsl, v4hi, v2si), +VAR3 (BINOP, vmull, v8qi, v4hi, v2si), +VAR2 (SCALARMULL, vmull_n, v4hi, v2si), +VAR2 (LANEMULL, vmull_lane, v4hi, v2si), +VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si), +VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si), +VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si), +VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si), +VAR2 (BINOP, vqdmull, v4hi, v2si), +VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), +VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), 
+VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), +VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR3 (BINOP, vsubhn, v8hi, v4si, v2di), +VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR2 (BINOP, vcage, v2sf, v4sf), +VAR2 (BINOP, vcagt, v2sf, v4sf), +VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR3 (BINOP, vabdl, v8qi, v4hi, v2si), +VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR3 (TERNOP, vabal, v8qi, v4hi, v2si), +VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf), +VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf), +VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf), +VAR2 (BINOP, vrecps, v2sf, v4sf), +VAR2 (BINOP, vrsqrts, v2sf, v4sf), +VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR2 (UNOP, vcnt, v8qi, v16qi), +VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), +VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), +VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si), + /* FIXME: vget_lane supports more variants than this! 
*/ +VAR10 (GETLANE, vget_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (SETLANE, vset_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di), +VAR10 (DUP, vdup_n, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (DUPLANE, vdup_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di), +VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di), +VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di), +VAR3 (UNOP, vmovn, v8hi, v4si, v2di), +VAR3 (UNOP, vqmovn, v8hi, v4si, v2di), +VAR3 (UNOP, vqmovun, v8hi, v4si, v2di), +VAR3 (UNOP, vmovl, v8qi, v4hi, v2si), +VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (LANEMAC, vmlal_lane, v4hi, v2si), +VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si), +VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si), +VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si), +VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (SCALARMAC, vmlal_n, v4hi, v2si), +VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si), +VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si), +VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si), +VAR10 (BINOP, vext, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi), +VAR2 (UNOP, vrev16, v8qi, v16qi), +VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf), +VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf), +VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf), +VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf), +VAR10 (SELECT, vbsl, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR2 (RINT, vrintn, v2sf, v4sf), +VAR2 (RINT, vrinta, v2sf, v4sf), +VAR2 (RINT, vrintp, v2sf, v4sf), +VAR2 (RINT, vrintm, v2sf, v4sf), +VAR2 (RINT, vrintz, v2sf, v4sf), +VAR2 (RINT, vrintx, v2sf, v4sf), +VAR1 (VTBL, vtbl1, v8qi), +VAR1 (VTBL, vtbl2, v8qi), +VAR1 (VTBL, vtbl3, v8qi), +VAR1 (VTBL, vtbl4, v8qi), +VAR1 (VTBX, vtbx1, v8qi), +VAR1 (VTBX, vtbx2, v8qi), +VAR1 (VTBX, vtbx3, v8qi), +VAR1 (VTBX, vtbx4, v8qi), +VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di), +VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di), +VAR6 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR6 (REINTERP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti), +VAR10 (LOAD1, vld1, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOAD1LANE, vld1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOAD1, vld1_dup, + v8qi, v4hi, v2si, v2sf, 
di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (STORE1, vst1, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (STORE1LANE, vst1_lane, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR9 (LOADSTRUCT, + vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld2_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst2, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst2_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR9 (LOADSTRUCT, + vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld3_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst3, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst3_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR9 (LOADSTRUCT, vld4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (LOADSTRUCTLANE, vld4_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di), +VAR9 (STORESTRUCT, vst4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), +VAR7 (STORESTRUCTLANE, vst4_lane, + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +VAR10 (LOGICBINOP, vand, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vorr, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (BINOP, veor, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vbic, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR10 (LOGICBINOP, vorn, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) diff --git a/gcc-4.9/gcc/config/arm/bpabi.h b/gcc-4.9/gcc/config/arm/bpabi.h new file mode 100644 index 000000000..bc223f8e3 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/bpabi.h @@ -0,0 +1,163 @@ +/* Configuration file for ARM BPABI targets. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Use the AAPCS ABI by default. */ +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS + +/* Assume that AAPCS ABIs should adhere to the full BPABI. */ +#define TARGET_BPABI (TARGET_AAPCS_BASED) + +/* BPABI targets use EABI frame unwinding tables. */ +#undef ARM_UNWIND_INFO +#define ARM_UNWIND_INFO 1 + +/* Section 4.1 of the AAPCS requires the use of VFP format. */ +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "vfp" + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT MASK_BIG_END +#else +#define TARGET_ENDIAN_DEFAULT 0 +#endif + +/* EABI targets should enable interworking by default. 
*/ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_INTERWORK | TARGET_ENDIAN_DEFAULT) + +/* The ARM BPABI functions return a boolean; they use no special + calling convention. */ +#define FLOAT_LIB_COMPARE_RETURNS_BOOL(MODE, COMPARISON) TARGET_BPABI + +/* The BPABI integer comparison routines return { -1, 0, 1 }. */ +#define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI + +#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*"\ + "|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}" + +#if TARGET_BIG_ENDIAN_DEFAULT +#define BE8_LINK_SPEC \ + " %{!mlittle-endian:%{march=armv7-a|mcpu=cortex-a5 \ + |mcpu=cortex-a7 \ + |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \ + |mcpu=cortex-a12 \ + |mcpu=cortex-a15.cortex-a7 \ + |mcpu=marvell-pj4 \ + |mcpu=cortex-a53 \ + |mcpu=cortex-a57 \ + |mcpu=cortex-a57.cortex-a53 \ + |mcpu=generic-armv7-a \ + |march=armv7ve \ + |march=armv7-m|mcpu=cortex-m3 \ + |march=armv7e-m|mcpu=cortex-m4 \ + |march=armv6-m|mcpu=cortex-m0 \ + |march=armv8-a \ + :%{!r:--be8}}}" +#else +#define BE8_LINK_SPEC \ + " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5 \ + |mcpu=cortex-a7 \ + |mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15 \ + |mcpu=cortex-a12 \ + |mcpu=cortex-a15.cortex-a7 \ + |mcpu=cortex-a53 \ + |mcpu=cortex-a57 \ + |mcpu=cortex-a57.cortex-a53 \ + |mcpu=marvell-pj4 \ + |mcpu=generic-armv7-a \ + |march=armv7ve \ + |march=armv7-m|mcpu=cortex-m3 \ + |march=armv7e-m|mcpu=cortex-m4 \ + |march=armv6-m|mcpu=cortex-m0 \ + |march=armv8-a \ + :%{!r:--be8}}}" +#endif + +/* Tell the assembler to build BPABI binaries. */ +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC \ + "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC + +#ifndef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC "" +#endif + +/* Split out the EABI common values so other targets can use it. */ +#define EABI_LINK_SPEC \ + TARGET_FIX_V4BX_SPEC BE8_LINK_SPEC + +/* The generic link spec in elf.h does not support shared libraries. */ +#define BPABI_LINK_SPEC \ + "%{mbig-endian:-EB} %{mlittle-endian:-EL} " \ + "%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic} " \ + "-X" SUBTARGET_EXTRA_LINK_SPEC EABI_LINK_SPEC + +#undef LINK_SPEC +#define LINK_SPEC BPABI_LINK_SPEC + +/* The BPABI requires that we always use an out-of-line implementation + of RTTI comparison, even if the target supports weak symbols, + because the same object file might be used on a target that does + not support merging symbols across DLL boundaries. This macro is + broken out separately so that it can be used within + TARGET_OS_CPP_BUILTINS in configuration files for systems based on + the BPABI. */ +#define TARGET_BPABI_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__GXX_TYPEINFO_EQUALITY_INLINE=0"); \ + } \ + while (false) + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + TARGET_BPABI_CPP_BUILTINS() + +/* The BPABI specifies the use of .{init,fini}_array. Therefore, we + do not want GCC to put anything into the .{init,fini} sections. */ +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP +#define INIT_ARRAY_SECTION_ASM_OP ARM_EABI_CTORS_SECTION_OP +#define FINI_ARRAY_SECTION_ASM_OP ARM_EABI_DTORS_SECTION_OP + +/* The legacy _mcount implementation assumes r11 points to a + 4-word APCS frame. This is generally not true for EABI targets, + particularly not in Thumb mode. We assume the mcount + implementation does not require a counter variable (No Counter). + Note that __gnu_mcount_nc will be entered with a misaligned stack. 
+ This is OK because it uses a special calling convention anyway. */ + +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 1 +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + fprintf (STREAM, "\tpush\t{lr}\n"); \ + fprintf (STREAM, "\tbl\t__gnu_mcount_nc\n"); \ +} + +#undef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED 0 + +/* __gnu_mcount_nc restores the original LR value before returning. Ensure + that there is no unnecessary hook set up. */ +#undef PROFILE_HOOK diff --git a/gcc-4.9/gcc/config/arm/coff.h b/gcc-4.9/gcc/config/arm/coff.h new file mode 100644 index 000000000..7deb23898 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/coff.h @@ -0,0 +1,82 @@ +/* Definitions of target machine for GNU compiler. + For ARM with COFF object format. + Copyright (C) 1995-2014 Free Software Foundation, Inc. + Contributed by Doug Evans (devans@cygnus.com). + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Note - it is important that this definition matches the one in tcoff.h. */ +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + + +/* Run-time Target Specification. */ +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) + +#ifndef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "mfloat-abi=soft", "mno-thumb-interwork" } +#endif + +/* This is COFF, but prefer stabs. */ +#define SDB_DEBUGGING_INFO 1 + +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + + +#define TARGET_ASM_FILE_START_APP_OFF true + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_coff_asm_named_section + +/* Support the ctors/dtors and other sections. */ + +#undef INIT_SECTION_ASM_OP + +/* Define this macro if jump tables (for `tablejump' insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. */ +/* We put ARM and Thumb-2 jump tables in the text section, because it makes + the code more efficient, but for Thumb-1 it's better to put them out of + band unless we are generating compressed tables. */ +#define JUMP_TABLES_IN_TEXT_SECTION \ + (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) + +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata" +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP "\t.section .ctors,\"x\"" +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP "\t.section .dtors,\"x\"" + +/* Support the ctors/dtors sections for g++. */ + +/* __CTOR_LIST__ and __DTOR_LIST__ must be defined by the linker script. */ +#define CTOR_LISTS_DEFINED_EXTERNALLY + +#undef DO_GLOBAL_CTORS_BODY +#undef DO_GLOBAL_DTORS_BODY + +/* The ARM development system defines __main. 
*/ +#define NAME__MAIN "__gccmain" +#define SYMBOL__MAIN __gccmain + +#define SUPPORTS_INIT_PRIORITY 0 diff --git a/gcc-4.9/gcc/config/arm/constraints.md b/gcc-4.9/gcc/config/arm/constraints.md new file mode 100644 index 000000000..85dd116ce --- /dev/null +++ b/gcc-4.9/gcc/config/arm/constraints.md @@ -0,0 +1,438 @@ +;; Constraint definitions for ARM and Thumb +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The following register constraints have been used: +;; - in ARM/Thumb-2 state: t, w, x, y, z +;; - in Thumb state: h, b +;; - in both states: l, c, k, q, US +;; In ARM state, 'l' is an alias for 'r' +;; 'f' and 'v' were previously used for FPA and MAVERICK registers. + +;; The following normal constraints have been used: +;; in ARM/Thumb-2 state: G, I, j, J, K, L, M +;; in Thumb-1 state: I, J, K, L, M, N, O +;; 'H' was previously used for FPA. + +;; The following multi-letter normal constraints have been used: +;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Do, Dv, Dy, Di, Dt, Dp, Dz +;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe +;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py + +;; The following memory constraints have been used: +;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us +;; in ARM state: Uq +;; in Thumb state: Uu, Uw + + +(define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS" + "The VFP registers @code{s0}-@code{s31}.") + +(define_register_constraint "w" + "TARGET_32BIT ? (TARGET_VFPD32 ? VFP_REGS : VFP_LO_REGS) : NO_REGS" + "The VFP registers @code{d0}-@code{d15}, or @code{d0}-@code{d31} for VFPv3.") + +(define_register_constraint "x" "TARGET_32BIT ? VFP_D0_D7_REGS : NO_REGS" + "The VFP registers @code{d0}-@code{d7}.") + +(define_register_constraint "y" "TARGET_REALLY_IWMMXT ? IWMMXT_REGS : NO_REGS" + "The Intel iWMMX co-processor registers.") + +(define_register_constraint "z" + "TARGET_REALLY_IWMMXT ? IWMMXT_GR_REGS : NO_REGS" + "The Intel iWMMX GR registers.") + +(define_register_constraint "l" "TARGET_THUMB ? LO_REGS : GENERAL_REGS" + "In Thumb state the core registers @code{r0}-@code{r7}.") + +(define_register_constraint "h" "TARGET_THUMB ? HI_REGS : NO_REGS" + "In Thumb state the core registers @code{r8}-@code{r15}.") + +(define_constraint "j" + "A constant suitable for a MOVW instruction. (ARM/Thumb-2)" + (and (match_test "TARGET_32BIT && arm_arch_thumb2") + (ior (match_code "high") + (and (match_code "const_int") + (match_test "(ival & 0xffff0000) == 0"))))) + +(define_constraint "Pj" + "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)" + (and (match_code "const_int") + (and (match_test "TARGET_THUMB2") + (match_test "(ival & 0xfffff000) == 0")))) + +(define_constraint "PJ" + "@internal A constant that satisfies the Pj constrant if negated." 
+ (and (match_code "const_int") + (and (match_test "TARGET_THUMB2") + (match_test "((-ival) & 0xfffff000) == 0")))) + +(define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + +(define_register_constraint "q" "(TARGET_ARM && TARGET_LDRD) ? CORE_REGS : GENERAL_REGS" + "@internal In ARM state with LDRD support, core registers, otherwise general registers.") + +(define_register_constraint "b" "TARGET_THUMB ? BASE_REGS : NO_REGS" + "@internal + Thumb only. The union of the low registers and the stack register.") + +(define_register_constraint "c" "CC_REG" + "@internal The condition code register.") + +(define_register_constraint "Cs" "CALLER_SAVE_REGS" + "@internal The caller save registers. Useful for sibcalls.") + +(define_constraint "I" + "In ARM/Thumb-2 state a constant that can be used as an immediate value in a + Data Processing instruction. In Thumb-1 state a constant in the range + 0-255." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (ival) + : ival >= 0 && ival <= 255"))) + +(define_constraint "J" + "In ARM/Thumb-2 state a constant in the range @minus{}4095-4095. In Thumb-1 + state a constant in the range @minus{}255-@minus{}1." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? (ival >= -4095 && ival <= 4095) + : (ival >= -255 && ival <= -1)"))) + +(define_constraint "K" + "In ARM/Thumb-2 state a constant that satisfies the @code{I} constraint if + inverted. In Thumb-1 state a constant that satisfies the @code{I} + constraint multiplied by any power of 2." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (~ival) + : thumb_shiftable_const (ival)"))) + +(define_constraint "L" + "In ARM/Thumb-2 state a constant that satisfies the @code{I} constraint if + negated. In Thumb-1 state a constant in the range @minus{}7-7." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? const_ok_for_arm (-ival) + : (ival >= -7 && ival <= 7)"))) + +;; The ARM state version is internal... +;; @internal In ARM/Thumb-2 state a constant in the range 0-32 or any +;; power of 2. +(define_constraint "M" + "In Thumb-1 state a constant that is a multiple of 4 in the range 0-1020." + (and (match_code "const_int") + (match_test "TARGET_32BIT ? ((ival >= 0 && ival <= 32) + || (((ival & (ival - 1)) & 0xFFFFFFFF) == 0)) + : ival >= 0 && ival <= 1020 && (ival & 3) == 0"))) + +(define_constraint "N" + "Thumb-1 state a constant in the range 0-31." + (and (match_code "const_int") + (match_test "!TARGET_32BIT && (ival >= 0 && ival <= 31)"))) + +(define_constraint "O" + "In Thumb-1 state a constant that is a multiple of 4 in the range + @minus{}508-508." 
+ (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -508 && ival <= 508 + && ((ival & 3) == 0)"))) + +(define_constraint "Pa" + "@internal In Thumb-1 state a constant in the range -510 to +510" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -510 && ival <= 510 + && (ival > 255 || ival < -255)"))) + +(define_constraint "Pb" + "@internal In Thumb-1 state a constant in the range -262 to +262" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= -262 && ival <= 262 + && (ival > 255 || ival < -255)"))) + +(define_constraint "Pc" + "@internal In Thumb-1 state a constant that is in the range 1021 to 1275" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 + && ival > 1020 && ival <= 1275"))) + +(define_constraint "Pd" + "@internal In Thumb state a constant in the range 0 to 7" + (and (match_code "const_int") + (match_test "TARGET_THUMB && ival >= 0 && ival <= 7"))) + +(define_constraint "Pe" + "@internal In Thumb-1 state a constant in the range 256 to +510" + (and (match_code "const_int") + (match_test "TARGET_THUMB1 && ival >= 256 && ival <= 510"))) + +(define_constraint "Ps" + "@internal In Thumb-2 state a constant in the range -255 to +255" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 255"))) + +(define_constraint "Pt" + "@internal In Thumb-2 state a constant in the range -7 to +7" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -7 && ival <= 7"))) + +(define_constraint "Pu" + "@internal In Thumb-2 state a constant in the range +1 to +8" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= 1 && ival <= 8"))) + +(define_constraint "Pv" + "@internal In Thumb-2 state a constant in the range -255 to 0" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= 0"))) + +(define_constraint "Pw" + "@internal In Thumb-2 state a constant in the range -255 to -1" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -255 && ival <= -1"))) + +(define_constraint "Px" + "@internal In Thumb-2 state a constant in the range -7 to -1" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= -7 && ival <= -1"))) + +(define_constraint "Py" + "@internal In Thumb-2 state a constant in the range 0 to 255" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255"))) + +(define_constraint "Pz" + "@internal In Thumb-2 state the constant 0" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && (ival == 0)"))) + +(define_constraint "G" + "In ARM/Thumb-2 state the floating-point constant 0." + (and (match_code "const_double") + (match_test "TARGET_32BIT && arm_const_double_rtx (op)"))) + +(define_constraint "Dz" + "@internal + In ARM/Thumb-2 state a vector of constant zeros." + (and (match_code "const_vector") + (match_test "TARGET_NEON && op == CONST0_RTX (mode)"))) + +(define_constraint "Da" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with two Data Processing insns." + (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 2"))) + +(define_constraint "Db" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with three Data Processing insns." 
+ (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 3"))) + +(define_constraint "Dc" + "@internal + In ARM/Thumb-2 state a const_int, const_double or const_vector that can + be generated with four Data Processing insns. This pattern is disabled + if optimizing for space or when we have load-delay slots to fill." + (and (match_code "const_double,const_int,const_vector") + (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4 + && !(optimize_size || arm_ld_sched)"))) + +(define_constraint "Dd" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn adddi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)"))) + +(define_constraint "De" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn anddi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)"))) + +(define_constraint "Df" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn iordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)"))) + +(define_constraint "Dg" + "@internal + In ARM/Thumb-2 state a const_int that can be used by insn xordi." + (and (match_code "const_int") + (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)"))) + +(define_constraint "Di" + "@internal + In ARM/Thumb-2 state a const_int or const_double where both the high + and low SImode words can be generated as immediates in 32-bit instructions." + (and (match_code "const_double,const_int") + (match_test "TARGET_32BIT && arm_const_double_by_immediates (op)"))) + +(define_constraint "Dn" + "@internal + In ARM/Thumb-2 state a const_vector or const_int which can be loaded with a + Neon vmov immediate instruction." + (and (match_code "const_vector,const_int") + (match_test "TARGET_32BIT + && imm_for_neon_mov_operand (op, GET_MODE (op))"))) + +(define_constraint "Dl" + "@internal + In ARM/Thumb-2 state a const_vector which can be used with a Neon vorr or + vbic instruction." + (and (match_code "const_vector") + (match_test "TARGET_32BIT + && imm_for_neon_logic_operand (op, GET_MODE (op))"))) + +(define_constraint "DL" + "@internal + In ARM/Thumb-2 state a const_vector which can be used with a Neon vorn or + vand instruction." + (and (match_code "const_vector") + (match_test "TARGET_32BIT + && imm_for_neon_inv_logic_operand (op, GET_MODE (op))"))) + +(define_constraint "Do" + "@internal + In ARM/Thumb2 state valid offset for an ldrd/strd instruction." + (and (match_code "const_int") + (match_test "TARGET_LDRD && offset_ok_for_ldrd_strd (ival)"))) + +(define_constraint "Dv" + "@internal + In ARM/Thumb-2 state a const_double which can be used with a VFP fconsts + instruction." + (and (match_code "const_double") + (match_test "TARGET_32BIT && vfp3_const_double_rtx (op)"))) + +(define_constraint "Dy" + "@internal + In ARM/Thumb-2 state a const_double which can be used with a VFP fconstd + instruction." 
+ (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP_DOUBLE && vfp3_const_double_rtx (op)"))) + +(define_constraint "Dt" + "@internal + In ARM/ Thumb2 a const_double which can be used with a vcvt.f32.s32 with fract bits operation" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) + +(define_constraint "Dp" + "@internal + In ARM/ Thumb2 a const_double which can be used with a vcvt.s32.f32 with bits operation" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_bits (op)"))) + +(define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" + "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.") + +(define_memory_constraint "Ua" + "@internal + An address valid for loading/storing register exclusive" + (match_operand 0 "mem_noofs_operand")) + +(define_memory_constraint "Ut" + "@internal + In ARM/Thumb-2 state an address valid for loading/storing opaque structure + types wider than TImode." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_struct_mem_operand (op)"))) + +(define_memory_constraint "Uv" + "@internal + In ARM/Thumb-2 state a valid VFP load/store address." + (and (match_code "mem") + (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, FALSE)"))) + +(define_memory_constraint "Uy" + "@internal + In ARM/Thumb-2 state a valid iWMMX load/store address." + (and (match_code "mem") + (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, TRUE)"))) + +(define_memory_constraint "Un" + "@internal + In ARM/Thumb-2 state a valid address for Neon doubleword vector + load/store instructions." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0, true)"))) + +(define_memory_constraint "Um" + "@internal + In ARM/Thumb-2 state a valid address for Neon element and structure + load/store instructions." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) + +(define_memory_constraint "Us" + "@internal + In ARM/Thumb-2 state a valid address for non-offset loads/stores of + quad-word values in four ARM registers." + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1, true)"))) + +(define_memory_constraint "Uq" + "@internal + In ARM state an address valid in ldrsb instructions." + (and (match_code "mem") + (match_test "TARGET_ARM + && arm_legitimate_address_outer_p (GET_MODE (op), XEXP (op, 0), + SIGN_EXTEND, 0)"))) + +(define_memory_constraint "Q" + "@internal + In ARM/Thumb-2 state an address that is a single base register." + (and (match_code "mem") + (match_test "REG_P (XEXP (op, 0))"))) + +(define_memory_constraint "Uu" + "@internal + In Thumb state an address that is valid in 16bit encoding." + (and (match_code "mem") + (match_test "TARGET_THUMB + && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + 0)"))) + +; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p +; are actually LDM/STM instructions, so cannot be used to access unaligned +; data. +(define_memory_constraint "Uw" + "@internal + In Thumb state an address that is valid in 16bit encoding, and that can be + used for unaligned accesses." + (and (match_code "mem") + (match_test "TARGET_THUMB + && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0), + 0) + && GET_CODE (XEXP (op, 0)) != POST_INC"))) + +(define_constraint "US" + "@internal + US is a symbol reference." 
+ (match_code "symbol_ref") +) + +;; We used to have constraint letters for S and R in ARM state, but +;; all uses of these now appear to have been removed. + +;; Additionally, we used to have a Q constraint in Thumb state, but +;; this wasn't really a valid memory constraint. Again, all uses of +;; this now seem to have been removed. + diff --git a/gcc-4.9/gcc/config/arm/cortex-a15-neon.md b/gcc-4.9/gcc/config/arm/cortex-a15-neon.md new file mode 100644 index 000000000..02d4a530b --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a15-neon.md @@ -0,0 +1,677 @@ +;; ARM Cortex-A15 NEON pipeline description +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_attr "cortex_a15_neon_type" + "neon_abd, neon_abd_q, neon_arith_acc, neon_arith_acc_q, + neon_arith_basic, neon_arith_complex, + neon_reduc_add_acc, neon_multiply, neon_multiply_q, + neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long, + neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,\ + neon_shift_imm_complex, + neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex, + neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith, + neon_fp_arith_q, neon_fp_cvt_int, + neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul, + neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte, + neon_fp_recpe_rsqrte_q, neon_bitops, neon_bitops_q, neon_from_gp, + neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp, + neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e, + neon_load_f, neon_store_a, neon_store_b, neon_store_c, neon_store_d, + neon_store_e, neon_store_f, neon_store_g, neon_store_h, + unknown" + (cond [ + (eq_attr "type" "neon_abd, neon_abd_long") + (const_string "neon_abd") + (eq_attr "type" "neon_abd_q") + (const_string "neon_abd_q") + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\ + neon_reduc_add_acc_q") + (const_string "neon_arith_acc") + (eq_attr "type" "neon_arith_acc_q") + (const_string "neon_arith_acc_q") + (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\ + neon_add_widen, neon_neg, neon_neg_q,\ + neon_reduc_add, neon_reduc_add_q,\ + neon_reduc_add_long, neon_sub, neon_sub_q,\ + neon_sub_long, neon_sub_widen, neon_logic,\ + neon_logic_q, neon_tst, neon_tst_q") + (const_string "neon_arith_basic") + (eq_attr "type" "neon_abs, neon_abs_q, neon_add_halve_narrow_q,\ + neon_add_halve, neon_add_halve_q,\ + neon_sub_halve, neon_sub_halve_q, neon_qabs,\ + neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\ + neon_qneg_q, neon_qsub, neon_qsub_q,\ + neon_sub_halve_narrow_q,\ + neon_compare, neon_compare_q,\ + neon_compare_zero, neon_compare_zero_q,\ + neon_minmax, neon_minmax_q, neon_reduc_minmax,\ + neon_reduc_minmax_q") + (const_string "neon_arith_complex") + + (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\ + neon_mul_h_scalar, neon_mul_s_scalar,\ + neon_sat_mul_b, neon_sat_mul_h,\ + neon_sat_mul_s, 
neon_sat_mul_h_scalar,\ + neon_sat_mul_s_scalar,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_mul_s_long,\ + neon_mul_h_scalar_long, neon_mul_s_scalar_long,\ + neon_sat_mul_b_long, neon_sat_mul_h_long,\ + neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long") + (const_string "neon_multiply") + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\ + neon_mul_h_scalar_q, neon_mul_s_scalar_q,\ + neon_sat_mul_b_q, neon_sat_mul_h_q,\ + neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\ + neon_sat_mul_s_scalar_q") + (const_string "neon_multiply_q") + (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\ + neon_mla_h_scalar, neon_mla_s_scalar,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_mla_s_long,\ + neon_mla_h_scalar_long, neon_mla_s_scalar_long") + (const_string "neon_mla") + (eq_attr "type" "neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\ + neon_mla_h_scalar_q, neon_mla_s_scalar_q") + (const_string "neon_mla_q") + (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long") + (const_string "neon_sat_mla_long") + + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") + (const_string "neon_shift_acc") + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ + neon_shift_imm_narrow_q, neon_shift_imm_long") + (const_string "neon_shift_imm_basic") + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\ + neon_sat_shift_imm_narrow_q") + (const_string "neon_shift_imm_complex") + (eq_attr "type" "neon_shift_reg") + (const_string "neon_shift_reg_basic") + (eq_attr "type" "neon_shift_reg_q") + (const_string "neon_shift_reg_basic_q") + (eq_attr "type" "neon_sat_shift_reg") + (const_string "neon_shift_reg_complex") + (eq_attr "type" "neon_sat_shift_reg_q") + (const_string "neon_shift_reg_complex_q") + + (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\ + neon_fp_abs_s, neon_fp_abs_s_q") + (const_string "neon_fp_negabs") + (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\ + neon_fp_reduc_add_s, neon_fp_compare_s,\ + neon_fp_minmax_s, neon_fp_minmax_s_q,\ + neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q") + (const_string "neon_fp_arith") + (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\ + neon_fp_reduc_add_s_q, neon_fp_compare_s_q") + (const_string "neon_fp_arith_q") + (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s") + (const_string "neon_fp_cvt_int") + (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q") + (const_string "neon_fp_cvt_int_q") + (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h") + (const_string "neon_fp_cvt16") + (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar") + (const_string "neon_fp_mul") + (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q") + (const_string "neon_fp_mul_q") + (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar") + (const_string "neon_fp_mla") + (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q") + (const_string "neon_fp_mla_q") + (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s") + (const_string "neon_fp_recpe_rsqrte") + (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q") + (const_string "neon_fp_recpe_rsqrte_q") + + (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\ + neon_rev, neon_permute,\ + neon_tbl1, neon_tbl2, neon_zip,\ + neon_dup, neon_dup_q, neon_ext, neon_ext_q,\ + neon_move, neon_move_q, neon_move_narrow_q") + (const_string "neon_bitops") + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\ + neon_rev_q, neon_permute_q") + (const_string "neon_bitops_q") + (eq_attr "type" "neon_from_gp") + 
(const_string "neon_from_gp") + (eq_attr "type" "neon_from_gp_q") + (const_string "neon_from_gp_q") + (eq_attr "type" "neon_tbl3, neon_tbl4") + (const_string "neon_tbl3_tbl4") + (eq_attr "type" "neon_zip_q") + (const_string "neon_zip_q") + (eq_attr "type" "neon_to_gp, neon_to_gp_q") + (const_string "neon_to_gp") + + (eq_attr "type" "f_loads, f_loadd,\ + neon_load1_1reg, neon_load1_1reg_q,\ + neon_load1_2reg, neon_load1_2reg_q") + (const_string "neon_load_a") + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q") + (const_string "neon_load_b") + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load1_all_lanes, neon_load1_all_lanes_q,\ + neon_load2_2reg, neon_load2_2reg_q,\ + neon_load2_all_lanes, neon_load2_all_lanes_q") + (const_string "neon_load_c") + (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\ + neon_load3_3reg, neon_load3_3reg_q,\ + neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_4reg, neon_load4_4reg_q") + (const_string "neon_load_d") + (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\ + neon_load3_all_lanes, neon_load3_all_lanes_q,\ + neon_load4_all_lanes, neon_load4_all_lanes_q") + (const_string "neon_load_e") + (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q") + (const_string "neon_load_f") + + (eq_attr "type" "f_stores, f_stored,\ + neon_store1_1reg, neon_store1_1reg_q") + (const_string "neon_store_a") + (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q") + (const_string "neon_store_b") + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q") + (const_string "neon_store_c") + (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q") + (const_string "neon_store_d") + (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q,\ + neon_store2_one_lane, neon_store2_one_lane_q") + (const_string "neon_store_e") + (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\ + neon_store3_one_lane, neon_store3_one_lane_q,\ + neon_store4_one_lane, neon_store4_one_lane_q") + (const_string "neon_store_f") + (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\ + neon_store4_4reg, neon_store4_4reg_q") + (const_string "neon_store_g") + (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q") + (const_string "neon_store_h")] + (const_string "unknown"))) + +(define_automaton "cortex_a15_neon") + +;; Dispatch unit. +(define_cpu_unit "ca15_cx_ij, ca15_cx_ik" "cortex_a15_neon") + +;; Accumulate. +(define_cpu_unit "ca15_cx_acc" "cortex_a15_neon") + +;; The 32x32 integer multiply-accumulate pipeline. +(define_cpu_unit "ca15_cx_imac1" "cortex_a15_neon") +(define_reservation "ca15_cx_imac" "(ca15_cx_ij+ca15_cx_imac1)") + + +;; The 64-bit ALU pipeline. +(define_cpu_unit "ca15_cx_ialu1, ca15_cx_ialu2" "cortex_a15_neon") + +;; IALU with accumulate. +(define_reservation "ca15_cx_ialu_with_acc" "ca15_cx_ik+ca15_cx_ialu2+ca15_cx_acc") + +(define_reservation "ca15_cx_ialu" + "((ca15_cx_ij+ca15_cx_ialu1)|(ca15_cx_ik+ca15_cx_ialu2))") + +;; Integer shift pipeline. +(define_cpu_unit "ca15_cx_ishf" "cortex_a15_neon") +(define_reservation "ca15_cx_ishf_with_acc" "ca15_cx_ik+ca15_cx_ishf+ca15_cx_acc") + +;; SIMD multiply pipeline. 
+(define_cpu_unit "ca15_cx_fmul1, ca15_cx_fmul2, ca15_cx_fmul3, ca15_cx_fmul4" + "cortex_a15_neon") + +(define_reservation "ca15_cx_fmul" + "(ca15_cx_ij+(ca15_cx_fmul1|ca15_cx_fmul2))|\ + (ca15_cx_ik+(ca15_cx_fmul3|ca15_cx_fmul4))") + +(define_reservation "ca15_cx_fmul_2" + "(ca15_cx_ij+(ca15_cx_fmul1|ca15_cx_fmul2))+\ + (ca15_cx_ik+(ca15_cx_fmul3|ca15_cx_fmul4))") + +;; SIMD ALU pipeline. +(define_cpu_unit "ca15_cx_falu1, ca15_cx_falu2, ca15_cx_falu3, ca15_cx_falu4" + "cortex_a15_neon") + +(define_reservation "ca15_cx_falu" + "(ca15_cx_ij+(ca15_cx_falu1|ca15_cx_falu2))|\ + (ca15_cx_ik+(ca15_cx_falu3|ca15_cx_falu4))") + +(define_reservation "ca15_cx_falu_2" + "(ca15_cx_ij+(ca15_cx_falu1|ca15_cx_falu2))+\ + (ca15_cx_ik+(ca15_cx_falu3|ca15_cx_falu4))") + +;; SIMD multiply-accumulate pipeline. +;; This can be used if fmul and falu are not reserved. +(define_reservation "ca15_cx_fmac" + "((ca15_cx_ij+ca15_cx_fmul1),nothing*2,ca15_cx_falu1)|\ + ((ca15_cx_ij+ca15_cx_fmul2),nothing*2,ca15_cx_falu2)|\ + ((ca15_cx_ik+ca15_cx_fmul3),nothing*2,ca15_cx_falu3)|\ + ((ca15_cx_ik+ca15_cx_fmul4),nothing*2,ca15_cx_falu4)") + +(define_reservation "ca15_cx_fmac_2" + "(((ca15_cx_ij+ca15_cx_fmul1),nothing*2,ca15_cx_falu1)|\ + ((ca15_cx_ij+ca15_cx_fmul2),nothing*2,ca15_cx_falu2))+\ + (((ca15_cx_ik+ca15_cx_fmul3),nothing*2,ca15_cx_falu3)|\ + ((ca15_cx_ik+ca15_cx_fmul4),nothing*2,ca15_cx_falu4))") + + +;; Vector FP multiply pipeline +(define_cpu_unit "ca15_cx_vfp_i" "cortex_a15_neon") + +(define_reservation "ca15_cx_vfp" "ca15_cx_ik+ca15_cx_vfp_i") + +;; Load permute pipeline +(define_reservation "ca15_cx_perm" "ca15_cx_ij|ca15_cx_ik") +(define_reservation "ca15_cx_perm_2" "ca15_cx_ij+ca15_cx_ik") + +;; Integer Arithmetic Instructions. + +(define_insn_reservation "cortex_a15_neon_abd" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_abd")) + "ca15_issue1,ca15_cx_ialu") + +(define_insn_reservation "cortex_a15_neon_abd_q" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_abd_q")) + "ca15_issue2,ca15_cx_ialu*2") + +(define_insn_reservation "cortex_a15_neon_aba" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_arith_acc")) + "ca15_issue1,ca15_cx_ialu_with_acc") + +(define_insn_reservation "cortex_a15_neon_aba_q" 8 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_arith_acc_q")) + "ca15_issue2,ca15_cx_ialu_with_acc*2") + +(define_insn_reservation "cortex_a15_neon_arith_basic" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_arith_basic")) + "ca15_issue1,ca15_cx_ialu") + +(define_insn_reservation "cortex_a15_neon_arith_complex" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_arith_complex")) + "ca15_issue1,ca15_cx_ialu") + +;; Integer Multiply Instructions. 
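For orientation (an added sketch, not part of the checkin), the reservations that follow model NEON integer multiply and multiply-accumulate forms; a widening multiply-accumulate kernel such as the one below is typical of the source that ends up in the neon_multiply / neon_mla classes defined above.

#include <arm_neon.h>
#include <stdint.h>

/* Sum of products of 16-bit elements.  N is assumed to be a multiple of
   four to keep the sketch short; the loop body is dominated by VMLAL.S16,
   a widening multiply-accumulate.  */
static int32_t
dot_s16 (const int16_t *a, const int16_t *b, int n)
{
  int32x4_t acc = vdupq_n_s32 (0);
  int32x2_t sum;
  int i;

  for (i = 0; i < n; i += 4)
    acc = vmlal_s16 (acc, vld1_s16 (a + i), vld1_s16 (b + i));

  sum = vadd_s32 (vget_low_s32 (acc), vget_high_s32 (acc));
  sum = vpadd_s32 (sum, sum);
  return vget_lane_s32 (sum, 0);
}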
+ +(define_insn_reservation "cortex_a15_neon_multiply" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_multiply")) + "ca15_issue1,ca15_cx_imac") + +(define_insn_reservation "cortex_a15_neon_multiply_q" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_multiply_q")) + "ca15_issue2,ca15_cx_imac*2") + +(define_insn_reservation "cortex_a15_neon_mla" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_mla")) + "ca15_issue1,ca15_cx_imac") + +(define_insn_reservation "cortex_a15_neon_mla_q" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_mla_q")) + "ca15_issue1,ca15_cx_imac*2") + +(define_insn_reservation "cortex_a15_neon_sat_mla_long" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_sat_mla_long")) + "ca15_issue1,ca15_cx_imac") + +;; Integer Shift Instructions. + +(define_insn_reservation + "cortex_a15_neon_shift_acc" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_acc")) + "ca15_issue1,ca15_cx_ishf_with_acc") + +(define_insn_reservation + "cortex_a15_neon_shift_imm_basic" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_imm_basic")) + "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_shift_imm_complex" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_imm_complex")) + "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_shift_reg_basic" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_reg_basic")) + "ca15_issue1,ca15_cx_ik+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_shift_reg_basic_q" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_reg_basic_q")) + "ca15_issue2,(ca15_cx_ik+ca15_cx_ishf*2)") + +(define_insn_reservation + "cortex_a15_neon_shift_reg_complex" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_reg_complex")) + "ca15_issue2,ca15_cx_ik+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_shift_reg_complex_q" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_shift_reg_complex_q")) + "ca15_issue2,(ca15_cx_ik+ca15_cx_ishf)*2") + +;; Floating Point Instructions. 
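For orientation (an added sketch, not part of the checkin), the reservations that follow cover vector single-precision arithmetic such as the multiply-accumulate below; note that in this model the accumulating form carries a longer result latency than a plain multiply.

#include <arm_neon.h>

/* acc + x * scale on four single-precision lanes; this typically maps to a
   quad-register VMLA.F32, the kind of instruction the neon_fp_mla classes
   describe.  */
static float32x4_t
scale_add (float32x4_t acc, float32x4_t x, float32x4_t scale)
{
  return vmlaq_f32 (acc, x, scale);
}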
+ +(define_insn_reservation + "cortex_a15_neon_fp_negabs" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_negabs")) + "ca15_issue1,ca15_cx_falu") + +(define_insn_reservation + "cortex_a15_neon_fp_arith" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_arith")) + "ca15_issue1,ca15_cx_falu") + +(define_insn_reservation + "cortex_a15_neon_fp_arith_q" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_arith_q")) + "ca15_issue2,ca15_cx_falu_2") + +(define_insn_reservation + "cortex_a15_neon_fp_cvt_int" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_cvt_int")) + "ca15_issue1,ca15_cx_falu+ca15_cx_ishf") + +(define_insn_reservation + "cortex_a15_neon_fp_cvt_int_q" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_cvt_int_q")) + "ca15_issue2,(ca15_cx_falu+ca15_cx_ishf)*2") + +(define_insn_reservation + "cortex_a15_neon_fp_cvt16" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_cvt16")) + "ca15_issue3,(ca15_cx_falu+ca15_cx_ishf)*2+ca15_cx_falu") + +(define_insn_reservation + "cortex_a15_neon_fp_mul" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_mul")) + "ca15_issue1,ca15_cx_fmul") + +(define_insn_reservation + "cortex_a15_neon_fp_mul_q" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_mul_q")) + "ca15_issue2,ca15_cx_fmul_2") + +(define_insn_reservation + "cortex_a15_neon_fp_mla" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_mla")) + "ca15_issue1,ca15_cx_fmul") + +(define_insn_reservation + "cortex_a15_neon_fp_mla_q" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_mla_q")) + "ca15_issue2,ca15_cx_fmul_2") + +(define_insn_reservation + "cortex_a15_neon_fp_recps_rsqrte" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_recpe_rsqrte")) + "ca15_issue1,ca15_cx_fmac") + +(define_insn_reservation + "cortex_a15_neon_fp_recps_rsqrte_q" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_fp_recpe_rsqrte_q")) + "ca15_issue2,ca15_cx_fmac_2") + +;; Miscelaaneous Instructions. 
+ +(define_insn_reservation + "cortex_a15_neon_bitops" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_bitops")) + "ca15_issue1,ca15_cx_perm") + +(define_insn_reservation + "cortex_a15_neon_bitops_q" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_bitops_q")) + "ca15_issue2,ca15_cx_perm_2") + +(define_insn_reservation + "cortex_a15_neon_from_gp" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_from_gp")) + "ca15_issue2,ca15_ls1+ca15_ls2+ca15_cx_perm") + +(define_insn_reservation + "cortex_a15_neon_from_gp_q" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_from_gp_q")) + "ca15_issue2,ca15_ls1+ca15_ls2+ca15_cx_perm_2") + +(define_insn_reservation + "cortex_a15_neon_tbl3_tbl4" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_tbl3_tbl4")) + "ca15_issue2,ca15_cx_perm_2") + +(define_insn_reservation + "cortex_a15_neon_zip_q" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_zip_q")) + "ca15_issue3,ca15_cx_perm*3") + +(define_insn_reservation + "cortex_a15_neon_to_gp" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_to_gp")) + "ca15_issue2,ca15_ls1+ca15_ls2") + +;; Load Instructions. + +(define_insn_reservation + "cortex_a15_neon_load_a" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_a")) + "ca15_issue1,ca15_ls,ca15_ldr") + +(define_insn_reservation + "cortex_a15_neon_load_b" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_b")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr") + +(define_insn_reservation + "cortex_a15_neon_load_c" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_c")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr") + +(define_insn_reservation + "cortex_a15_neon_load_d" 11 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_d")) + "ca15_issue1,ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") + +(define_insn_reservation + "cortex_a15_neon_load_e" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_e")) + "ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") + +(define_insn_reservation + "cortex_a15_neon_load_f" 11 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_load_f")) + "ca15_issue3,ca15_issue3+ca15_ls1+ca15_ls2,ca15_ldr*2") + +;; Store Instructions. 
+ +(define_insn_reservation + "cortex_a15_neon_store_a" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_a")) + "ca15_issue1,ca15_ls1+ca15_ls2,ca15_str") + +(define_insn_reservation + "cortex_a15_neon_store_b" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_b")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str*2") + +(define_insn_reservation + "cortex_a15_neon_store_c" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_c")) + "ca15_issue3,ca15_ls1+ca15_ls2,ca15_str*3") + +(define_insn_reservation + "cortex_a15_neon_store_d" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_d")) + "ca15_issue3,ca15_issue1,ca15_ls1+ca15_ls2,ca15_str*4") + +(define_insn_reservation + "cortex_a15_neon_store_e" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_e")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str+ca15_cx_perm") + +(define_insn_reservation + "cortex_a15_neon_store_f" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_f")) + "ca15_issue3,ca15_ls1+ca15_ls2,ca15_str*2+ca15_cx_perm") + +(define_insn_reservation + "cortex_a15_neon_store_g" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_g")) + "ca15_issue3,ca15_issue3+ca15_cx_perm+ca15_ls1+ca15_ls2,ca15_str*2") + +(define_insn_reservation + "cortex_a15_neon_store_h" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "cortex_a15_neon_type" "neon_store_h")) + "ca15_issue3,ca15_issue2+ca15_cx_perm+ca15_ls1+ca15_ls2,ca15_str*2") + +;; VFP Operations. + +(define_insn_reservation "cortex_a15_vfp_const" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fconsts,fconstd")) + "ca15_issue1,ca15_cx_perm") + +(define_insn_reservation "cortex_a15_vfp_adds_subs" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fadds")) + "ca15_issue1,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_addd_subd" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "faddd")) + "ca15_issue2,ca15_cx_vfp*2") + +(define_insn_reservation "cortex_a15_vfp_muls" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmuls")) + "ca15_issue1,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_muld" 12 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmuld")) + "ca15_issue2,ca15_cx_vfp*2") + +(define_insn_reservation "cortex_a15_vfp_macs" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmacs,ffmas")) + "ca15_issue1,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_macd" 11 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmacd,ffmad")) + "ca15_issue2,ca15_cx_vfp*2") + +(define_insn_reservation "cortex_a15_vfp_cvt" 6 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) + "ca15_issue1,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_cmpd" 8 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fcmpd")) + "ca15_issue2,ca15_cx_perm,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_cmps" 8 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fcmps")) + "ca15_issue2,ca15_cx_perm,ca15_cx_vfp") + +(define_insn_reservation "cortex_a15_vfp_arithd" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "ffarithd")) + "ca15_issue2,ca15_cx_perm*2") + +(define_insn_reservation "cortex_a15_vfp_cpys" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fmov")) + "ca15_issue1,ca15_cx_perm") + +(define_insn_reservation "cortex_a15_vfp_to_from_gp" 5 + (and (eq_attr "tune" 
"cortexa15") + (eq_attr "type" "f_mcr, f_mcrr, f_mrc, f_mrrc")) + "ca15_issue1,ca15_ls1+ca15_ls2") + +(define_insn_reservation "cortex_a15_vfp_ariths" 7 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "ffariths")) + "ca15_issue1,ca15_cx_perm") + +(define_insn_reservation "cortex_a15_vfp_divs" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fdivs, fsqrts")) + "ca15_issue1,ca15_cx_ik") + +(define_insn_reservation "cortex_a15_vfp_divd" 18 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "fdivd, fsqrtd")) + "ca15_issue1,ca15_cx_ik") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a15.md b/gcc-4.9/gcc/config/arm/cortex-a15.md new file mode 100644 index 000000000..b3f126a72 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a15.md @@ -0,0 +1,186 @@ +;; ARM Cortex-A15 pipeline description +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; +;; Written by Matthew Gretton-Dann + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a15") + +;; The Cortex-A15 core is modelled as a triple issue pipeline that has +;; the following dispatch units. +;; 1. Two pipelines for simple integer operations: SX1, SX2 +;; 2. Individual units for Neon and FP operations as in cortex-a15-neon.md +;; 3. One pipeline for branch operations: BX +;; 4. One pipeline for integer multiply and divide operations: MX +;; 5. Two pipelines for load and store operations: LS1, LS2 +;; +;; We can issue into three pipelines per-cycle. +;; +;; We assume that where we have unit pairs xx1 is always filled before xx2. 
+ +;; The three issue units +(define_cpu_unit "ca15_i0, ca15_i1, ca15_i2" "cortex_a15") + +(define_reservation "ca15_issue1" "(ca15_i0|ca15_i1|ca15_i2)") +(define_reservation "ca15_issue2" "((ca15_i0+ca15_i1)|(ca15_i1+ca15_i2))") +(define_reservation "ca15_issue3" "(ca15_i0+ca15_i1+ca15_i2)") +(final_presence_set "ca15_i1" "ca15_i0") +(final_presence_set "ca15_i2" "ca15_i1") + +;; The main dispatch units +(define_cpu_unit "ca15_sx1, ca15_sx2" "cortex_a15") +(define_cpu_unit "ca15_ls1, ca15_ls2" "cortex_a15") +(define_cpu_unit "ca15_bx, ca15_mx" "cortex_a15") + +(define_reservation "ca15_ls" "(ca15_ls1|ca15_ls2)") + +;; The extended load-store pipeline +(define_cpu_unit "ca15_ldr, ca15_str" "cortex_a15") + +;; The extended ALU pipeline +(define_cpu_unit "ca15_sx1_alu, ca15_sx1_shf, ca15_sx1_sat" "cortex_a15") +(define_cpu_unit "ca15_sx2_alu, ca15_sx2_shf, ca15_sx2_sat" "cortex_a15") + +;; Simple Execution Unit: +;; +;; Simple ALU without shift +(define_insn_reservation "cortex_a15_alu" 2 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") + +;; ALU ops with immediate shift +(define_insn_reservation "cortex_a15_alu_shift" 3 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + mov_shift,mvn_shift")) + "ca15_issue1,(ca15_sx1,ca15_sx1+ca15_sx1_shf,ca15_sx1_alu)\ + |(ca15_sx2,ca15_sx2+ca15_sx2_shf,ca15_sx2_alu)") + +;; ALU ops with register controlled shift +(define_insn_reservation "cortex_a15_alu_shift_reg" 3 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg")) + "(ca15_issue2,ca15_sx1+ca15_sx2,ca15_sx1_shf,ca15_sx2_alu)\ + |(ca15_issue1,(ca15_issue1+ca15_sx2,ca15_sx1+ca15_sx2_shf)\ + |(ca15_issue1+ca15_sx1,ca15_sx1+ca15_sx1_shf),ca15_sx1_alu)") + +;; Multiply Execution Unit: +;; +;; 32-bit multiplies +(define_insn_reservation "cortex_a15_mult32" 3 + (and (eq_attr "tune" "cortexa15") + (eq_attr "mul32" "yes")) + "ca15_issue1,ca15_mx") + +;; 64-bit multiplies +(define_insn_reservation "cortex_a15_mult64" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "mul64" "yes")) + "ca15_issue1,ca15_mx*2") + +;; Integer divide +(define_insn_reservation "cortex_a15_udiv" 9 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "udiv")) + "ca15_issue1,ca15_mx") + +(define_insn_reservation "cortex_a15_sdiv" 10 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "sdiv")) + "ca15_issue1,ca15_mx") + +;; Block all issue pipes for a cycle +(define_insn_reservation "cortex_a15_block" 1 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "block")) + "ca15_issue3") + +;; Branch execution Unit +;; +;; Branches take one issue slot. +;; No latency as there is no result +(define_insn_reservation "cortex_a15_branch" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "branch")) + "ca15_issue1,ca15_bx") + +;; Load-store execution Unit +;; +;; Loads of up to two words. +(define_insn_reservation "cortex_a15_load1" 4 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "load_byte,load1,load2")) + "ca15_issue1,ca15_ls,ca15_ldr,nothing") + +;; Loads of three or four words. 
+(define_insn_reservation "cortex_a15_load3" 5 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "load3,load4")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_ldr,ca15_ldr,nothing") + +;; Stores of up to two words. +(define_insn_reservation "cortex_a15_store1" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "store1,store2")) + "ca15_issue1,ca15_ls,ca15_str") + +;; Stores of three or four words. +(define_insn_reservation "cortex_a15_store3" 0 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "store3,store4")) + "ca15_issue2,ca15_ls1+ca15_ls2,ca15_str,ca15_str") + +;; We include Neon.md here to ensure that the branch can block the Neon units. +(include "../arm/cortex-a15-neon.md") + +;; We lie with calls. They take up all issue slots, and form a block in the +;; pipeline. The result however is available the next cycle. +(define_insn_reservation "cortex_a15_call" 1 + (and (eq_attr "tune" "cortexa15") + (eq_attr "type" "call")) + "ca15_issue3,\ + ca15_sx1+ca15_sx2+ca15_bx+ca15_mx+ca15_cx_ij+ca15_cx_ik+ca15_ls1+ca15_ls2+\ + ca15_cx_imac1+ca15_cx_ialu1+ca15_cx_ialu2+ca15_cx_ishf+\ + ca15_cx_acc+ca15_cx_fmul1+ca15_cx_fmul2+ca15_cx_fmul3+ca15_cx_fmul4+\ + ca15_cx_falu1+ca15_cx_falu2+ca15_cx_falu3+ca15_cx_falu4+ca15_cx_vfp_i,\ + ca15_sx1_alu+ca15_sx1_shf+ca15_sx1_sat+ca15_sx2_alu+\ + ca15_sx2_shf+ca15_sx2_sat+ca15_ldr+ca15_str") + +;; Simple execution unit bypasses +(define_bypass 1 "cortex_a15_alu" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 2 "cortex_a15_alu_shift" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 2 "cortex_a15_alu_shift_reg" + "cortex_a15_alu,cortex_a15_alu_shift,cortex_a15_alu_shift_reg") +(define_bypass 1 "cortex_a15_alu" "cortex_a15_load1,cortex_a15_load3") +(define_bypass 2 "cortex_a15_alu_shift" "cortex_a15_load1,cortex_a15_load3") +(define_bypass 2 "cortex_a15_alu_shift_reg" + "cortex_a15_load1,cortex_a15_load3") diff --git a/gcc-4.9/gcc/config/arm/cortex-a5.md b/gcc-4.9/gcc/config/arm/cortex-a5.md new file mode 100644 index 000000000..eed098ef9 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a5.md @@ -0,0 +1,311 @@ +;; ARM Cortex-A5 pipeline description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The integer (ALU) pipeline. There are five DPU pipeline +;; stages. However the decode/issue stages operate the same for all +;; instructions, so do not model them. We only need to model the +;; first execute stage because instructions always advance one stage +;; per cycle in order. Only branch instructions may dual-issue, so a +;; single unit covers all of the LS, ALU, MAC and FPU pipelines. 
+ +(define_cpu_unit "cortex_a5_ex1" "cortex_a5") + +;; The branch pipeline. Branches can dual-issue with other instructions +;; (except when those instructions take multiple cycles to issue). + +(define_cpu_unit "cortex_a5_branch" "cortex_a5") + +;; Pseudo-unit for blocking the multiply pipeline when a double-precision +;; multiply is in progress. + +(define_cpu_unit "cortex_a5_fpmul_pipe" "cortex_a5") + +;; The floating-point add pipeline (ex1/f1 stage), used to model the usage +;; of the add pipeline by fmac instructions, etc. + +(define_cpu_unit "cortex_a5_fpadd_pipe" "cortex_a5") + +;; Floating-point div/sqrt (long latency, out-of-order completion). + +(define_cpu_unit "cortex_a5_fp_div_sqrt" "cortex_a5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_alu" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_alu_shift" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "cortex_a5_ex1") + +;; Forwarding path for unshifted operands. + +(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift" + "cortex_a5_alu") + +(define_bypass 1 "cortex_a5_alu,cortex_a5_alu_shift" + "cortex_a5_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results from wr stage only so +;; there's no need to specify bypasses). + +(define_insn_reservation "cortex_a5_mul" 2 + (and (eq_attr "tune" "cortexa5") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage, which is one stage behind +;; the ex1 stage (the first stage we care about for scheduling purposes). The +;; dc1 stage is parallel with ex1, dc2 with ex2 and rot with wr. 
+ +(define_insn_reservation "cortex_a5_load1" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load_byte,load1")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store1" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store1")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load2" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load2")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store2" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store2")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load3" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store3" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_load4" 5 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "load3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_store4" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "store3")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1+cortex_a5_branch,\ + cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Direct branches are the only instructions we can dual-issue (also IT and +;; nop, but those aren't very interesting for scheduling). (The latency here +;; is meant to represent when the branch actually takes place, but may not be +;; entirely correct.) + +(define_insn_reservation "cortex_a5_branch" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "branch,call")) + "cortex_a5_branch") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_fpalu" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ + f_cvt,f_cvtf2i,f_cvti2f,\ + fcmps, fcmpd")) + "cortex_a5_ex1+cortex_a5_fpadd_pipe") + +;; For fconsts and fconstd, 8-bit immediate data is passed directly from +;; f1 to f3 (which I think reduces the latency by one cycle). + +(define_insn_reservation "cortex_a5_fconst" 3 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fconsts,fconstd")) + "cortex_a5_ex1+cortex_a5_fpadd_pipe") + +;; We should try not to attempt to issue a single-precision multiplication in +;; the middle of a double-precision multiplication operation (the usage of +;; cortex_a5_fpmul_pipe). + +(define_insn_reservation "cortex_a5_fpmuls" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmuls")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe") + +;; For single-precision multiply-accumulate, the add (accumulate) is issued +;; whilst the multiply is in F4. The multiply result can then be forwarded +;; from F5 to F1. The issue unit is only used once (when we first start +;; processing the instruction), but the usage of the FP add pipeline could +;; block other instructions attempting to use it simultaneously. We try to +;; avoid that using cortex_a5_fpadd_pipe. 
+ +(define_insn_reservation "cortex_a5_fpmacs" 8 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmacs,ffmas")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe") + +;; Non-multiply instructions can issue in the middle two instructions of a +;; double-precision multiply. Note that it isn't entirely clear when a branch +;; can dual-issue when a multi-cycle multiplication is in progress; we ignore +;; that for now though. + +(define_insn_reservation "cortex_a5_fpmuld" 7 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmuld")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\ + cortex_a5_ex1+cortex_a5_fpmul_pipe") + +(define_insn_reservation "cortex_a5_fpmacd" 11 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fmacd,ffmad")) + "cortex_a5_ex1+cortex_a5_fpmul_pipe, cortex_a5_fpmul_pipe*2,\ + cortex_a5_ex1+cortex_a5_fpmul_pipe, nothing*3, cortex_a5_fpadd_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ??? Not sure if the 14 cycles taken for single-precision divide to complete +;; includes the time taken for the special instruction used to collect the +;; result to travel down the multiply pipeline, or not. Assuming so. (If +;; that's wrong, the latency should be increased by a few cycles.) + +;; fsqrt takes one cycle less, but that is not modelled, nor is the use of the +;; multiply pipeline to collect the divide/square-root result. + +(define_insn_reservation "cortex_a5_fdivs" 14 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 13") + +;; ??? Similarly for fdivd. + +(define_insn_reservation "cortex_a5_fdivd" 29 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_a5_ex1, cortex_a5_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; FP loads take data from wr/rot/f3. + +;; Core-to-VFP transfers use the multiply pipeline. + +(define_insn_reservation "cortex_a5_r2f" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_mcr,f_mcrr")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f2r" 2 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_mrc,f_mrrc")) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ??? The flag forwarding from fmstat to the ex2 stage of the second +;; instruction is not modeled at present. + +(define_insn_reservation "cortex_a5_f_flags" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_flag")) + "cortex_a5_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a5_f_loads" 4 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_loads")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_loadd" 5 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_loadd")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_stores" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_stores")) + "cortex_a5_ex1") + +(define_insn_reservation "cortex_a5_f_stored" 0 + (and (eq_attr "tune" "cortexa5") + (eq_attr "type" "f_stored")) + "cortex_a5_ex1+cortex_a5_branch, cortex_a5_ex1") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a5_f_loads" + "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\ + cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\ + cortex_a5_f2r") + +(define_bypass 3 "cortex_a5_f_loadd" + "cortex_a5_fpalu, cortex_a5_fpmacs, cortex_a5_fpmuld,\ + cortex_a5_fpmacd, cortex_a5_fdivs, cortex_a5_fdivd,\ + cortex_a5_f2r") diff --git a/gcc-4.9/gcc/config/arm/cortex-a53.md b/gcc-4.9/gcc/config/arm/cortex-a53.md new file mode 100644 index 000000000..deae8eba5 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a53.md @@ -0,0 +1,309 @@ +;; ARM Cortex-A53 pipeline description +;; Copyright (C) 2013-2014 Free Software Foundation, Inc. +;; +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a53") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There are two main integer execution pipelines, described as +;; slot 0 and issue slot 1. + +(define_cpu_unit "cortex_a53_slot0" "cortex_a53") +(define_cpu_unit "cortex_a53_slot1" "cortex_a53") + +(define_reservation "cortex_a53_slot_any" "cortex_a53_slot0|cortex_a53_slot1") +(define_reservation "cortex_a53_single_issue" "cortex_a53_slot0+cortex_a53_slot1") + +;; The load/store pipeline. Load/store instructions can dual-issue from +;; either pipeline, but two load/stores cannot simultaneously issue. + +(define_cpu_unit "cortex_a53_ls" "cortex_a53") + +;; The store pipeline. Shared between both execution pipelines. + +(define_cpu_unit "cortex_a53_store" "cortex_a53") + +;; The branch pipeline. Branches can dual-issue with other instructions +;; (except when those instructions take multiple cycles to issue). + +(define_cpu_unit "cortex_a53_branch" "cortex_a53") + +;; The integer divider. + +(define_cpu_unit "cortex_a53_idiv" "cortex_a53") + +;; The floating-point add pipeline used to model the usage +;; of the add pipeline by fmac instructions. + +(define_cpu_unit "cortex_a53_fpadd_pipe" "cortex_a53") + +;; Floating-point div/sqrt (long latency, out-of-order completion). 
+ +(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_alu" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,csel,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "cortex_a53_slot_any") + +(define_insn_reservation "cortex_a53_alu_shift" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "cortex_a53_slot_any") + +;; Forwarding path for unshifted operands. + +(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_alu") + +(define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results so there's no need to specify +;; bypasses. Multiplies can only single-issue currently. + +(define_insn_reservation "cortex_a53_mul" 3 + (and (eq_attr "tune" "cortexa53") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "cortex_a53_single_issue") + +;; A multiply with a single-register result or an MLA, followed by an +;; MLA with an accumulator dependency, has its result forwarded so two +;; such instructions can issue back-to-back. + +(define_bypass 1 "cortex_a53_mul" + "cortex_a53_mul" + "arm_mac_accumulator_is_mul_result") + +;; Punt with a high enough latency for divides. +(define_insn_reservation "cortex_a53_udiv" 8 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "udiv")) + "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7") + +(define_insn_reservation "cortex_a53_sdiv" 9 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "sdiv")) + "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8") + + +(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv" + "cortex_a53_alu") +(define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage. 
+ +(define_insn_reservation "cortex_a53_load1" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load_byte,load1,load_acq")) + "cortex_a53_slot_any+cortex_a53_ls") + +(define_insn_reservation "cortex_a53_store1" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store1,store_rel")) + "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store") + +(define_insn_reservation "cortex_a53_load2" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load2")) + "cortex_a53_single_issue+cortex_a53_ls") + +(define_insn_reservation "cortex_a53_store2" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store2")) + "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store") + +(define_insn_reservation "cortex_a53_load3plus" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "load3,load4")) + "(cortex_a53_single_issue+cortex_a53_ls)*2") + +(define_insn_reservation "cortex_a53_store3plus" 3 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "store3,store4")) + "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2") + +;; Load/store addresses are required early in Issue. +(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_load*" + "arm_early_load_addr_dep") +(define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_store*" + "arm_early_store_addr_dep") + +;; Load data can forward in the ALU pipeline +(define_bypass 2 "cortex_a53_load1,cortex_a53_load2" + "cortex_a53_alu") +(define_bypass 2 "cortex_a53_load1,cortex_a53_load2" + "cortex_a53_alu_shift" + "arm_no_early_alu_shift_dep") + +;; ALU ops can forward to stores. +(define_bypass 0 "cortex_a53_alu,cortex_a53_alu_shift" + "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus" + "arm_no_early_store_addr_dep") + +(define_bypass 1 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus" + "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus" + "arm_no_early_store_addr_dep") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Currently models all branches as dual-issuable from either execution +;; slot, which isn't true for all cases. We still need to model indirect +;; branches. + +(define_insn_reservation "cortex_a53_branch" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "branch,call")) + "cortex_a53_slot_any+cortex_a53_branch") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_fpalu" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov, fmuls,\ + f_cvt,f_cvtf2i,f_cvti2f,\ + fcmps, fcmpd, fcsel")) + "cortex_a53_slot0+cortex_a53_fpadd_pipe") + +(define_insn_reservation "cortex_a53_fconst" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fconsts,fconstd")) + "cortex_a53_slot0+cortex_a53_fpadd_pipe") + +(define_insn_reservation "cortex_a53_fpmul" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fmuls,fmuld")) + "cortex_a53_slot0") + +;; For single-precision multiply-accumulate, the add (accumulate) is issued after +;; the multiply completes. Model that accordingly. 
+ +(define_insn_reservation "cortex_a53_fpmac" 8 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fmacs,fmacd,ffmas,ffmad")) + "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; fsqrt really takes one cycle less, but that is not modelled. + +(define_insn_reservation "cortex_a53_fdivs" 14 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13") + +(define_insn_reservation "cortex_a53_fdivd" 29 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_r2f" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_mcr,f_mcrr")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f2r" 2 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_mrc,f_mrrc")) + "cortex_a53_slot0") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_f_flags" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_flag")) + "cortex_a53_slot0") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a53_f_loads" 4 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_loads")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_loadd" 5 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_loadd")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_stores" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_stores")) + "cortex_a53_slot0") + +(define_insn_reservation "cortex_a53_f_stored" 0 + (and (eq_attr "tune" "cortexa53") + (eq_attr "type" "f_stored")) + "cortex_a53_slot0") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a53_f_loads" + "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\ + cortex_a53_fdivs, cortex_a53_fdivd,\ + cortex_a53_f2r") + +(define_bypass 2 "cortex_a53_f_loadd" + "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\ + cortex_a53_fdivs, cortex_a53_fdivd,\ + cortex_a53_f2r") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a7.md b/gcc-4.9/gcc/config/arm/cortex-a7.md new file mode 100644 index 000000000..8291d7fa9 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a7.md @@ -0,0 +1,394 @@ +;; ARM Cortex-A7 pipeline description +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; +;; Contributed by ARM Ltd. +;; Based on cortex-a5.md which was originally contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_attr "cortex_a7_neon_type" + "neon_mul, neon_mla, neon_other" + (cond [ + (eq_attr "type" "neon_mul_b, neon_mul_b_q,\ + neon_mul_h, neon_mul_h_q,\ + neon_mul_s, neon_mul_s_q,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_mul_s_long, neon_mul_h_scalar,\ + neon_mul_h_scalar_q, neon_mul_s_scalar,\ + neon_mul_s_scalar_q, neon_mul_h_scalar_long,\ + neon_mul_s_scalar_long,\ + neon_sat_mul_b, neon_sat_mul_b_q,\ + neon_sat_mul_h, neon_sat_mul_h_q,\ + neon_sat_mul_s, neon_sat_mul_s_q,\ + neon_sat_mul_b_long, neon_sat_mul_h_long,\ + neon_sat_mul_s_long,\ + neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\ + neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\ + neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long,\ + neon_fp_mul_s, neon_fp_mul_s_q,\ + neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q") + (const_string "neon_mul") + (eq_attr "type" "neon_mla_b, neon_mla_b_q, neon_mla_h,\ + neon_mla_h_q, neon_mla_s, neon_mla_s_q,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_mla_s_long,\ + neon_mla_h_scalar, neon_mla_h_scalar_q,\ + neon_mla_s_scalar, neon_mla_s_scalar_q,\ + neon_mla_h_scalar_long, neon_mla_s_scalar_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_s_long,\ + neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long,\ + neon_fp_mla_s, neon_fp_mla_s_q,\ + neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q") + (const_string "neon_mla")] + (const_string "neon_other"))) + +(define_automaton "cortex_a7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Functional units. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The Cortex-A7 pipeline integer and vfp pipeline. +;; The decode is the same for all instructions, so do not model it. +;; We only model the first execution stage because +;; instructions always advance one stage per cycle in order. +;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together. + +(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7") + +(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2") + +(define_cpu_unit "cortex_a7_branch" "cortex_a7") + +;; Cortex-A7 is in order and can dual-issue under limited circumstances. +;; ex2 can be reserved only after ex1 is reserved. + +(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1") + +;; Pseudo-unit for blocking the multiply pipeline when a double-precision +;; multiply is in progress. + +(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7") + +;; The floating-point add pipeline (ex1/f1 stage), used to model the usage +;; of the add pipeline by fmac instructions, etc. + +(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7") + +;; Floating-point div/sqrt (long latency, out-of-order completion). + +(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branches. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; A direct branch can dual issue either as younger or older instruction, +;; but branches cannot dual issue with branches. +;; No latency as there is no result. 
+ +(define_insn_reservation "cortex_a7_branch" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "branch")) + "(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch") + +;; Call cannot dual-issue as an older instruction. It can dual-issue +;; as a younger instruction, or single-issue. Call cannot dual-issue +;; with another branch instruction. The result is available the next +;; cycle. +(define_insn_reservation "cortex_a7_call" 1 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "call")) + "(cortex_a7_ex2|cortex_a7_both)+cortex_a7_branch") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instruction with an immediate operand can dual-issue. +(define_insn_reservation "cortex_a7_alu_imm" 2 + (and (eq_attr "tune" "cortexa7") + (ior (eq_attr "type" "adr,alu_imm,alus_imm,logic_imm,logics_imm,\ + mov_imm,mvn_imm,extend") + (and (eq_attr "type" "mov_reg,mov_shift,mov_shift_reg") + (not (eq_attr "length" "8"))))) + "cortex_a7_ex2|cortex_a7_ex1") + +;; ALU instruction with register operands can dual-issue +;; with a younger immediate-based instruction. +(define_insn_reservation "cortex_a7_alu_reg" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + bfm,rev,\ + shift_imm,shift_reg,mov_reg,mvn_reg")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_alu_shift" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn")) + "cortex_a7_ex1") + +;; Forwarding path for unshifted operands. +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul") + +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_store*" + "arm_no_early_store_addr_dep") + +(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift" + "cortex_a7_alu_shift" + "arm_no_early_alu_shift_dep") + +;; The multiplier pipeline can forward results from wr stage only so +;; there's no need to specify bypasses. +;; Multiply instructions cannot dual-issue. + +(define_insn_reservation "cortex_a7_mul" 2 + (and (eq_attr "tune" "cortexa7") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes"))) + "cortex_a7_both") + +;; Forward the result of a multiply operation to the accumulator +;; of the following multiply and accumulate instruction. +(define_bypass 1 "cortex_a7_mul" + "cortex_a7_mul" + "arm_mac_accumulator_is_result") + +;; The latency depends on the operands, so we use an estimate here. +(define_insn_reservation "cortex_a7_idiv" 5 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "udiv,sdiv")) + "cortex_a7_both*5") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/store instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Address-generation happens in the issue stage. +;; Double-word accesses can be issued in a single cycle, +;; and occupy only one pipeline stage. 
+ +(define_insn_reservation "cortex_a7_load1" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "load_byte,load1")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_store1" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "store1")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_load2" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "load2")) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_store2" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "store2")) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_load3" 3 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "load3")) + "cortex_a7_both, cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_store3" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "store4")) + "cortex_a7_both, cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_load4" 3 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "load4")) + "cortex_a7_both, cortex_a7_both") + +(define_insn_reservation "cortex_a7_store4" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "store3")) + "cortex_a7_both, cortex_a7_both") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point arithmetic. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Neon integer, neon floating point, and single-precision floating +;; point instructions of the same type have the same timing +;; characteristics, but neon instructions cannot dual-issue. + +(define_insn_reservation "cortex_a7_fpalu" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,\ + f_cvt, f_cvtf2i, f_cvti2f, fcmps, fcmpd")) + "cortex_a7_ex1+cortex_a7_fpadd_pipe") + +;; For fconsts and fconstd, 8-bit immediate data is passed directly from +;; f1 to f3 (which I think reduces the latency by one cycle). + +(define_insn_reservation "cortex_a7_fconst" 3 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fconsts,fconstd")) + "cortex_a7_ex1+cortex_a7_fpadd_pipe") + +;; We should try not to attempt to issue a single-precision multiplication in +;; the middle of a double-precision multiplication operation (the usage of +;; cortex_a7_fpmul_pipe). + +(define_insn_reservation "cortex_a7_fpmuls" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fmuls")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe") + +(define_insn_reservation "cortex_a7_neon_mul" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "cortex_a7_neon_type" "neon_mul")) + "(cortex_a7_both+cortex_a7_fpmul_pipe)*2") + +(define_insn_reservation "cortex_a7_fpmacs" 8 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fmacs,ffmas")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe") + +(define_insn_reservation "cortex_a7_neon_mla" 8 + (and (eq_attr "tune" "cortexa7") + (eq_attr "cortex_a7_neon_type" "neon_mla")) + "cortex_a7_both+cortex_a7_fpmul_pipe") + +(define_bypass 4 "cortex_a7_fpmacs,cortex_a7_neon_mla" + "cortex_a7_fpmacs,cortex_a7_neon_mla" + "arm_mac_accumulator_is_result") + +;; Non-multiply instructions can issue between two cycles of a +;; double-precision multiply. 
+ +(define_insn_reservation "cortex_a7_fpmuld" 7 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fmuld")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") + +(define_insn_reservation "cortex_a7_fpmacd" 11 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fmacd")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*3") + +(define_insn_reservation "cortex_a7_fpfmad" 8 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "ffmad")) + "cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*4") + +(define_bypass 7 "cortex_a7_fpmacd" + "cortex_a7_fpmacd,cortex_a7_fpfmad" + "arm_mac_accumulator_is_result") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating-point divide/square root instructions. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_fdivs" 16 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 13") + +(define_insn_reservation "cortex_a7_fdivd" 31 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_a7_ex1+cortex_a7_fp_div_sqrt, cortex_a7_fp_div_sqrt * 28") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP to/from core transfers. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Core-to-VFP transfers. + +(define_insn_reservation "cortex_a7_r2f" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_mcr,f_mcrr")) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_f2r" 2 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_mrc,f_mrrc")) + "cortex_a7_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP flag transfer. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Fixme: The flag forwarding from fmstat to the second instruction is +;; not modeled at present. + +(define_insn_reservation "cortex_a7_f_flags" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_flag")) + "cortex_a7_ex1") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; VFP load/store. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "cortex_a7_f_loads" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_loads")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_f_loadd" 4 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_loadd")) + "cortex_a7_both") + +(define_insn_reservation "cortex_a7_f_stores" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_stores")) + "cortex_a7_ex1") + +(define_insn_reservation "cortex_a7_f_stored" 0 + (and (eq_attr "tune" "cortexa7") + (eq_attr "type" "f_stored")) + "cortex_a7_both") + +;; Load-to-use for floating-point values has a penalty of one cycle, +;; i.e. a latency of two. + +(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd" + "cortex_a7_fpalu,\ + cortex_a7_fpmuls,cortex_a7_fpmacs,\ + cortex_a7_fpmuld,cortex_a7_fpmacd, cortex_a7_fpfmad,\ + cortex_a7_fdivs, cortex_a7_fdivd,\ + cortex_a7_f2r") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NEON +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Simple modeling for all neon instructions not covered earlier.
+ +(define_insn_reservation "cortex_a7_neon" 4 + (and (eq_attr "tune" "cortexa7") + (and (eq_attr "is_neon_type" "yes") + (eq_attr "cortex_a7_neon_type" "neon_other"))) + "cortex_a7_both*2") diff --git a/gcc-4.9/gcc/config/arm/cortex-a8-neon.md b/gcc-4.9/gcc/config/arm/cortex-a8-neon.md new file mode 100644 index 000000000..1bb0ab237 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a8-neon.md @@ -0,0 +1,1534 @@ +;; ARM Cortex-A8 NEON scheduling description. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_attr "cortex_a8_neon_type" + "neon_int_1,neon_int_2,neon_int_3,neon_int_4,neon_int_5,neon_vqneg_vqabs, + neon_bit_ops_q, + neon_vaba,neon_vaba_qqq, neon_vmov, + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,neon_mul_qqq_8_16_32_ddd_32, + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar, + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,neon_mla_qqq_8_16, + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long, + neon_mla_qqq_32_qqd_32_scalar,neon_mul_ddd_16_scalar_32_16_long_scalar, + neon_mul_qqd_32_scalar,neon_mla_ddd_16_scalar_qdd_32_16_long_scalar, + neon_shift_1,neon_shift_2,neon_shift_3, + neon_vqshl_vrshl_vqrshl_qqq,neon_vsra_vrsra,neon_fp_vadd_ddd_vabs_dd, + neon_fp_vadd_qqq_vabs_qq,neon_fp_vsum,neon_fp_vmul_ddd,neon_fp_vmul_qqd, + neon_fp_vmla_ddd,neon_fp_vmla_qqq,neon_fp_vmla_ddd_scalar, + neon_fp_vmla_qqq_scalar,neon_fp_vrecps_vrsqrts_ddd, + neon_fp_vrecps_vrsqrts_qqq,neon_bp_simple,neon_bp_2cycle,neon_bp_3cycle, + neon_ldr,neon_str,neon_vld1_1_2_regs,neon_vld1_3_4_regs, + neon_vld2_2_regs_vld1_vld2_all_lanes,neon_vld2_4_regs,neon_vld3_vld4, + neon_vst1_1_2_regs_vst2_2_regs,neon_vst1_3_4_regs, + neon_vst2_4_regs_vst3_vst4,neon_vld1_vld2_lane, + neon_vld3_vld4_lane,neon_vst1_vst2_lane,neon_vst3_vst4_lane, + neon_vld3_vld4_all_lanes,neon_mcr,neon_mcr_2_mcrr,neon_mrc,neon_mrrc, + neon_ldm_2,neon_stm_2,none,unknown" + (cond [ + (eq_attr "type" "neon_logic, neon_logic_q,\ + neon_bsl, neon_cls, neon_cnt,\ + neon_add, neon_add_q") + (const_string "neon_int_1") + (eq_attr "type" "neon_add_widen, neon_sub_widen,\ + neon_sub, neon_sub_q") + (const_string "neon_int_2") + (eq_attr "type" "neon_neg, neon_neg_q,\ + neon_reduc_add, neon_reduc_add_q,\ + neon_reduc_add_long,\ + neon_add_long, neon_sub_long") + (const_string "neon_int_3") + (eq_attr "type" "neon_abs, neon_abs_q, + neon_compare_zero, neon_compare_zero_q,\ + neon_add_halve_narrow_q,\ + neon_sub_halve_narrow_q,\ + neon_add_halve, neon_add_halve_q,\ + neon_qadd, neon_qadd_q,\ + neon_tst, neon_tst_q") + (const_string "neon_int_4") + (eq_attr "type" "neon_abd_long, neon_sub_halve, neon_sub_halve_q,\ + neon_qsub, neon_qsub_q,\ + neon_abd, neon_abd_q,\ + neon_compare, neon_compare_q,\ + neon_minmax, neon_minmax_q, neon_reduc_minmax,\ + neon_reduc_minmax_q") + (const_string "neon_int_5") + (eq_attr "type" "neon_qneg, neon_qneg_q, 
neon_qabs, neon_qabs_q") + (const_string "neon_vqneg_vqabs") + (eq_attr "type" "neon_move, neon_move_q") + (const_string "neon_vmov") + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q") + (const_string "neon_bit_ops_q") + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc") + (const_string "neon_vaba") + (eq_attr "type" "neon_arith_acc_q") + (const_string "neon_vaba_qqq") + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ + neon_shift_imm_long, neon_shift_imm_narrow_q,\ + neon_shift_reg") + (const_string "neon_shift_1") + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q, + neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg") + (const_string "neon_shift_2") + (eq_attr "type" "neon_shift_reg_q") + (const_string "neon_shift_3") + (eq_attr "type" "neon_sat_shift_reg_q") + (const_string "neon_vqshl_vrshl_vqrshl_qqq") + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") + (const_string "neon_vsra_vrsra") + (eq_attr "type" "neon_mul_b, neon_mul_h,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_sat_mul_b, neon_sat_mul_h,\ + neon_sat_mul_b_long, neon_sat_mul_h_long") + (const_string + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (eq_attr "type" "neon_mul_b_q, neon_mul_h_q,\ + neon_sat_mul_b_q, neon_sat_mul_h_q") + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (eq_attr "type" "neon_mul_s, neon_mul_s_long,\ + neon_sat_mul_s, neon_sat_mul_s_long,\ + neon_mul_h_scalar_q, neon_sat_mul_h_scalar_q,\ + neon_mul_s_scalar, neon_sat_mul_s_scalar,\ + neon_mul_s_scalar_long,\ + neon_sat_mul_s_scalar_long") + (const_string + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") + (eq_attr "type" "neon_mla_b, neon_mla_h,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_h_scalar_long") + (const_string + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (eq_attr "type" "neon_mla_b_q, neon_mla_h_q") + (const_string "neon_mla_qqq_8_16") + (eq_attr "type" "neon_mla_s, neon_mla_s_long,\ + neon_sat_mla_s_long,\ + neon_mla_h_scalar_q, neon_mla_s_scalar,\ + neon_mla_s_scalar_long,\ + neon_sat_mla_s_scalar_long") + (const_string + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long") + (eq_attr "type" "neon_mla_s_q, neon_mla_s_scalar_q") + (const_string "neon_mla_qqq_32_qqd_32_scalar") + (eq_attr "type" "neon_mul_h_scalar, neon_sat_mul_h_scalar,\ + neon_mul_h_scalar_long,\ + neon_sat_mul_h_scalar_long") + (const_string + "neon_mul_ddd_16_scalar_32_16_long_scalar") + (eq_attr "type" "neon_mul_s_q, neon_sat_mul_s_q,\ + neon_mul_s_scalar_q") + (const_string "neon_mul_qqd_32_scalar") + (eq_attr "type" "neon_mla_h_scalar, neon_mla_h_scalar_long") + (const_string + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (eq_attr "type" "neon_fp_abd_s, neon_fp_abs_s, neon_fp_neg_s,\ + neon_fp_addsub_s, neon_fp_compare_s,\ + neon_fp_minmax_s, neon_fp_mul_s,\ + neon_fp_recpe_s, neon_fp_rsqrte_s,\ + neon_fp_to_int_s, neon_int_to_fp_s") + (const_string "neon_fp_vadd_ddd_vabs_dd") + (eq_attr "type" "neon_fp_abd_s_q, neon_fp_abs_s_q,\ + neon_fp_neg_s_q,\ + neon_fp_addsub_s_q, neon_fp_compare_s_q,\ + neon_fp_minmax_s_q, neon_fp_mul_s_q,\ + neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\ + neon_fp_to_int_s_q, neon_int_to_fp_s_q") + (const_string "neon_fp_vadd_qqq_vabs_qq") + (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_minmax_s,\ + neon_fp_reduc_add_s_q, neon_fp_reduc_minmax_s_q") + (const_string "neon_fp_vsum") + (eq_attr "type" "neon_fp_mul_s_scalar") + (const_string "neon_fp_vmul_ddd") + (eq_attr "type" "neon_fp_mul_s_scalar_q") + 
(const_string "neon_fp_vmul_qqd") + (eq_attr "type" "neon_fp_mla_s") + (const_string "neon_fp_vmla_ddd") + (eq_attr "type" "neon_fp_mla_s_q") + (const_string "neon_fp_vmla_qqq") + (eq_attr "type" "neon_fp_mla_s_scalar") + (const_string "neon_fp_vmla_ddd_scalar") + (eq_attr "type" "neon_fp_mla_s_scalar_q") + (const_string "neon_fp_vmla_qqq_scalar") + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s") + (const_string "neon_fp_vrecps_vrsqrts_ddd") + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q") + (const_string "neon_fp_vrecps_vrsqrts_qqq") + (eq_attr "type" "neon_move_narrow_q, neon_dup,\ + neon_dup_q, neon_permute, neon_zip,\ + neon_ext, neon_rev, neon_rev_q") + (const_string "neon_bp_simple") + (eq_attr "type" "neon_permute_q, neon_ext_q, neon_tbl1, neon_tbl2") + (const_string "neon_bp_2cycle") + (eq_attr "type" "neon_zip_q, neon_tbl3, neon_tbl4") + (const_string "neon_bp_3cycle") + (eq_attr "type" "neon_ldr") + (const_string "neon_ldr") + (eq_attr "type" "neon_str") + (const_string "neon_str") + (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q,\ + neon_load1_2reg, neon_load1_2reg_q,\ + neon_load2_2reg, neon_load2_2reg_q") + (const_string "neon_vld1_1_2_regs") + (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q") + (const_string "neon_vld1_3_4_regs") + (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q,\ + neon_load2_all_lanes, neon_load2_all_lanes_q") + (const_string + "neon_vld2_2_regs_vld1_vld2_all_lanes") + (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q,\ + neon_load4_all_lanes, neon_load4_all_lanes_q,\ + neon_load2_4reg, neon_load2_4reg_q") + (const_string "neon_vld2_4_regs") + (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q,\ + neon_load4_4reg, neon_load4_4reg_q") + (const_string "neon_vld3_vld4") + (eq_attr "type" "f_loads, f_loadd, f_stores, f_stored,\ + neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load2_one_lane, neon_load2_one_lane_q") + (const_string "neon_vld1_vld2_lane") + (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_one_lane, neon_load4_one_lane_q") + (const_string "neon_vld3_vld4_lane") + (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q,\ + neon_store1_2reg, neon_store1_2reg_q,\ + neon_store2_2reg, neon_store2_2reg_q") + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\ + neon_store1_4reg, neon_store1_4reg_q") + (const_string "neon_vst1_3_4_regs") + (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\ + neon_store3_3reg, neon_store3_3reg_q,\ + neon_store4_4reg, neon_store4_4reg_q") + (const_string "neon_vst2_4_regs_vst3_vst4") + (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q,\ + neon_store2_one_lane, neon_store2_one_lane_q") + (const_string "neon_vst1_vst2_lane") + (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q,\ + neon_store4_one_lane, neon_store4_one_lane_q") + (const_string "neon_vst3_vst4_lane") + (eq_attr "type" "neon_from_gp, f_mcr") + (const_string "neon_mcr") + (eq_attr "type" "neon_from_gp_q, f_mcrr") + (const_string "neon_mcr_2_mcrr") + (eq_attr "type" "neon_to_gp, f_mrc") + (const_string "neon_mrc") + (eq_attr "type" "neon_to_gp_q, f_mrrc") + (const_string "neon_mrrc")] + (const_string "unknown"))) + +(define_automaton "cortex_a8_neon") + +;; Only one load, store, permute, MCR or MRC instruction can be issued +;; per cycle. 
+(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon") + +;; Only one data-processing instruction can be issued per cycle. +(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon") + +;; The VFPLite unit (non-pipelined). +(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon") + +;; We need a special mutual exclusion (to be used in addition to +;; cortex_a8_neon_issue_dp) for the case when an instruction such as +;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to +;; E2 of the floating-point add pipeline. On the cycle previous to that +;; forward we must prevent issue of any instruction to the floating-point +;; add pipeline, but still allow issue of a data-processing instruction +;; to any of the other pipelines. +(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon") + +;; Patterns of reservation. +;; We model the NEON issue units as running in parallel with the core ones. +;; We assume that multi-cycle NEON instructions get decomposed into +;; micro-ops as they are issued into the NEON pipeline, and not as they +;; are issued into the ARM pipeline. Dual issue may not occur except +;; upon the first and last cycles of a multi-cycle instruction, but it +;; is unclear whether two multi-cycle instructions can issue together (in +;; this model they cannot). It is also unclear whether a pair of +;; a multi-cycle and single-cycle instructions, that could potentially +;; issue together, only do so if (say) the single-cycle one precedes +;; the other. + +(define_reservation "cortex_a8_neon_dp" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp") +(define_reservation "cortex_a8_neon_dp_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp") +(define_reservation "cortex_a8_neon_dp_4" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp") + +(define_reservation "cortex_a8_neon_fadd" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_fadd") +(define_reservation "cortex_a8_neon_fadd_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_fadd,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd") + +(define_reservation "cortex_a8_neon_perm" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_perm_2" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_perm_3" + "(cortex_a8_alu0|cortex_a8_alu1)+\ + cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") + +(define_reservation "cortex_a8_neon_ls" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_2" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_3" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_4" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") +(define_reservation "cortex_a8_neon_ls_5" + "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + 
cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\ + cortex_a8_neon_issue_perm") + +(define_reservation "cortex_a8_neon_fmul_then_fadd" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + nothing*3,\ + cortex_a8_neon_issue_fadd") +(define_reservation "cortex_a8_neon_fmul_then_fadd_2" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\ + cortex_a8_neon_issue_dp,\ + nothing*2,\ + cortex_a8_neon_issue_fadd,\ + cortex_a8_neon_issue_fadd") + +;; VFP instructions can only be single-issued into the NEON pipeline. +(define_reservation "cortex_a8_vfp" + "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\ + cortex_a8_neon_issue_perm+cortex_a8_vfplite") + +;; VFP instructions. +;; The VFPLite unit that executes these isn't pipelined; we give the +;; worst-case latencies (and choose the double-precision ones where we +;; do not distinguish on precision). We assume RunFast mode is not +;; enabled and therefore do not model the possible VFP instruction +;; execution in the NEON floating point pipelines, nor additional +;; latencies for the processing of subnormals. +;; +;; TODO: RunFast mode could potentially be enabled when -ffast-math +;; is specified. + +(define_insn_reservation "cortex_a8_vfp_add_sub" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fconsts,fconstd,fadds,faddd")) + "cortex_a8_vfp,cortex_a8_vfplite*9") + +(define_insn_reservation "cortex_a8_vfp_muls" 12 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmuls")) + "cortex_a8_vfp,cortex_a8_vfplite*11") + +(define_insn_reservation "cortex_a8_vfp_muld" 17 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmuld")) + "cortex_a8_vfp,cortex_a8_vfplite*16") + +(define_insn_reservation "cortex_a8_vfp_macs" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmacs,ffmas")) + "cortex_a8_vfp,cortex_a8_vfplite*20") + +(define_insn_reservation "cortex_a8_vfp_macd" 26 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmacd,ffmad")) + "cortex_a8_vfp,cortex_a8_vfplite*25") + +(define_insn_reservation "cortex_a8_vfp_divs" 37 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_a8_vfp,cortex_a8_vfplite*36") + +(define_insn_reservation "cortex_a8_vfp_divd" 65 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_a8_vfp,cortex_a8_vfplite*64") + +;; Comparisons can actually take 7 cycles sometimes instead of four, +;; but given all the other instructions lumped into type=ffarith that +;; take four cycles, we pick that latency. +(define_insn_reservation "cortex_a8_vfp_farith" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "fmov,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd")) + "cortex_a8_vfp,cortex_a8_vfplite*3") + +(define_insn_reservation "cortex_a8_vfp_cvt" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) + "cortex_a8_vfp,cortex_a8_vfplite*6") + +;; NEON -> core transfers. + +(define_insn_reservation "cortex_a8_neon_mrc" 20 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mrc")) + "cortex_a8_neon_ls") + +(define_insn_reservation "cortex_a8_neon_mrrc" 21 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mrrc")) + "cortex_a8_neon_ls_2") + +;; Arithmetic Operations + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a8_neon_int_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_1")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N3. +(define_insn_reservation "cortex_a8_neon_int_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_2")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "cortex_a8_neon_int_3" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_3")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_int_4" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_4")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N4. +(define_insn_reservation "cortex_a8_neon_int_5" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_int_5")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vqneg_vqabs")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation produce a result at N3. +(define_insn_reservation "cortex_a8_neon_vmov" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vmov")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_vaba" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vaba")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vaba_qqq")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a8_neon_bit_ops_q" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_bit_ops_q")) + "cortex_a8_neon_dp_2") + +;; Integer Multiply/Accumulate Operations + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6 on cycle 2. 
+(define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mla_qqq_8_16")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a8_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a8_neon_dp") + +;; Shift Operations + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a8_neon_shift_1" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_shift_1")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a8_neon_shift_2" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_shift_2")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a8_neon_shift_3" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_shift_3")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vsra_vrsra")) + "cortex_a8_neon_dp") + +;; Floating point Operations + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a8_neon_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a8_neon_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vsum")) + "cortex_a8_neon_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5. +(define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmul_ddd")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmul_qqd")) + "cortex_a8_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_ddd")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. 
+(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_qqq")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9. +(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a8_neon_fmul_then_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q")) + "cortex_a8_neon_fmul_then_fadd_2") + +;; Permute operations. + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2. +(define_insn_reservation "cortex_a8_neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_bp_simple")) + "cortex_a8_neon_perm") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a8_neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_bp_2cycle")) + "cortex_a8_neon_perm_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_bp_3cycle")) + "cortex_a8_neon_perm_3") + +;; Load Operations. + +;; Instructions using this reservation produce a result at N1. +(define_insn_reservation "cortex_a8_neon_ldr" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_ldr")) + "cortex_a8_neon_ls") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_str" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_str")) + "cortex_a8_neon_ls") + +;; Instructions using this reservation produce a result at N1 on cycle 2. +(define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld1_1_2_regs")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation produce a result at N1 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld1_3_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. 
+(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld2_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 4. +(define_insn_reservation "cortex_a8_neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld3_vld4")) + "cortex_a8_neon_ls_4") + +;; Store operations. + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst1_3_4_regs")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a8_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld1_vld2_lane")) + "cortex_a8_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 5. +(define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vld3_vld4_lane")) + "cortex_a8_neon_ls_5") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst1_vst2_lane")) + "cortex_a8_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_vst3_vst4_lane")) + "cortex_a8_neon_ls_3") + +;; Register Transfer Operations + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a8_neon_mcr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mcr")) + "cortex_a8_neon_perm") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "cortex_a8_neon_type" "neon_mcr_2_mcrr")) + "cortex_a8_neon_perm_2") + +;; Exceptions to the default latencies. 
+ +(define_bypass 1 "cortex_a8_neon_mcr_2_mcrr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_mcr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vld3_vld4_lane" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vld1_vld2_lane" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_vld3_vld4" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vld2_4_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + 
cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_vld1_3_4_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_vld1_1_2_regs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a8_neon_ldr" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_bp_3cycle" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_bp_2cycle" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a8_neon_bp_simple" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" + 
"cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_fp_vmul_qqd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + 
cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vmul_ddd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vsum" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_fp_vadd_qqq_vabs_qq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_fp_vadd_ddd_vabs_dd" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vsra_vrsra" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_shift_3" + "cortex_a8_neon_int_1,\ + 
cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_shift_2" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_shift_1" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_mul_qqd_32_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + 
cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mla_qqq_8_16" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + 
cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a8_neon_vaba_qqq" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a8_neon_vaba" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_bit_ops_q" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_vqneg_vqabs" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_int_5" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a8_neon_int_4" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_3" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + 
cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_2" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a8_neon_int_1" + "cortex_a8_neon_int_1,\ + cortex_a8_neon_int_4,\ + cortex_a8_neon_bit_ops_q,\ + cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a8_neon_mla_qqq_8_16,\ + cortex_a8_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a8_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a8_neon_fp_vmla_ddd,\ + cortex_a8_neon_fp_vmla_qqq,\ + cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a8_neon_fp_vrecps_vrsqrts_qqq") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a8.md b/gcc-4.9/gcc/config/arm/cortex-a8.md new file mode 100644 index 000000000..b272472e0 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a8.md @@ -0,0 +1,279 @@ +;; ARM Cortex-A8 scheduling description. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a8") + +;; Only one load/store instruction can be issued per cycle +;; (although reservation of this unit is only required for single +;; loads and stores -- see below). +(define_cpu_unit "cortex_a8_issue_ls" "cortex_a8") + +;; Only one branch instruction can be issued per cycle. +(define_cpu_unit "cortex_a8_issue_branch" "cortex_a8") + +;; The two ALU pipelines. +(define_cpu_unit "cortex_a8_alu0" "cortex_a8") +(define_cpu_unit "cortex_a8_alu1" "cortex_a8") + +;; The usual flow of an instruction through the pipelines. +(define_reservation "cortex_a8_default" + "cortex_a8_alu0|cortex_a8_alu1") + +;; The flow of a branch instruction through the pipelines. +(define_reservation "cortex_a8_branch" + "(cortex_a8_alu0+cortex_a8_issue_branch)|\ + (cortex_a8_alu1+cortex_a8_issue_branch)") + +;; The flow of a load or store instruction through the pipeline in +;; the case where that instruction consists of only one micro-op... +(define_reservation "cortex_a8_load_store_1" + "(cortex_a8_alu0+cortex_a8_issue_ls)|\ + (cortex_a8_alu1+cortex_a8_issue_ls)") + +;; ...and in the case of two micro-ops. 
Dual issue is altogether forbidden +;; during the issue cycle of the first micro-op. (Instead of modelling +;; a separate issue unit, we instead reserve alu0 and alu1 to +;; prevent any other instructions from being issued upon that first cycle.) +;; Even though the load/store pipeline is usually available in either +;; ALU pipe, multi-cycle instructions always issue in pipeline 0. +(define_reservation "cortex_a8_load_store_2" + "cortex_a8_alu0+cortex_a8_alu1+cortex_a8_issue_ls,\ + cortex_a8_alu0+cortex_a8_issue_ls") + +;; The flow of a single-cycle multiplication. +(define_reservation "cortex_a8_multiply" + "cortex_a8_alu0") + +;; The flow of a multiplication instruction that gets decomposed into +;; two micro-ops. The two micro-ops will be issued to pipeline 0 on +;; successive cycles. Dual issue cannot happen at the same time as the +;; first of the micro-ops. +(define_reservation "cortex_a8_multiply_2" + "cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0") + +;; Similarly, the flow of a multiplication instruction that gets +;; decomposed into three micro-ops. Dual issue cannot occur except on +;; the cycle upon which the third micro-op is issued. +(define_reservation "cortex_a8_multiply_3" + "cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0+cortex_a8_alu1,\ + cortex_a8_alu0") + +;; The model given here assumes that all instructions are unconditional. + +;; Data processing instructions, but not move instructions. + +;; We include CLZ with these since it has the same execution pattern +;; (source read in E2 and destination available at the end of that cycle). +(define_insn_reservation "cortex_a8_alu" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,clz,rbit,rev,\ + shift_imm,shift_reg,\ + multiple,no_insn")) + "cortex_a8_default") + +(define_insn_reservation "cortex_a8_alu_shift" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend")) + "cortex_a8_default") + +(define_insn_reservation "cortex_a8_alu_shift_reg" 2 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg")) + "cortex_a8_default") + +;; Move instructions. + +(define_insn_reservation "cortex_a8_mov" 1 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs")) + "cortex_a8_default") + +;; Exceptions to the default latencies for data processing instructions. + +;; A move followed by an ALU instruction with no early dep. +;; (Such a pair can be issued in parallel, hence latency zero.) +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu") +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 0 "cortex_a8_mov" "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; An ALU instruction followed by an ALU instruction with no early dep. +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu") +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "cortex_a8_alu,cortex_a8_alu_shift,cortex_a8_alu_shift_reg" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Multiplication instructions. 
These are categorized according to their +;; reservation behavior and the need below to distinguish certain +;; varieties for bypasses. Results are available at the E5 stage +;; (but some of these are multi-cycle instructions which explains the +;; latencies below). + +(define_insn_reservation "cortex_a8_mul" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "mul,smulxy,smmul")) + "cortex_a8_multiply_2") + +(define_insn_reservation "cortex_a8_mla" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "mla,smlaxy,smlawy,smmla,smlad,smlsd")) + "cortex_a8_multiply_2") + +(define_insn_reservation "cortex_a8_mull" 7 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "smull,umull,smlal,umlal,umaal,smlalxy")) + "cortex_a8_multiply_3") + +(define_insn_reservation "cortex_a8_smulwy" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "smulwy,smuad,smusd")) + "cortex_a8_multiply") + +;; smlald and smlsld are multiply-accumulate instructions but do not +;; received bypassed data from other multiplication results; thus, they +;; cannot go in cortex_a8_mla above. (See below for bypass details.) +(define_insn_reservation "cortex_a8_smlald" 6 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "smlald,smlsld")) + "cortex_a8_multiply_2") + +;; A multiply with a single-register result or an MLA, followed by an +;; MLA with an accumulator dependency, has its result forwarded so two +;; such instructions can issue back-to-back. +(define_bypass 1 "cortex_a8_mul,cortex_a8_mla,cortex_a8_smulwy" + "cortex_a8_mla" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at E2 has lower latency than one needing it at E1. +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu") +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "cortex_a8_mul,cortex_a8_mla,cortex_a8_mull,\ + cortex_a8_smulwy,cortex_a8_smlald" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Load instructions. +;; The presence of any register writeback is ignored here. + +;; A load result has latency 3 unless the dependent instruction has +;; no early dep, in which case it is only latency two. +;; We assume 64-bit alignment for doubleword loads. +(define_insn_reservation "cortex_a8_load1_2" 3 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "load1,load2,load_byte")) + "cortex_a8_load_store_1") + +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu") +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_a8_load1_2" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; We do not currently model the fact that loads with scaled register +;; offsets that are not LSL #2 have an extra cycle latency (they issue +;; as two micro-ops). + +;; A load multiple of three registers is usually issued as two micro-ops. +;; The first register will be available at E3 of the first iteration, +;; the second at E3 of the second iteration, and the third at E4 of +;; the second iteration. A load multiple of four registers is usually +;; issued as two micro-ops. 
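A quick illustration of how the cortex_a8_load3_4 reservation and the define_bypass entries just below interact: the producer's declared latency (5) applies by default, but when the consumer's reservation appears in a bypass list and the bypass's optional guard predicate accepts the dependency, the shorter bypass latency (4) is used instead. A minimal sketch of that lookup, assuming the standard define_bypass semantics; the Python helper and tables are hypothetical, and the guard predicate itself is implemented elsewhere in the ARM backend:

    # Hypothetical sketch, not GCC internals: choosing the dependency cost
    # between a producer and a consumer reservation.  A bypass overrides the
    # default latency only if its guard (if any) holds for the dependency.
    DEFAULT_LATENCY = {"cortex_a8_load3_4": 5}
    BYPASSES = {
        ("cortex_a8_load3_4", "cortex_a8_alu"): (4, None),
        ("cortex_a8_load3_4", "cortex_a8_alu_shift"): (4, "arm_no_early_alu_shift_dep"),
    }

    def dep_latency(producer, consumer, guards_true=frozenset()):
        entry = BYPASSES.get((producer, consumer))
        if entry and (entry[1] is None or entry[1] in guards_true):
            return entry[0]              # bypass applies: result forwarded a cycle early
        return DEFAULT_LATENCY[producer]

    assert dep_latency("cortex_a8_load3_4", "cortex_a8_alu") == 4
    # Early-dependence case: the guard does not hold, so the default latency stands.
    assert dep_latency("cortex_a8_load3_4", "cortex_a8_alu_shift") == 5
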
+(define_insn_reservation "cortex_a8_load3_4" 5 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "load3,load4")) + "cortex_a8_load_store_2") + +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu") +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 4 "cortex_a8_load3_4" + "cortex_a8_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; Store instructions. +;; Writeback is again ignored. + +(define_insn_reservation "cortex_a8_store1_2" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "store1,store2")) + "cortex_a8_load_store_1") + +(define_insn_reservation "cortex_a8_store3_4" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "store3,store4")) + "cortex_a8_load_store_2") + +;; An ALU instruction acting as a producer for a store instruction +;; that only uses the result as the value to be stored (as opposed to +;; using it to calculate the address) has latency zero; the store +;; reads the value to be stored at the start of E3 and the ALU insn +;; writes it at the end of E2. Move instructions actually produce the +;; result at the end of E1, but since we don't have delay slots, the +;; scheduling behavior will be the same. +(define_bypass 0 "cortex_a8_alu,cortex_a8_alu_shift,\ + cortex_a8_alu_shift_reg,cortex_a8_mov" + "cortex_a8_store1_2,cortex_a8_store3_4" + "arm_no_early_store_addr_dep") + +;; Branch instructions + +(define_insn_reservation "cortex_a8_branch" 0 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "branch")) + "cortex_a8_branch") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_a8_call" 32 + (and (eq_attr "tune" "cortexa8") + (eq_attr "type" "call")) + "cortex_a8_issue_branch") + +;; NEON (including VFP) instructions. + +(include "cortex-a8-neon.md") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a9-neon.md b/gcc-4.9/gcc/config/arm/cortex-a9-neon.md new file mode 100644 index 000000000..3ff93f924 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a9-neon.md @@ -0,0 +1,1471 @@ +;; ARM Cortex-A9 pipeline description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; +;; Neon pipeline description contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
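The define_attr that opens this file maps the architecture-neutral "type" attribute onto Cortex-A9 NEON scheduling classes: the cond clauses are tested in order, the first match supplies the class, and anything unmatched falls through to "unknown". A minimal sketch of that first-match behaviour, using abridged versions of the first two mappings from the attribute below; the Python helper is hypothetical:

    # Hypothetical sketch of the cond in the define_attr that follows:
    # a first-match classifier over the generic "type" attribute.
    CLASSES = [
        ({"neon_logic", "neon_add", "neon_add_q"}, "neon_int_1"),
        ({"neon_add_widen", "neon_sub", "neon_sub_q"}, "neon_int_2"),
    ]

    def cortex_a9_neon_type(insn_type):
        for types, klass in CLASSES:
            if insn_type in types:       # first matching clause wins
                return klass
        return "unknown"                 # the attribute's fallback value

    assert cortex_a9_neon_type("neon_add") == "neon_int_1"
    # Core (non-NEON) types are not listed in the cond, so they map to "unknown".
    assert cortex_a9_neon_type("alu_imm") == "unknown"
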
+ +(define_attr "cortex_a9_neon_type" + "neon_int_1,neon_int_2,neon_int_3,neon_int_4,neon_int_5,neon_vqneg_vqabs, + neon_bit_ops_q, + neon_vaba,neon_vaba_qqq, neon_vmov, + neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,neon_mul_qqq_8_16_32_ddd_32, + neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar, + neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,neon_mla_qqq_8_16, + neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long, + neon_mla_qqq_32_qqd_32_scalar,neon_mul_ddd_16_scalar_32_16_long_scalar, + neon_mul_qqd_32_scalar,neon_mla_ddd_16_scalar_qdd_32_16_long_scalar, + neon_shift_1,neon_shift_2,neon_shift_3, + neon_vqshl_vrshl_vqrshl_qqq,neon_vsra_vrsra,neon_fp_vadd_ddd_vabs_dd, + neon_fp_vadd_qqq_vabs_qq,neon_fp_vsum,neon_fp_vmul_ddd,neon_fp_vmul_qqd, + neon_fp_vmla_ddd,neon_fp_vmla_qqq,neon_fp_vmla_ddd_scalar, + neon_fp_vmla_qqq_scalar,neon_fp_vrecps_vrsqrts_ddd, + neon_fp_vrecps_vrsqrts_qqq,neon_bp_simple,neon_bp_2cycle,neon_bp_3cycle, + neon_ldr,neon_str,neon_vld1_1_2_regs,neon_vld1_3_4_regs, + neon_vld2_2_regs_vld1_vld2_all_lanes,neon_vld2_4_regs,neon_vld3_vld4, + neon_vst1_1_2_regs_vst2_2_regs,neon_vst1_3_4_regs, + neon_vst2_4_regs_vst3_vst4,neon_vld1_vld2_lane, + neon_vld3_vld4_lane,neon_vst1_vst2_lane,neon_vst3_vst4_lane, + neon_vld3_vld4_all_lanes,neon_mcr,neon_mcr_2_mcrr,neon_mrc,neon_mrrc, + neon_ldm_2,neon_stm_2,none,unknown" + (cond [ + (eq_attr "type" "neon_logic, neon_logic_q,\ + neon_bsl, neon_cls, neon_cnt,\ + neon_add, neon_add_q") + (const_string "neon_int_1") + (eq_attr "type" "neon_add_widen, neon_sub_widen,\ + neon_sub, neon_sub_q") + (const_string "neon_int_2") + (eq_attr "type" "neon_neg, neon_neg_q,\ + neon_reduc_add, neon_reduc_add_q,\ + neon_reduc_add_long,\ + neon_add_long, neon_sub_long") + (const_string "neon_int_3") + (eq_attr "type" "neon_abs, neon_abs_q, + neon_compare_zero, neon_compare_zero_q,\ + neon_add_halve_narrow_q,\ + neon_sub_halve_narrow_q,\ + neon_add_halve, neon_add_halve_q,\ + neon_qadd, neon_qadd_q,\ + neon_tst, neon_tst_q") + (const_string "neon_int_4") + (eq_attr "type" "neon_abd_long, neon_sub_halve, neon_sub_halve_q,\ + neon_qsub, neon_qsub_q,\ + neon_abd, neon_abd_q,\ + neon_compare, neon_compare_q,\ + neon_minmax, neon_minmax_q, neon_reduc_minmax,\ + neon_reduc_minmax_q") + (const_string "neon_int_5") + (eq_attr "type" "neon_qneg, neon_qneg_q, neon_qabs, neon_qabs_q") + (const_string "neon_vqneg_vqabs") + (eq_attr "type" "neon_move, neon_move_q") + (const_string "neon_vmov") + (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q") + (const_string "neon_bit_ops_q") + (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc") + (const_string "neon_vaba") + (eq_attr "type" "neon_arith_acc_q") + (const_string "neon_vaba_qqq") + (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\ + neon_shift_imm_long, neon_shift_imm_narrow_q,\ + neon_shift_reg") + (const_string "neon_shift_1") + (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q, + neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg") + (const_string "neon_shift_2") + (eq_attr "type" "neon_shift_reg_q") + (const_string "neon_shift_3") + (eq_attr "type" "neon_sat_shift_reg_q") + (const_string "neon_vqshl_vrshl_vqrshl_qqq") + (eq_attr "type" "neon_shift_acc, neon_shift_acc_q") + (const_string "neon_vsra_vrsra") + (eq_attr "type" "neon_mul_b, neon_mul_h,\ + neon_mul_b_long, neon_mul_h_long,\ + neon_sat_mul_b, neon_sat_mul_h,\ + neon_sat_mul_b_long, neon_sat_mul_h_long") + (const_string + "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long") + (eq_attr "type" "neon_mul_b_q, 
neon_mul_h_q,\ + neon_sat_mul_b_q, neon_sat_mul_h_q") + (const_string "neon_mul_qqq_8_16_32_ddd_32") + (eq_attr "type" "neon_mul_s, neon_mul_s_long,\ + neon_sat_mul_s, neon_sat_mul_s_long,\ + neon_mul_h_scalar_q, neon_sat_mul_h_scalar_q,\ + neon_mul_s_scalar, neon_sat_mul_s_scalar,\ + neon_mul_s_scalar_long,\ + neon_sat_mul_s_scalar_long") + (const_string + "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar") + (eq_attr "type" "neon_mla_b, neon_mla_h,\ + neon_mla_b_long, neon_mla_h_long,\ + neon_sat_mla_b_long, neon_sat_mla_h_long,\ + neon_sat_mla_h_scalar_long") + (const_string + "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long") + (eq_attr "type" "neon_mla_b_q, neon_mla_h_q") + (const_string "neon_mla_qqq_8_16") + (eq_attr "type" "neon_mla_s, neon_mla_s_long,\ + neon_sat_mla_s_long,\ + neon_mla_h_scalar_q, neon_mla_s_scalar,\ + neon_mla_s_scalar_long,\ + neon_sat_mla_s_scalar_long") + (const_string + "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long") + (eq_attr "type" "neon_mla_s_q, neon_mla_s_scalar_q") + (const_string "neon_mla_qqq_32_qqd_32_scalar") + (eq_attr "type" "neon_mul_h_scalar, neon_sat_mul_h_scalar,\ + neon_mul_h_scalar_long,\ + neon_sat_mul_h_scalar_long") + (const_string + "neon_mul_ddd_16_scalar_32_16_long_scalar") + (eq_attr "type" "neon_mul_s_q, neon_sat_mul_s_q,\ + neon_mul_s_scalar_q") + (const_string "neon_mul_qqd_32_scalar") + (eq_attr "type" "neon_mla_h_scalar, neon_mla_h_scalar_long") + (const_string + "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar") + (eq_attr "type" "neon_fp_abd_s, neon_fp_abs_s, neon_fp_neg_s,\ + neon_fp_addsub_s, neon_fp_compare_s,\ + neon_fp_minmax_s, neon_fp_mul_s,\ + neon_fp_recpe_s, neon_fp_rsqrte_s,\ + neon_fp_to_int_s, neon_int_to_fp_s") + (const_string "neon_fp_vadd_ddd_vabs_dd") + (eq_attr "type" "neon_fp_abd_s_q, neon_fp_abs_s_q,\ + neon_fp_neg_s_q,\ + neon_fp_addsub_s_q, neon_fp_compare_s_q,\ + neon_fp_minmax_s_q, neon_fp_mul_s_q,\ + neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\ + neon_fp_to_int_s_q, neon_int_to_fp_s_q") + (const_string "neon_fp_vadd_qqq_vabs_qq") + (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_minmax_s,\ + neon_fp_reduc_add_s_q, neon_fp_reduc_minmax_s_q") + (const_string "neon_fp_vsum") + (eq_attr "type" "neon_fp_mul_s_scalar") + (const_string "neon_fp_vmul_ddd") + (eq_attr "type" "neon_fp_mul_s_scalar_q") + (const_string "neon_fp_vmul_qqd") + (eq_attr "type" "neon_fp_mla_s") + (const_string "neon_fp_vmla_ddd") + (eq_attr "type" "neon_fp_mla_s_q") + (const_string "neon_fp_vmla_qqq") + (eq_attr "type" "neon_fp_mla_s_scalar") + (const_string "neon_fp_vmla_ddd_scalar") + (eq_attr "type" "neon_fp_mla_s_scalar_q") + (const_string "neon_fp_vmla_qqq_scalar") + (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s") + (const_string "neon_fp_vrecps_vrsqrts_ddd") + (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q") + (const_string "neon_fp_vrecps_vrsqrts_qqq") + (eq_attr "type" "neon_move_narrow_q, neon_dup,\ + neon_dup_q, neon_permute, neon_zip,\ + neon_ext, neon_rev, neon_rev_q") + (const_string "neon_bp_simple") + (eq_attr "type" "neon_permute_q, neon_ext_q, neon_tbl1, neon_tbl2") + (const_string "neon_bp_2cycle") + (eq_attr "type" "neon_zip_q, neon_tbl3, neon_tbl4") + (const_string "neon_bp_3cycle") + (eq_attr "type" "neon_ldr") + (const_string "neon_ldr") + (eq_attr "type" "neon_str") + (const_string "neon_str") + (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q,\ + neon_load1_2reg, neon_load1_2reg_q,\ + neon_load2_2reg, neon_load2_2reg_q") + (const_string "neon_vld1_1_2_regs") + 
(eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\ + neon_load1_4reg, neon_load1_4reg_q") + (const_string "neon_vld1_3_4_regs") + (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q,\ + neon_load2_all_lanes, neon_load2_all_lanes_q") + (const_string + "neon_vld2_2_regs_vld1_vld2_all_lanes") + (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q,\ + neon_load4_all_lanes, neon_load4_all_lanes_q,\ + neon_load2_4reg, neon_load2_4reg_q") + (const_string "neon_vld2_4_regs") + (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q,\ + neon_load4_4reg, neon_load4_4reg_q") + (const_string "neon_vld3_vld4") + (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\ + neon_load2_one_lane, neon_load2_one_lane_q") + (const_string "neon_vld1_vld2_lane") + (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q,\ + neon_load4_one_lane, neon_load4_one_lane_q") + (const_string "neon_vld3_vld4_lane") + (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q,\ + neon_store1_2reg, neon_store1_2reg_q,\ + neon_store2_2reg, neon_store2_2reg_q") + (const_string "neon_vst1_1_2_regs_vst2_2_regs") + (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\ + neon_store1_4reg, neon_store1_4reg_q") + (const_string "neon_vst1_3_4_regs") + (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\ + neon_store3_3reg, neon_store3_3reg_q,\ + neon_store4_4reg, neon_store4_4reg_q") + (const_string "neon_vst2_4_regs_vst3_vst4") + (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q,\ + neon_store2_one_lane, neon_store2_one_lane_q") + (const_string "neon_vst1_vst2_lane") + (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q,\ + neon_store4_one_lane, neon_store4_one_lane_q") + (const_string "neon_vst3_vst4_lane") + (eq_attr "type" "neon_from_gp") + (const_string "neon_mcr") + (eq_attr "type" "neon_from_gp_q") + (const_string "neon_mcr_2_mcrr") + (eq_attr "type" "neon_to_gp") + (const_string "neon_mrc") + (eq_attr "type" "neon_to_gp_q") + (const_string "neon_mrrc")] + (const_string "unknown"))) + +(define_automaton "cortex_a9_neon") + +;; Only one instruction can be issued per cycle. +(define_cpu_unit "cortex_a9_neon_issue_perm" "cortex_a9_neon") + +;; Only one data-processing instruction can be issued per cycle. +(define_cpu_unit "cortex_a9_neon_issue_dp" "cortex_a9_neon") + +;; We need a special mutual exclusion (to be used in addition to +;; cortex_a9_neon_issue_dp) for the case when an instruction such as +;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to +;; E2 of the floating-point add pipeline. On the cycle previous to that +;; forward we must prevent issue of any instruction to the floating-point +;; add pipeline, but still allow issue of a data-processing instruction +;; to any of the other pipelines. +(define_cpu_unit "cortex_a9_neon_issue_fadd" "cortex_a9_neon") +(define_cpu_unit "cortex_a9_neon_mcr" "cortex_a9_neon") + + +;; Patterns of reservation. +;; We model the NEON issue units as running in parallel with the core ones. +;; We assume that multi-cycle NEON instructions get decomposed into +;; micro-ops as they are issued into the NEON pipeline. 
+ +(define_reservation "cortex_a9_neon_dp" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp") +(define_reservation "cortex_a9_neon_dp_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp") +(define_reservation "cortex_a9_neon_dp_4" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp") + +(define_reservation "cortex_a9_neon_fadd" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp + \ + cortex_a9_neon_issue_fadd") +(define_reservation "cortex_a9_neon_fadd_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_fadd,\ + cortex_a9_neon_issue_dp") + +(define_reservation "cortex_a9_neon_perm" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_perm_2" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm, \ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_perm_3" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") + +(define_reservation "cortex_a9_neon_ls" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm+cortex_a9_ls") +(define_reservation "cortex_a9_neon_ls_2" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_3" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_4" + "ca9_issue_vfp_neon+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") +(define_reservation "cortex_a9_neon_ls_5" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_dp+cortex_a9_neon_issue_perm,\ + cortex_a9_neon_issue_perm") + +(define_reservation "cortex_a9_neon_fmul_then_fadd" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + nothing*3,\ + cortex_a9_neon_issue_fadd") +(define_reservation "cortex_a9_neon_fmul_then_fadd_2" + "ca9_issue_vfp_neon + cortex_a9_neon_issue_dp,\ + cortex_a9_neon_issue_dp,\ + nothing*2,\ + cortex_a9_neon_issue_fadd,\ + cortex_a9_neon_issue_fadd") + +;; NEON -> core transfers. +(define_insn_reservation "ca9_neon_mrc" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mrc")) + "ca9_issue_vfp_neon + cortex_a9_neon_mcr") + +(define_insn_reservation "ca9_neon_mrrc" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mrrc")) + "ca9_issue_vfp_neon + cortex_a9_neon_mcr") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3. +(define_insn_reservation "cortex_a9_neon_int_1" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_1")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N3. +(define_insn_reservation "cortex_a9_neon_int_2" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_2")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. 
+(define_insn_reservation "cortex_a9_neon_int_3" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_3")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_int_4" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_4")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)n operands at N2, and produce a result at N4. +(define_insn_reservation "cortex_a9_neon_int_5" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_int_5")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_vqneg_vqabs" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vqneg_vqabs")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation produce a result at N3. +(define_insn_reservation "cortex_a9_neon_vmov" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vmov")) + "cortex_a8_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vaba" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vaba")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vaba_qqq" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vaba_qqq")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a9_neon_bit_ops_q" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_bit_ops_q")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_qqq_8_16_32_ddd_32")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6. 
+(define_insn_reservation "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mla_qqq_8_16" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_qqq_8_16")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 2. +(define_insn_reservation "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_qqq_32_qqd_32_scalar")) + "cortex_a9_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_ddd_16_scalar_32_16_long_scalar")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4. +(define_insn_reservation "cortex_a9_neon_mul_qqd_32_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mul_qqd_32_scalar")) + "cortex_a9_neon_dp_4") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N6. +(define_insn_reservation "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3. +(define_insn_reservation "cortex_a9_neon_shift_1" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_shift_1")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4. +(define_insn_reservation "cortex_a9_neon_shift_2" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_shift_2")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N3 on cycle 2. +(define_insn_reservation "cortex_a9_neon_shift_3" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_shift_3")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N4 on cycle 2. 
+(define_insn_reservation "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vqshl_vrshl_vqrshl_qqq")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)m operands at N1, +;; their (D|Q)d operands at N3, and produce a result at N6. +(define_insn_reservation "cortex_a9_neon_vsra_vrsra" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vsra_vrsra")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vadd_ddd_vabs_dd" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vadd_ddd_vabs_dd")) + "cortex_a9_neon_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vadd_qqq_vabs_qq" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vadd_qqq_vabs_qq")) + "cortex_a9_neon_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vsum" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vsum")) + "cortex_a9_neon_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5. +(define_insn_reservation "cortex_a9_neon_fp_vmul_ddd" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmul_ddd")) + "cortex_a9_neon_dp") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmul_qqd" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmul_qqd")) + "cortex_a9_neon_dp_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmla_ddd")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmla_qqq")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vmla_ddd_scalar" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmla_ddd_scalar")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their (D|Q)n operands at N2, +;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and +;; produce a result at N9 on cycle 2. 
+(define_insn_reservation "cortex_a9_neon_fp_vmla_qqq_scalar" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vmla_qqq_scalar")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9. +(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vrecps_vrsqrts_ddd")) + "cortex_a9_neon_fmul_then_fadd") + +;; Instructions using this reservation read their source operands at N2, and +;; produce a result at N9 on cycle 2. +(define_insn_reservation "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" 10 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_fp_vrecps_vrsqrts_qqq")) + "cortex_a9_neon_fmul_then_fadd_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2. +(define_insn_reservation "cortex_a9_neon_bp_simple" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_bp_simple")) + "cortex_a9_neon_perm") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_bp_2cycle" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_bp_2cycle")) + "cortex_a9_neon_perm_2") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_bp_3cycle" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_bp_3cycle")) + "cortex_a9_neon_perm_3") + +;; Instructions using this reservation produce a result at N1. +(define_insn_reservation "cortex_a9_neon_ldr" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_ldr")) + "cortex_a9_neon_ls") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_str" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_str")) + "cortex_a9_neon_ls") + +;; Instructions using this reservation produce a result at N1 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld1_1_2_regs" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld1_1_2_regs")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation produce a result at N1 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld1_3_4_regs" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld1_3_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld2_4_regs" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld2_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 4. +(define_insn_reservation "cortex_a9_neon_vld3_vld4" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld3_vld4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1. 
+(define_insn_reservation "cortex_a9_neon_vst1_1_2_regs_vst2_2_regs" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst1_1_2_regs_vst2_2_regs")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_3_4_regs" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst1_3_4_regs")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst2_4_regs_vst3_vst4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst2_4_regs_vst3_vst4")) + "cortex_a9_neon_ls_4") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 3. +(define_insn_reservation "cortex_a9_neon_vld1_vld2_lane" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld1_vld2_lane")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation read their source operands at N1, and +;; produce a result at N2 on cycle 5. +(define_insn_reservation "cortex_a9_neon_vld3_vld4_lane" 6 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld3_vld4_lane")) + "cortex_a9_neon_ls_5") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst1_vst2_lane" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst1_vst2_lane")) + "cortex_a9_neon_ls_2") + +;; Instructions using this reservation read their source operands at N1. +(define_insn_reservation "cortex_a9_neon_vst3_vst4_lane" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vst3_vst4_lane")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2 on cycle 2. +(define_insn_reservation "cortex_a9_neon_vld3_vld4_all_lanes" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_vld3_vld4_all_lanes")) + "cortex_a9_neon_ls_3") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a9_neon_mcr" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mcr")) + "cortex_a9_neon_perm") + +;; Instructions using this reservation produce a result at N2. +(define_insn_reservation "cortex_a9_neon_mcr_2_mcrr" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "cortex_a9_neon_type" "neon_mcr_2_mcrr")) + "cortex_a9_neon_perm_2") + +;; Exceptions to the default latencies. 
+ +(define_bypass 1 "cortex_a9_neon_mcr_2_mcrr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_mcr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld3_vld4_all_lanes" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vld3_vld4_lane" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vld1_vld2_lane" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_vld3_vld4" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vld2_4_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ 
+ cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld2_2_regs_vld1_vld2_all_lanes" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vld1_3_4_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_vld1_1_2_regs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 0 "cortex_a9_neon_ldr" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_bp_3cycle" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_bp_2cycle" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 1 "cortex_a9_neon_bp_simple" + "cortex_a9_neon_int_1,\ + 
cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vrecps_vrsqrts_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vrecps_vrsqrts_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 9 "cortex_a9_neon_fp_vmla_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_fp_vmla_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_fp_vmul_qqd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vmul_ddd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vsum" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_fp_vadd_qqq_vabs_qq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_fp_vadd_ddd_vabs_dd" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vsra_vrsra" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 4 "cortex_a9_neon_vqshl_vrshl_vqrshl_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + 
cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_shift_3" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_shift_2" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_shift_1" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_mul_qqd_32_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mul_ddd_16_scalar_32_16_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + 
cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 8 "cortex_a9_neon_mla_qqq_32_qqd_32_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mla_qqq_8_16" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_mul_qqq_8_16_32_ddd_32" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + 
cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 6 "cortex_a9_neon_vaba_qqq" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 5 "cortex_a9_neon_vaba" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_vmov" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_bit_ops_q" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_vqneg_vqabs" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_int_5" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + 
cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 3 "cortex_a9_neon_int_4" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_3" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_2" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + +(define_bypass 2 "cortex_a9_neon_int_1" + "cortex_a9_neon_int_1,\ + cortex_a9_neon_int_4,\ + cortex_a9_neon_bit_ops_q,\ + cortex_a9_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mul_qqq_8_16_32_ddd_32,\ + cortex_a9_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\ + cortex_a9_neon_mla_qqq_8_16,\ + cortex_a9_neon_fp_vadd_ddd_vabs_dd,\ + cortex_a9_neon_fp_vadd_qqq_vabs_qq,\ + cortex_a9_neon_fp_vmla_ddd,\ + cortex_a9_neon_fp_vmla_qqq,\ + cortex_a9_neon_fp_vrecps_vrsqrts_ddd,\ + cortex_a9_neon_fp_vrecps_vrsqrts_qqq") + diff --git a/gcc-4.9/gcc/config/arm/cortex-a9.md b/gcc-4.9/gcc/config/arm/cortex-a9.md new file mode 100644 index 000000000..a888896c5 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-a9.md @@ -0,0 +1,283 @@ +;; ARM Cortex-A9 pipeline description +;; Copyright (C) 2008-2014 Free Software Foundation, Inc. +;; Originally written by CodeSourcery for VFP. +;; +;; Rewritten by Ramana Radhakrishnan +;; Integer Pipeline description contributed by ARM Ltd. +;; VFP Pipeline description rewritten and contributed by ARM Ltd. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_a9") + +;; The Cortex-A9 core is modelled as a dual issue pipeline that has +;; the following components. +;; 1. 1 Load Store Pipeline. +;; 2. P0 / main pipeline for data processing instructions. +;; 3. P1 / Dual pipeline for Data processing instructions. +;; 4. MAC pipeline for multiply as well as multiply +;; and accumulate instructions. +;; 5. 1 VFP and an optional Neon unit. +;; The Load/Store, VFP and Neon issue pipeline are multiplexed. +;; The P0 / main pipeline and M1 stage of the MAC pipeline are +;; multiplexed. +;; The P1 / dual pipeline and M2 stage of the MAC pipeline are +;; multiplexed. +;; There are only 4 integer register read ports and hence at any point of +;; time we can't have issue down the E1 and the E2 ports unless +;; of course there are bypass paths that get exercised. +;; Both P0 and P1 have 2 stages E1 and E2. +;; Data processing instructions issue to E1 or E2 depending on +;; whether they have an early shift or not. + +(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9") +(define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9") +(define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9") +(define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9") +(define_cpu_unit "cortex_a9_mac_m1, cortex_a9_mac_m2" "cortex_a9") +(define_cpu_unit "cortex_a9_branch, cortex_a9_issue_branch" "cortex_a9") + +(define_reservation "cortex_a9_p0_default" "cortex_a9_p0_e2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_p1_default" "cortex_a9_p1_e2, cortex_a9_p1_wb") +(define_reservation "cortex_a9_p0_shift" "cortex_a9_p0_e1, cortex_a9_p0_default") +(define_reservation "cortex_a9_p1_shift" "cortex_a9_p1_e1, cortex_a9_p1_default") + +(define_reservation "cortex_a9_multcycle1" + "cortex_a9_p0_e2 + cortex_a9_mac_m1 + cortex_a9_mac_m2 + \ +cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") + +(define_reservation "cortex_a9_mult16" + "cortex_a9_mac_m1, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mac16" + "cortex_a9_multcycle1, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mult" + "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mac" + "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb") +(define_reservation "cortex_a9_mult_long" + "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb") + +;; Issue at the same time along the load store pipeline and +;; the VFP / Neon pipeline is not possible. +(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon") + +;; Default data processing instruction without any shift +;; The only exception to this is the mov instruction +;; which can go down E2 without any problem. +(define_insn_reservation "cortex_a9_dp" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mov_shift_reg,mov_shift,\ + mrs,multiple,no_insn")) + "cortex_a9_p0_default|cortex_a9_p1_default") + +;; An instruction using the shifter will go down E1. 
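+;; For example, an instruction such as add r0, r1, r2, lsl #2 needs the +;; shifter and so issues to E1, giving it one cycle more result latency +;; than the plain data-processing instructions modelled above.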
+(define_insn_reservation "cortex_a9_dp_shift" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mvn_shift,mvn_shift_reg")) + "cortex_a9_p0_shift | cortex_a9_p1_shift") + +;; Loads have a latency of 4 cycles. +;; We don't model autoincrement instructions. These +;; instructions use the load store pipeline and 1 of +;; the E2 units to write back the result of the increment. + +(define_insn_reservation "cortex_a9_load1_2" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd")) + "cortex_a9_ls") + +;; Load multiples and store multiples can't be issued for 2 cycles in a +;; row. The description below assumes that addresses are 64 bit aligned. +;; If not, there is an extra cycle latency which is not modelled. + +(define_insn_reservation "cortex_a9_load3_4" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "load3, load4")) + "cortex_a9_ls, cortex_a9_ls") + +(define_insn_reservation "cortex_a9_store1_2" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "store1, store2, f_stores, f_stored")) + "cortex_a9_ls") + +;; Almost all our store multiples use an auto-increment +;; form. Don't issue back to back load and store multiples +;; because the load store unit will stall. + +(define_insn_reservation "cortex_a9_store3_4" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "store3, store4")) + "cortex_a9_ls+(cortex_a9_p0_default | cortex_a9_p1_default), cortex_a9_ls") + +;; We get 16*16 multiply / mac results in 3 cycles. +(define_insn_reservation "cortex_a9_mult16" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "smulxy")) + "cortex_a9_mult16") + +;; The 16*16 mac is slightly different in that it +;; reserves M1 and M2 in the same cycle. +(define_insn_reservation "cortex_a9_mac16" 3 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "smlaxy")) + "cortex_a9_mac16") + +(define_insn_reservation "cortex_a9_multiply" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "mul,smmul,smmulr")) + "cortex_a9_mult") + +(define_insn_reservation "cortex_a9_mac" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "mla,smmla")) + "cortex_a9_mac") + +(define_insn_reservation "cortex_a9_multiply_long" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) + "cortex_a9_mult_long") + +;; An instruction with a result in E2 can be forwarded +;; to E2 or E1 or M1 or the load store unit in the next cycle. + +(define_bypass 1 "cortex_a9_dp" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, + cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, + cortex_a9_multiply_long") + +(define_bypass 2 "cortex_a9_dp_shift" + "cortex_a9_dp_shift, cortex_a9_multiply, + cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, + cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, + cortex_a9_multiply_long") + +;; An instruction in the load store pipeline can provide +;; read access to a DP instruction in the P0 default pipeline +;; before the writeback stage. + +(define_bypass 3 "cortex_a9_load1_2" "cortex_a9_dp, cortex_a9_load1_2, +cortex_a9_store3_4, cortex_a9_store1_2") + +(define_bypass 4 "cortex_a9_load3_4" "cortex_a9_dp, cortex_a9_load1_2, +cortex_a9_store3_4, cortex_a9_store1_2, cortex_a9_load3_4") + +;; Calls and branches.
+ +;; Branch instructions + +(define_insn_reservation "cortex_a9_branch" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "branch")) + "cortex_a9_branch") + +;; Call latencies are essentially 0 but make sure +;; dual issue doesn't happen i.e the next instruction +;; starts at the next cycle. +(define_insn_reservation "cortex_a9_call" 0 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "call")) + "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon") + + +;; Pipelining for VFP instructions. +;; Issue happens either along load store unit or the VFP / Neon unit. +;; Pipeline Instruction Classification. +;; FPS - fmov, ffariths, ffarithd,f_mcr,f_mcrr,f_mrc,f_mrrc +;; FP_ADD - fadds, faddd, fcmps (1) +;; FPMUL - fmul{s,d}, fmac{s,d}, ffma{s,d} +;; FPDIV - fdiv{s,d} +(define_cpu_unit "ca9fps" "cortex_a9") +(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9") +(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9") +(define_cpu_unit "ca9fp_ds1" "cortex_a9") + + +;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle. +(define_insn_reservation "cortex_a9_fps" 2 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmov, fconsts, fconstd, ffariths, ffarithd,\ + f_mcr, f_mcrr, f_mrc, f_mrrc, f_flag")) + "ca9_issue_vfp_neon + ca9fps") + +(define_bypass 1 + "cortex_a9_fps" + "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long") + +;; Scheduling on the FP_ADD pipeline. +(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4") + +(define_insn_reservation "cortex_a9_fadd" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fadds, faddd, f_cvt, f_cvtf2i, f_cvti2f")) + "ca9fp_add") + +(define_insn_reservation "cortex_a9_fcmp" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fcmps, fcmpd")) + "ca9_issue_vfp_neon + ca9fp_add1") + +;; Scheduling for the Multiply and MAC instructions. +(define_reservation "ca9fmuls" + "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") + +(define_reservation "ca9fmuld" + "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") + +(define_insn_reservation "cortex_a9_fmuls" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuls")) + "ca9fmuls") + +(define_insn_reservation "cortex_a9_fmuld" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuld")) + "ca9fmuld") + +(define_insn_reservation "cortex_a9_fmacs" 8 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacs,ffmas")) + "ca9fmuls, ca9fp_add") + +(define_insn_reservation "cortex_a9_fmacd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacd,ffmad")) + "ca9fmuld, ca9fp_add") + +;; Division pipeline description. 
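+;; The divide/square-root unit (ca9fp_ds1) is reserved only on the issue +;; cycle; the remaining cycles are modelled as "nothing", so the results +;; below become available after 15 (single precision) or 25 (double +;; precision) cycles.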
+(define_insn_reservation "cortex_a9_fdivs" 15 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivs, fsqrts")) + "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14") + +(define_insn_reservation "cortex_a9_fdivd" 25 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivd, fsqrtd")) + "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24") + +;; Include Neon pipeline description +(include "cortex-a9-neon.md") diff --git a/gcc-4.9/gcc/config/arm/cortex-m4-fpu.md b/gcc-4.9/gcc/config/arm/cortex-m4-fpu.md new file mode 100644 index 000000000..aa81e52ef --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-m4-fpu.md @@ -0,0 +1,117 @@ +;; ARM Cortex-M4 FPU pipeline description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Use two artificial units to model FPU. +(define_cpu_unit "cortex_m4_v_a" "cortex_m4") +(define_cpu_unit "cortex_m4_v_b" "cortex_m4") + +(define_reservation "cortex_m4_v" "cortex_m4_v_a+cortex_m4_v_b") +(define_reservation "cortex_m4_ex_v" "cortex_m4_ex+cortex_m4_v") +(define_reservation "cortex_m4_exa_va" "cortex_m4_a+cortex_m4_v_a") +(define_reservation "cortex_m4_exb_vb" "cortex_m4_b+cortex_m4_v_b") + +;; Integer instructions following VDIV or VSQRT complete out-of-order. +(define_insn_reservation "cortex_m4_fdivs" 15 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_m4_ex_v,cortex_m4_v*13") + +(define_insn_reservation "cortex_m4_vmov_1" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fmov,fconsts")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_vmov_2" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_mrc,f_mrrc,f_mcr,f_mcrr")) + "cortex_m4_ex_v*2") + +(define_insn_reservation "cortex_m4_fmuls" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fmuls")) + "cortex_m4_ex_v") + +;; Integer instructions following multiply-accumulate instructions +;; complete out-of-order. 
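+;; This is approximated by keeping only the FPU units (cortex_m4_v) +;; reserved for the trailing cycles, so independent integer instructions +;; can still be issued in parallel.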
+(define_insn_reservation "cortex_m4_fmacs" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fmacs,ffmas")) + "cortex_m4_ex_v,cortex_m4_v*2") + +(define_insn_reservation "cortex_m4_ffariths" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "ffariths")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_fadds" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fadds")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_fcmps" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "fcmps")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_flag" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_flag")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_cvt" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) + "cortex_m4_ex_v") + +(define_insn_reservation "cortex_m4_f_load" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_loads")) + "cortex_m4_exa_va,cortex_m4_exb_vb") + +(define_insn_reservation "cortex_m4_f_store" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_stores")) + "cortex_m4_exa_va") + +(define_insn_reservation "cortex_m4_f_loadd" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_loadd")) + "cortex_m4_ex_v*3") + +(define_insn_reservation "cortex_m4_f_stored" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "f_stored")) + "cortex_m4_ex_v*3") + +;; MAC instructions consume their addend one cycle later. If the result +;; of an arithmetic instruction is consumed as the addend of the following +;; MAC instruction, the latency can be decreased by one. + +(define_bypass 1 "cortex_m4_fadds,cortex_m4_fmuls,cortex_m4_f_cvt" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") + +(define_bypass 3 "cortex_m4_fmacs" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") + +(define_bypass 14 "cortex_m4_fdivs" + "cortex_m4_fmacs" + "arm_no_early_mul_dep") diff --git a/gcc-4.9/gcc/config/arm/cortex-m4.md b/gcc-4.9/gcc/config/arm/cortex-m4.md new file mode 100644 index 000000000..690ce751f --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-m4.md @@ -0,0 +1,128 @@ +;; ARM Cortex-M4 pipeline description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_m4") + +;; We model the pipelining of LDR instructions by using two artificial units. + +(define_cpu_unit "cortex_m4_a" "cortex_m4") + +(define_cpu_unit "cortex_m4_b" "cortex_m4") + +(define_reservation "cortex_m4_ex" "cortex_m4_a+cortex_m4_b") + +;; ALU and multiply is one cycle. 
+(define_insn_reservation "cortex_m4_alu" 1 + (and (eq_attr "tune" "cortexm4") + (ior (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn") + (ior (eq_attr "mul32" "yes") + (eq_attr "mul64" "yes")))) + "cortex_m4_ex") + +;; Byte, half-word and word load is two cycles. +(define_insn_reservation "cortex_m4_load1" 2 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load_byte,load1")) + "cortex_m4_a, cortex_m4_b") + +;; str rx, [ry, #imm] is always one cycle. +(define_insn_reservation "cortex_m4_store1_1" 1 + (and (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store1")) + (match_test "arm_address_offset_is_imm (insn)")) + "cortex_m4_a") + +;; Other byte, half-word and word load is two cycles. +(define_insn_reservation "cortex_m4_store1_2" 2 + (and (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store1")) + (not (match_test "arm_address_offset_is_imm (insn)"))) + "cortex_m4_a*2") + +(define_insn_reservation "cortex_m4_load2" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load2")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_store2" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store2")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_load3" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load3")) + "cortex_m4_ex*4") + +(define_insn_reservation "cortex_m4_store3" 4 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store3")) + "cortex_m4_ex*4") + +(define_insn_reservation "cortex_m4_load4" 5 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "load4")) + "cortex_m4_ex*5") + +(define_insn_reservation "cortex_m4_store4" 5 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "store4")) + "cortex_m4_ex*5") + +(define_bypass 1 "cortex_m4_load1" + "cortex_m4_store1_1,cortex_m4_store1_2" + "arm_no_early_store_addr_dep") + +;; If the address of load or store depends on the result of the preceding +;; instruction, the latency is increased by one. + +(define_bypass 2 "cortex_m4_alu" + "cortex_m4_load1" + "arm_early_load_addr_dep") + +(define_bypass 2 "cortex_m4_alu" + "cortex_m4_store1_1,cortex_m4_store1_2" + "arm_early_store_addr_dep") + +(define_insn_reservation "cortex_m4_branch" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "branch")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_call" 3 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "call")) + "cortex_m4_ex*3") + +(define_insn_reservation "cortex_m4_block" 1 + (and (eq_attr "tune" "cortexm4") + (eq_attr "type" "block")) + "cortex_m4_ex") diff --git a/gcc-4.9/gcc/config/arm/cortex-r4.md b/gcc-4.9/gcc/config/arm/cortex-r4.md new file mode 100644 index 000000000..f000124cb --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-r4.md @@ -0,0 +1,299 @@ +;; ARM Cortex-R4 scheduling description. +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributed by CodeSourcery. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "cortex_r4") + +;; We approximate the dual-issue constraints of this core using four +;; "issue units" and a reservation matrix as follows. The numbers indicate +;; the instruction groups' preferences in order. Multiple entries for +;; the same numbered preference indicate units that must be reserved +;; together. +;; +;; Issue unit: A B C ALU +;; +;; ALU w/o reg shift 1st 2nd 1st and 2nd +;; ALU w/ reg shift 1st 2nd 2nd 1st and 2nd +;; Moves 1st 2nd 2nd +;; Multiplication 1st 1st +;; Division 1st 1st +;; Load/store single 1st 1st +;; Other load/store 1st 1st +;; Branches 1st + +(define_cpu_unit "cortex_r4_issue_a" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_b" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_c" "cortex_r4") +(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4") + +(define_reservation "cortex_r4_alu" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_alu)") +(define_reservation "cortex_r4_alu_shift_reg" + "(cortex_r4_issue_a+cortex_r4_issue_alu)|\ + (cortex_r4_issue_b+cortex_r4_issue_c+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mov" + "cortex_r4_issue_a|(cortex_r4_issue_b+\ + cortex_r4_issue_alu)") +(define_reservation "cortex_r4_mul" "cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_mul_2" + "(cortex_r4_issue_a+cortex_r4_issue_alu)*2") +;; Division instructions execute out-of-order with respect to the +;; rest of the pipeline and only require reservations on their first and +;; final cycles. +(define_reservation "cortex_r4_div_9" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*7,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_div_10" + "cortex_r4_issue_a+cortex_r4_issue_alu,\ + nothing*8,\ + cortex_r4_issue_a+cortex_r4_issue_alu") +(define_reservation "cortex_r4_load_store" + "cortex_r4_issue_a+cortex_r4_issue_c") +(define_reservation "cortex_r4_load_store_2" + "(cortex_r4_issue_a+cortex_r4_issue_b)*2") +(define_reservation "cortex_r4_branch" "cortex_r4_issue_b") + +;; We assume that all instructions are unconditional. + +;; Data processing instructions. Moves without shifts are kept separate +;; for the purposes of the dual-issue constraints above. 
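+;; A mov without a shift only needs issue unit A, or unit B together with +;; the ALU unit, so it pairs more freely than the ALU operations; see the +;; cortex_r4_mov reservation below.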
+(define_insn_reservation "cortex_r4_alu" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,mvn_imm,mvn_reg")) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_mov" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "mov_imm,mov_reg")) + "cortex_r4_mov") + +(define_insn_reservation "cortex_r4_alu_shift" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + extend,mov_shift,mvn_shift")) + "cortex_r4_alu") + +(define_insn_reservation "cortex_r4_alu_shift_reg" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg,\ + mrs,multiple,no_insn")) + "cortex_r4_alu_shift_reg") + +;; An ALU instruction followed by an ALU instruction with no early dep. +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; In terms of availabilities, a consumer mov could theoretically be +;; issued together with a producer ALU instruction, without stalls. +;; In practice this cannot happen because mov;add (in that order) is not +;; eligible for dual issue and furthermore dual issue is not permitted +;; when a dependency is involved. We therefore note it as latency one. +;; A mov followed by another of the same is also latency one. +(define_bypass 1 "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\ + cortex_r4_mov" + "cortex_r4_mov") + +;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are +;; media data processing instructions nor sad instructions. + +;; Multiplication instructions. + +(define_insn_reservation "cortex_r4_mul_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "mul,smmul")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mul_3" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "smulxy,smulwy,smuad,smusd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mla_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "mla,smmla")) + "cortex_r4_mul_2") + +(define_insn_reservation "cortex_r4_mla_3" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "smlaxy,smlawy,smlad,smlsd")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_smlald" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "smlald,smlsld")) + "cortex_r4_mul") + +(define_insn_reservation "cortex_r4_mull" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "smull,umull,umlal,umaal")) + "cortex_r4_mul_2") + +;; A multiply or an MLA with a single-register result, followed by an +;; MLA with an accumulator dependency, has its result forwarded. 
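+;; For example, in mul r0, r1, r2 followed by mla r3, r4, r5, r0 the +;; accumulator r0 is forwarded, so the mla sees a latency of 3 rather than +;; 4 (2 rather than 3 for the 16-bit forms).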
+(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4" + "cortex_r4_mla_3,cortex_r4_mla_4" + "arm_mac_accumulator_is_mul_result") + +;; A multiply followed by an ALU instruction needing the multiply +;; result only at ALU has lower latency than one needing it at Shift. +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; A multiply followed by a mov has one cycle lower latency again. +(define_bypass 1 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_mov") +(define_bypass 2 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_mov") + +;; We guess that division of A/B using sdiv or udiv, on average, +;; is performed with B having ten more leading zeros than A. +;; This gives a latency of nine for udiv and ten for sdiv. +(define_insn_reservation "cortex_r4_udiv" 9 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "udiv")) + "cortex_r4_div_9") + +(define_insn_reservation "cortex_r4_sdiv" 10 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "sdiv")) + "cortex_r4_div_10") + +;; Branches. We assume correct prediction. + +(define_insn_reservation "cortex_r4_branch" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "branch")) + "cortex_r4_branch") + +;; Call latencies are not predictable. A semi-arbitrary very large +;; number is used as "positive infinity" so that everything should be +;; finished by the time of return. +(define_insn_reservation "cortex_r4_call" 32 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "call")) + "nothing") + +;; Status register access instructions are not currently emitted. + +;; Load instructions. +;; We do not model the "addr_md_3cycle" cases and assume that +;; accesses following are correctly aligned. + +(define_insn_reservation "cortex_r4_load_1_2" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "load1,load2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_load_3_4" 4 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "load3,load4")) + "cortex_r4_load_store_2") + +;; If a producing load is followed by an instruction consuming only +;; as a Normal Reg, there is one fewer cycle of latency. 
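+;; For example, an add consuming the result of a single ldr sees a latency +;; of 2 cycles rather than the load's full latency of 3.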
+ +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "cortex_r4_load_1_2" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "cortex_r4_load_3_4" + "cortex_r4_alu_shift_reg" + "arm_no_early_alu_shift_value_dep") + +;; If a producing load is followed by an instruction consuming only +;; as a Late Reg, there are two fewer cycles of latency. Such consumer +;; instructions are moves and stores. + +(define_bypass 1 "cortex_r4_load_1_2" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") +(define_bypass 2 "cortex_r4_load_3_4" + "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4") + +;; If a producer's result is required as the base or offset of a load, +;; there is an extra cycle latency. + +(define_bypass 3 "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\ + cortex_r4_alu_shift_reg" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 4 "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +(define_bypass 5 "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull" + "cortex_r4_load_1_2,cortex_r4_load_3_4") + +;; Store instructions. + +(define_insn_reservation "cortex_r4_store_1_2" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "store1,store2")) + "cortex_r4_load_store") + +(define_insn_reservation "cortex_r4_store_3_4" 0 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "store3,store4")) + "cortex_r4_load_store_2") + diff --git a/gcc-4.9/gcc/config/arm/cortex-r4f.md b/gcc-4.9/gcc/config/arm/cortex-r4f.md new file mode 100644 index 000000000..25d949789 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/cortex-r4f.md @@ -0,0 +1,161 @@ +;; ARM Cortex-R4F VFP pipeline description +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; With the exception of simple VMOV , instructions and +;; the accumulate operand of a multiply-accumulate instruction, all +;; registers are early registers. Thus base latencies are 1 more than +;; those listed in the TRM. + +;; We use the A, B and C units from the integer core, plus two additional +;; units to enforce VFP dual issue constraints.
+ +;; A B C V1 VMLA +;; fcpy 1 2 +;; farith 1 2 1 +;; fmrc 1 2 +;; fconst 1 2 * * +;; ffarith 1 2 * * +;; fmac 1 2 1 2 +;; fdiv 1 2 * +;; f_loads * * * +;; f_stores * * * + +(define_cpu_unit "cortex_r4_v1" "cortex_r4") + +(define_cpu_unit "cortex_r4_vmla" "cortex_r4") + +(define_reservation "cortex_r4_issue_ab" + "(cortex_r4_issue_a|cortex_r4_issue_b)") +(define_reservation "cortex_r4_single_issue" + "cortex_r4_issue_a+cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fcpys" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmov")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_ffariths" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffariths,fconsts,fcmps")) + "cortex_r4_issue_ab+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fariths" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fadds,fmuls")) + "(cortex_r4_issue_a+cortex_r4_v1)|cortex_r4_issue_b") + +(define_insn_reservation "cortex_r4_fmacs" 6 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacs,ffmas")) + "(cortex_r4_issue_a+cortex_r4_v1)|(cortex_r4_issue_b+cortex_r4_vmla)") + +(define_insn_reservation "cortex_r4_fdivs" 17 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivs, fsqrts")) + "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_floads" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loads")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_v1") + +(define_insn_reservation "cortex_r4_fstores" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_vmla") + +(define_insn_reservation "cortex_r4_mcr" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_mcr,f_mcrr")) + "cortex_r4_issue_ab") + +(define_insn_reservation "cortex_r4_mrc" 3 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_mrc,f_mrrc")) + "cortex_r4_issue_ab") + +;; Bypasses for normal (not early) regs. +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fcpys") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fcpys") +(define_bypass 5 "cortex_r4_fmacs" + "cortex_r4_fcpys") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fcpys") + +(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 2 "cortex_r4_fariths" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +;; mac->mac has an extra forwarding path. +(define_bypass 3 "cortex_r4_fmacs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") +(define_bypass 16 "cortex_r4_fdivs" + "cortex_r4_fmacs" + "arm_no_early_mul_dep") + +;; Double precision operations. These can not dual issue. + +(define_insn_reservation "cortex_r4_fmacd" 20 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fmacd,ffmad")) + "cortex_r4_single_issue*13") + +(define_insn_reservation "cortex_r4_farith" 10 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "faddd,fmuld")) + "cortex_r4_single_issue*3") + +;; FIXME: The short cycle count suggests these instructions complete +;; out of order. Chances are this is not a pipelined operation. 
+(define_insn_reservation "cortex_r4_fdivd" 97 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fdivd, fsqrtd")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_ffarithd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "ffarithd,fconstd")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_fcmpd" 2 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "fcmpd")) + "cortex_r4_single_issue*2") + +(define_insn_reservation "cortex_r4_f_cvt" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f")) + "cortex_r4_single_issue*3") + +(define_insn_reservation "cortex_r4_f_memd" 8 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_loadd,f_stored")) + "cortex_r4_single_issue") + +(define_insn_reservation "cortex_r4_f_flag" 1 + (and (eq_attr "tune_cortexr4" "yes") + (eq_attr "type" "f_stores")) + "cortex_r4_single_issue") + diff --git a/gcc-4.9/gcc/config/arm/crypto.def b/gcc-4.9/gcc/config/arm/crypto.def new file mode 100644 index 000000000..dc805d9ec --- /dev/null +++ b/gcc-4.9/gcc/config/arm/crypto.def @@ -0,0 +1,34 @@ +/* Cryptographic instruction builtin definitions. + Copyright (C) 2013-2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi) +CRYPTO2 (aese, AESE, v16uqi, v16uqi, v16uqi) +CRYPTO1 (aesimc, AESIMC, v16uqi, v16uqi) +CRYPTO1 (aesmc, AESMC, v16uqi, v16uqi) +CRYPTO1 (sha1h, SHA1H, v4usi, v4usi) +CRYPTO2 (sha1su1, SHA1SU1, v4usi, v4usi, v4usi) +CRYPTO2 (sha256su0, SHA256SU0, v4usi, v4usi, v4usi) +CRYPTO3 (sha1c, SHA1C, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1m, SHA1M, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1p, SHA1P, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha1su0, SHA1SU0, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256h, SHA256H, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256h2, SHA256H2, v4usi, v4usi, v4usi, v4usi) +CRYPTO3 (sha256su1, SHA256SU1, v4usi, v4usi, v4usi, v4usi) +CRYPTO2 (vmullp64, VMULLP64, uti, udi, udi) diff --git a/gcc-4.9/gcc/config/arm/crypto.md b/gcc-4.9/gcc/config/arm/crypto.md new file mode 100644 index 000000000..9f249803d --- /dev/null +++ b/gcc-4.9/gcc/config/arm/crypto.md @@ -0,0 +1,86 @@ +;; ARMv8-A crypto patterns. +;; Copyright (C) 2013-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. 
+ +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 + "register_operand" "w")] + CRYPTO_UNARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q1" + [(set_attr "type" "")] +) + +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand: 2 "register_operand" "w")] + CRYPTO_BINARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q2" + [(set_attr "type" "")] +) + +(define_insn "crypto_" + [(set (match_operand: 0 "register_operand" "=w") + (unspec: [(match_operand: 1 "register_operand" "0") + (match_operand: 2 "register_operand" "w") + (match_operand: 3 "register_operand" "w")] + CRYPTO_TERNARY))] + "TARGET_CRYPTO" + ".\\t%q0, %q2, %q3" + [(set_attr "type" "")] +) + +(define_insn "crypto_sha1h" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (zero_extend:V4SI + (unspec:SI [(vec_select:SI + (match_operand:V4SI 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))] + UNSPEC_SHA1H)))] + "TARGET_CRYPTO" + "sha1h.32\\t%q0, %q1" + [(set_attr "type" "crypto_sha1_fast")] +) + +(define_insn "crypto_vmullp64" + [(set (match_operand:TI 0 "register_operand" "=w") + (unspec:TI [(match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w")] + UNSPEC_VMULLP64))] + "TARGET_CRYPTO" + "vmull.p64\\t%q0, %P1, %P2" + [(set_attr "type" "neon_mul_d_long")] +) + +(define_insn "crypto_" + [(set (match_operand:V4SI 0 "register_operand" "=w") + (unspec: + [(match_operand: 1 "register_operand" "0") + (vec_select:SI + (match_operand: 2 "register_operand" "w") + (parallel [(match_operand:SI 4 "immediate_operand" "i")])) + (match_operand: 3 "register_operand" "w")] + CRYPTO_SELECTING))] + "TARGET_CRYPTO" + ".\\t%q0, %q2, %q3" + [(set_attr "type" "")] +) diff --git a/gcc-4.9/gcc/config/arm/driver-arm.c b/gcc-4.9/gcc/config/arm/driver-arm.c new file mode 100644 index 000000000..6d9c4174c --- /dev/null +++ b/gcc-4.9/gcc/config/arm/driver-arm.c @@ -0,0 +1,151 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "configargs.h" + +struct vendor_cpu { + const char *part_no; + const char *arch_name; + const char *cpu_name; +}; + +static struct vendor_cpu arm_cpu_table[] = { + {"0x926", "armv5te", "arm926ej-s"}, + {"0xa26", "armv5te", "arm1026ej-s"}, + {"0xb02", "armv6k", "mpcore"}, + {"0xb36", "armv6j", "arm1136j-s"}, + {"0xb56", "armv6t2", "arm1156t2-s"}, + {"0xb76", "armv6zk", "arm1176jz-s"}, + {"0xc05", "armv7-a", "cortex-a5"}, + {"0xc07", "armv7ve", "cortex-a7"}, + {"0xc08", "armv7-a", "cortex-a8"}, + {"0xc09", "armv7-a", "cortex-a9"}, + {"0xc0d", "armv7ve", "cortex-a12"}, + {"0xc0f", "armv7ve", "cortex-a15"}, + {"0xc14", "armv7-r", "cortex-r4"}, + {"0xc15", "armv7-r", "cortex-r5"}, + {"0xc20", "armv6-m", "cortex-m0"}, + {"0xc21", "armv6-m", "cortex-m1"}, + {"0xc23", "armv7-m", "cortex-m3"}, + {"0xc24", "armv7e-m", "cortex-m4"}, + {NULL, NULL, NULL} +}; + +static struct { + const char *vendor_no; + const struct vendor_cpu *vendor_parts; +} vendors[] = { + {"0x41", arm_cpu_table}, + {NULL, NULL} +}; + +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "arch", "cpu" or "tune" as argument depending on if + -march=native, -mcpu=native or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-march=armv7-a" on a Cortex-A8 for + -march=native. If the routine can't detect a known processor, + the -march or -mtune option is discarded. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + const char *val = NULL; + char buf[128]; + FILE *f = NULL; + bool arch; + const struct vendor_cpu *cpu_table = NULL; + + if (argc < 1) + goto not_found; + + arch = strcmp (argv[0], "arch") == 0; + if (!arch && strcmp (argv[0], "cpu") != 0 && strcmp (argv[0], "tune")) + goto not_found; + + f = fopen ("/proc/cpuinfo", "r"); + if (f == NULL) + goto not_found; + + while (fgets (buf, sizeof (buf), f) != NULL) + { + /* Ensure that CPU implementer is ARM (0x41). */ + if (strncmp (buf, "CPU implementer", sizeof ("CPU implementer") - 1) == 0) + { + int i; + for (i = 0; vendors[i].vendor_no != NULL; i++) + if (strstr (buf, vendors[i].vendor_no) != NULL) + { + cpu_table = vendors[i].vendor_parts; + break; + } + } + + /* Detect arch/cpu. */ + if (strncmp (buf, "CPU part", sizeof ("CPU part") - 1) == 0) + { + int i; + + if (cpu_table == NULL) + goto not_found; + + for (i = 0; cpu_table[i].part_no != NULL; i++) + if (strstr (buf, cpu_table[i].part_no) != NULL) + { + val = arch ? 
cpu_table[i].arch_name : cpu_table[i].cpu_name; + break; + } + break; + } + } + + fclose (f); + + if (val == NULL) + goto not_found; + + return concat ("-m", argv[0], "=", val, NULL); + +not_found: + { + unsigned int i; + unsigned int opt; + const char *search[] = {NULL, "arch"}; + + if (f) + fclose (f); + + search[0] = argv[0]; + for (opt = 0; opt < ARRAY_SIZE (search); opt++) + for (i = 0; i < ARRAY_SIZE (configure_default_options); i++) + if (strcmp (configure_default_options[i].name, search[opt]) == 0) + return concat ("-m", search[opt], "=", + configure_default_options[i].value, NULL); + return NULL; + } +} diff --git a/gcc-4.9/gcc/config/arm/elf.h b/gcc-4.9/gcc/config/arm/elf.h new file mode 100644 index 000000000..2ac8c8d04 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/elf.h @@ -0,0 +1,159 @@ +/* Definitions of target machine for GNU compiler. + For ARM with ELF obj format. + Copyright (C) 1995-2014 Free Software Foundation, Inc. + Contributed by Philip Blundell and + Catherine Moore + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef OBJECT_FORMAT_ELF + #error elf.h included before elfos.h +#endif + +#ifndef LOCAL_LABEL_PREFIX +#define LOCAL_LABEL_PREFIX "." +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__ELF__" +#endif + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, \ + { "subtarget_asm_float_spec", SUBTARGET_ASM_FLOAT_SPEC }, \ + SUBSUBTARGET_EXTRA_SPECS +#endif + +#ifndef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC "" +#endif + +#ifndef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC "\ +%{mapcs-float:-mfloat}" +#endif + +#undef SUBSUBTARGET_EXTRA_SPECS +#define SUBSUBTARGET_EXTRA_SPECS + +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{mbig-endian:-EB} \ +%{mlittle-endian:-EL} \ +%(asm_cpu_spec) \ +%{mapcs-*:-mapcs-%*} \ +%(subtarget_asm_float_spec) \ +%{mthumb-interwork:-mthumb-interwork} \ +%{mfloat-abi=*} %{mfpu=*} \ +%(subtarget_extra_asm_spec)" +#endif + +/* The ARM uses @ are a comment character so we need to redefine + TYPE_OPERAND_FMT. */ +#undef TYPE_OPERAND_FMT +#define TYPE_OPERAND_FMT "%%%s" + +/* We might need a ARM specific header to function declarations. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ARM_DECLARE_FUNCTION_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + ARM_OUTPUT_FN_UNWIND (FILE, TRUE); \ + } \ + while (0) + +/* We might need an ARM specific trailer for function declarations. 
*/ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + ARM_OUTPUT_FN_UNWIND (FILE, FALSE); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ + } \ + while (0) + +/* Define this macro if jump tables (for `tablejump' insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. */ +/* We put ARM and Thumb-2 jump tables in the text section, because it makes + the code more efficient, but for Thumb-1 it's better to put them out of + band unless we are generating compressed tables. */ +#define JUMP_TABLES_IN_TEXT_SECTION \ + (TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X" +#endif + +/* Run-time Target Specification. */ +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +#ifndef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS \ + { "marm", "mlittle-endian", "mfloat-abi=soft", "mno-thumb-interwork", "fno-leading-underscore" } +#endif + +#define TARGET_ASM_FILE_START_APP_OFF true +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + + +/* Output an element in the static constructor array. */ +#undef TARGET_ASM_CONSTRUCTOR +#define TARGET_ASM_CONSTRUCTOR arm_elf_asm_constructor + +#undef TARGET_ASM_DESTRUCTOR +#define TARGET_ASM_DESTRUCTOR arm_elf_asm_destructor + +/* For PIC code we need to explicitly specify (PLT) and (GOT) relocs. */ +#define NEED_PLT_RELOC flag_pic +#define NEED_GOT_RELOC flag_pic + +/* The ELF assembler handles GOT addressing differently to NetBSD. */ +#define GOT_PCREL 0 + +/* Align output to a power of two. Note ".align 0" is redundant, + and also GAS will treat it as ".align 2" which we do not want. */ +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do \ + { \ + if ((POWER) > 0) \ + fprintf (STREAM, "\t.align\t%d\n", POWER); \ + } \ + while (0) + +/* Horrible hack: We want to prevent some libgcc routines being included + for some multilibs. */ +#ifndef __ARM_ARCH_6M__ +#undef L_fixdfsi +#undef L_fixunsdfsi +#undef L_truncdfsf2 +#undef L_fixsfsi +#undef L_fixunssfsi +#undef L_floatdidf +#undef L_floatdisf +#undef L_floatundidf +#undef L_floatundisf +#endif + diff --git a/gcc-4.9/gcc/config/arm/fa526.md b/gcc-4.9/gcc/config/arm/fa526.md new file mode 100644 index 000000000..c345fdf65 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fa526.md @@ -0,0 +1,173 @@ +;; Faraday FA526 Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA526 Core Design Note, Copyright (c) 2010 Faraday Technology Corp. +;; +;; Modeled pipeline characteristics: +;; LD -> any use: latency = 3 (2 cycle penalty). 
+;; ALU -> any use: latency = 2 (1 cycle penalty). + +;; This automaton provides a pipeline description for the Faraday +;; FA526 core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa526") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; S E M W + +(define_cpu_unit "fa526_core" "fa526") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "526_alu_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "fa526_core") + +(define_insn_reservation "526_alu_shift_op" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "fa526_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "526_mult1" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "smlalxy,smulxy,smlaxy,smlalxy")) + "fa526_core") + +(define_insn_reservation "526_mult2" 5 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\ + umlals,smulls,smlals,smlawx")) + "fa526_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
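+;; A single load therefore gets a latency of 3 in 526_load1_op below, +;; matching the 2-cycle load-use penalty noted at the top of this file; a +;; dcache miss would add further latency that is not modelled.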
+ +(define_insn_reservation "526_load1_op" 3 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load1,load_byte")) + "fa526_core") + +(define_insn_reservation "526_load2_op" 4 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load2")) + "fa526_core*2") + +(define_insn_reservation "526_load3_op" 5 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load3")) + "fa526_core*3") + +(define_insn_reservation "526_load4_op" 6 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "load4")) + "fa526_core*4") + +(define_insn_reservation "526_store1_op" 0 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store1")) + "fa526_core") + +(define_insn_reservation "526_store2_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store2")) + "fa526_core*2") + +(define_insn_reservation "526_store3_op" 2 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store3")) + "fa526_core*3") + +(define_insn_reservation "526_store4_op" 3 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "store4")) + "fa526_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA526 +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "526_branch_op" 0 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "branch")) + "fa526_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. For most cases, the return value is set +;; by a mov instruction, which has 1 cycle latency. +(define_insn_reservation "526_call_op" 1 + (and (eq_attr "tune" "fa526") + (eq_attr "type" "call")) + "fa526_core") + diff --git a/gcc-4.9/gcc/config/arm/fa606te.md b/gcc-4.9/gcc/config/arm/fa606te.md new file mode 100644 index 000000000..01ecfc88c --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fa606te.md @@ -0,0 +1,182 @@ +;; Faraday FA606TE Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA606TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; Modeled pipeline characteristics: +;; LD -> any use: latency = 2 (1 cycle penalty). +;; ALU -> any use: latency = 1 (0 cycle penalty). + +;; This automaton provides a pipeline description for the Faraday +;; FA606TE core. 
+;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa606te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; E M W + +(define_cpu_unit "fa606te_core" "fa606te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "606te_alu_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ + mrs,multiple,no_insn")) + "fa606te_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "606te_mult1" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "smlalxy")) + "fa606te_core") + +(define_insn_reservation "606te_mult2" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "smlaxy,smulxy,smulwy,smlawy")) + "fa606te_core*2") + +(define_insn_reservation "606te_mult3" 4 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "mul,mla,muls,mlas")) + "fa606te_core*3") + +(define_insn_reservation "606te_mult4" 5 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals")) + "fa606te_core*4") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
+ +(define_insn_reservation "606te_load1_op" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load1,load_byte")) + "fa606te_core") + +(define_insn_reservation "606te_load2_op" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load2")) + "fa606te_core*2") + +(define_insn_reservation "606te_load3_op" 4 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load3")) + "fa606te_core*3") + +(define_insn_reservation "606te_load4_op" 5 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "load4")) + "fa606te_core*4") + +(define_insn_reservation "606te_store1_op" 0 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store1")) + "fa606te_core") + +(define_insn_reservation "606te_store2_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store2")) + "fa606te_core*2") + +(define_insn_reservation "606te_store3_op" 2 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store3")) + "fa606te_core*3") + +(define_insn_reservation "606te_store4_op" 3 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "store4")) + "fa606te_core*4") + + +;;(define_insn_reservation "606te_ldm_op" 9 +;; (and (eq_attr "tune" "fa606te") +;; (eq_attr "type" "load2,load3,load4,store2,store3,store4")) +;; "fa606te_core*7") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA606TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycles to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "606te_branch_op" 0 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "branch")) + "fa606te_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. For most cases, the return value is set +;; by a mov instruction, which has 1 cycle latency. +(define_insn_reservation "606te_call_op" 1 + (and (eq_attr "tune" "fa606te") + (eq_attr "type" "call")) + "fa606te_core") + diff --git a/gcc-4.9/gcc/config/arm/fa626te.md b/gcc-4.9/gcc/config/arm/fa626te.md new file mode 100644 index 000000000..e615bae37 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fa626te.md @@ -0,0 +1,177 @@ +;; Faraday FA626TE Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA626TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. 
+ +;; Modeled pipeline characteristics: +;; ALU -> simple address LDR/STR: latency = 2 (available after 2 cycles). +;; ALU -> shifted address LDR/STR: latency = 3. +;; ( extra 1 cycle unavoidable stall). +;; ALU -> other use: latency = 2 (available after 2 cycles). +;; LD -> simple address LDR/STR: latency = 3 (available after 3 cycles). +;; LD -> shifted address LDR/STR: latency = 4 +;; ( extra 1 cycle unavoidable stall). +;; LD -> any other use: latency = 3 (available after 3 cycles). + +;; This automaton provides a pipeline description for the Faraday +;; FA626TE core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fa626te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; S E M W + +(define_cpu_unit "fa626te_core" "fa626te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "626te_alu_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mrs,multiple,no_insn")) + "fa626te_core") + +(define_insn_reservation "626te_alu_shift_op" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "extend,\ + alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "fa626te_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "626te_mult1" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) + "fa626te_core") + +(define_insn_reservation "626te_mult2" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "mul,mla")) + "fa626te_core") + +(define_insn_reservation "626te_mult3" 3 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + "fa626te_core*2") + +(define_insn_reservation "626te_mult4" 4 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "smulls,smlals,umulls,umlals")) + "fa626te_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). 
These models assume that all memory references +;; hit in dcache. + +(define_insn_reservation "626te_load1_op" 3 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load1,load_byte")) + "fa626te_core") + +(define_insn_reservation "626te_load2_op" 4 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load2,load3")) + "fa626te_core*2") + +(define_insn_reservation "626te_load3_op" 5 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "load4")) + "fa626te_core*3") + +(define_insn_reservation "626te_store1_op" 0 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store1")) + "fa626te_core") + +(define_insn_reservation "626te_store2_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store2,store3")) + "fa626te_core*2") + +(define_insn_reservation "626te_store3_op" 2 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "store4")) + "fa626te_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA626TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "626te_branch_op" 0 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "branch")) + "fa626te_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. +(define_insn_reservation "626te_call_op" 1 + (and (eq_attr "tune" "fa626,fa626te") + (eq_attr "type" "call")) + "fa626te_core") + diff --git a/gcc-4.9/gcc/config/arm/fa726te.md b/gcc-4.9/gcc/config/arm/fa726te.md new file mode 100644 index 000000000..225b2cfdd --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fa726te.md @@ -0,0 +1,223 @@ +;; Faraday FA726TE Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; These descriptions are based on the information contained in the +;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; This automaton provides a pipeline description for the Faraday +;; FA726TE core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. 
+ +(define_automaton "fa726te") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +;; E1 E2 E3 E4 E5 WB +;;______________________________________________________ +;; +;; <-------------- LD/ST -----------> +;; shifter + LU <-- AU --> +;; <-- AU --> shifter + LU CPSR (Pipe 0) +;;______________________________________________________ +;; +;; <---------- MUL ---------> +;; shifter + LU <-- AU --> +;; <-- AU --> shifter + LU CPSR (Pipe 1) + + +(define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te") +(define_cpu_unit "fa726te_mac_pipe" "fa726te") +(define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te") + +;; Pretend we have 2 LSUs (the second is ONLY for LDR), which can possibly +;; improve code quality. +(define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te") +(define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te") + +(define_reservation "fa726te_issue" "(fa726te_is0|fa726te_is1)") +;; Reservation to restrict issue to 1. +(define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require three cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; Move instructions. +(define_insn_reservation "726te_shift_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +;; ALU operations with no shifted operand will finished in 1 cycle +;; Other ALU instructions 2 cycles. +(define_insn_reservation "726te_alu_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_reg,alus_reg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mrs,multiple,no_insn")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +;; ALU operations with a shift-by-register operand. +;; These really stall in the decoder, in order to read the shift value +;; in the first cycle. If the instruction uses both shifter and AU, +;; it takes 3 cycles. +(define_insn_reservation "726te_alu_shift_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "extend,alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + +(define_insn_reservation "726te_alu_shift_reg_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Multiplication instructions loop in the execute stage until the +;; instruction has been passed through the multiplier array enough +;; times. 
Multiply operations occur in both the execute and memory
+;; stages of the pipeline.
+
+(define_insn_reservation "726te_mult_op" 3
+  (and (eq_attr "tune" "fa726te")
+       (eq_attr "type" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
+                        umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
+  "fa726te_issue+fa726te_mac_pipe")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Load/Store Instructions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The models for load/store instructions do not accurately describe
+;; the difference between operations with a base register writeback
+;; (such as "ldm!").  These models assume that all memory references
+;; hit in dcache.
+
+;; Loads with a shifted offset take 3 cycles, and are (a) probably the
+;; most common and (b) the pessimistic assumption will lead to fewer stalls.
+
+;; Scalar loads are pipelined in the FA726TE LSU pipe.
+;; Here we model the resource conflict between Load@E3-stage & Store@W-stage.
+;; The 2nd LSU (lsu1) models the fact that if 2 loads are scheduled in the
+;; same "bundle", the 2nd load will introduce another ISSUE stall but is
+;; still OK to execute (and may be beneficial sometimes).
+
+(define_insn_reservation "726te_load1_op" 3
+  (and (eq_attr "tune" "fa726te")
+       (eq_attr "type" "load1,load_byte"))
+  "(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\
+   | (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")
+
+(define_insn_reservation "726te_store1_op" 1
+  (and (eq_attr "tune" "fa726te")
+       (eq_attr "type" "store1"))
+  "fa726te_blockage*2")
+
+;; Load/Store Multiple blocks all pipelines in EX stages until WB.
+;; No other instructions can be issued together.  Since they essentially
+;; prevent all scheduling opportunities, we model them together here.
+
+;; The LDM is broken into multiple load instructions; a later instruction in
+;; pipe 1 is stalled.
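+
+;; (Editorial note, inferred from the reservation strings in this file rather
+;; than from the design note: "fa726te_blockage" was defined above as
+;; (fa726te_is0+fa726te_is1), i.e. it claims both issue slots in the same
+;; cycle.  A reservation string such as
+;;
+;;   "fa726te_blockage*4"
+;;
+;; therefore keeps the whole two-wide issue stage busy for four consecutive
+;; cycles, which is how the "no other instructions can be issued together"
+;; behaviour of LDM/STM is modeled below.)
+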
+(define_insn_reservation "726te_ldm2_op" 4 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "load2,load3")) + "fa726te_blockage*4") + +(define_insn_reservation "726te_ldm3_op" 5 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "load4")) + "fa726te_blockage*5") + +(define_insn_reservation "726te_stm2_op" 2 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "store2,store3")) + "fa726te_blockage*3") + +(define_insn_reservation "726te_stm3_op" 3 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "store4")) + "fa726te_blockage*4") + +(define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\ + 726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep") +(define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\ + 726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op" + "arm_no_early_store_addr_dep") +(define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op" + "726te_shift_op,726te_alu_op") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op" + "726te_alu_shift_op" "arm_no_early_alu_shift_dep") +(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op" + "726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep") +(define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op") + +(define_bypass 4 "726te_load1_op" "726te_mult_op") +(define_bypass 5 "726te_ldm2_op" "726te_mult_op") +(define_bypass 6 "726te_ldm3_op" "726te_mult_op") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FA726TE +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "726te_branch_op" 0 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "branch")) + "fa726te_blockage") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 is ready for int return value. +(define_insn_reservation "726te_call_op" 1 + (and (eq_attr "tune" "fa726te") + (eq_attr "type" "call")) + "fa726te_blockage") + diff --git a/gcc-4.9/gcc/config/arm/fmp626.md b/gcc-4.9/gcc/config/arm/fmp626.md new file mode 100644 index 000000000..439054da6 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/fmp626.md @@ -0,0 +1,191 @@ +;; Faraday FA626TE Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
*/ + +;; These descriptions are based on the information contained in the +;; FMP626 Core Design Note, Copyright (c) 2010 Faraday Technology Corp. + +;; Pipeline architecture +;; S E M W(Q1) Q2 +;; ___________________________________________ +;; shifter alu +;; mul1 mul2 mul3 +;; ld/st1 ld/st2 ld/st3 ld/st4 ld/st5 + +;; This automaton provides a pipeline description for the Faraday +;; FMP626 core. +;; +;; The model given here assumes that the condition for all conditional +;; instructions is "true", i.e., that all of the instructions are +;; actually executed. + +(define_automaton "fmp626") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; There is a single pipeline +;; +;; The ALU pipeline has fetch, decode, execute, memory, and +;; write stages. We only need to model the execute, memory and write +;; stages. + +(define_cpu_unit "fmp626_core" "fmp626") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ALU Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; ALU instructions require two cycles to execute, and use the ALU +;; pipeline in each of the three stages. The results are available +;; after the execute stage stage has finished. +;; +;; If the destination register is the PC, the pipelines are stalled +;; for several cycles. That case is not modeled here. + +;; ALU operations +(define_insn_reservation "mp626_alu_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg")) + "fmp626_core") + +(define_insn_reservation "mp626_alu_shift_op" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg")) + "fmp626_core") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Multiplication Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "mp626_mult1" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy")) + "fmp626_core") + +(define_insn_reservation "mp626_mult2" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "mul,mla")) + "fmp626_core") + +(define_insn_reservation "mp626_mult3" 3 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx")) + "fmp626_core*2") + +(define_insn_reservation "mp626_mult4" 4 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "smulls,smlals,umulls,umlals")) + "fmp626_core*3") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Load/Store Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The models for load/store instructions do not accurately describe +;; the difference between operations with a base register writeback +;; (such as "ldm!"). These models assume that all memory references +;; hit in dcache. 
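+
+;; (Editorial note, not from the design note: the latencies in the
+;; reservations below are defaults; the define_bypass forms later in this
+;; file override them for specific producer/consumer pairs.  For example the
+;; existing bypass
+;;
+;;   (define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op"
+;;     "mp626_store1_op,mp626_store2_op,mp626_store3_op"
+;;     "arm_no_early_store_addr_dep")
+;;
+;; lets a load feed the *data* operand of a store after only one cycle; the
+;; guard function is assumed to reject the case where the store needs the
+;; loaded value to form its address.)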
+ +(define_insn_reservation "mp626_load1_op" 5 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load1,load_byte")) + "fmp626_core") + +(define_insn_reservation "mp626_load2_op" 6 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load2,load3")) + "fmp626_core*2") + +(define_insn_reservation "mp626_load3_op" 7 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "load4")) + "fmp626_core*3") + +(define_insn_reservation "mp626_store1_op" 0 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store1")) + "fmp626_core") + +(define_insn_reservation "mp626_store2_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store2,store3")) + "fmp626_core*2") + +(define_insn_reservation "mp626_store3_op" 2 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "store4")) + "fmp626_core*3") + +(define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op" + "mp626_store1_op,mp626_store2_op,mp626_store3_op" + "arm_no_early_store_addr_dep") +(define_bypass 1 "mp626_alu_op,mp626_alu_shift_op,mp626_mult1,mp626_mult2,\ + mp626_mult3,mp626_mult4" "mp626_store1_op" + "arm_no_early_store_addr_dep") +(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_op") +(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 2 "mp626_mult3" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 3 "mp626_mult4" "mp626_alu_shift_op" + "arm_no_early_alu_shift_dep") +(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_op") +(define_bypass 2 "mp626_mult3" "mp626_alu_op") +(define_bypass 3 "mp626_mult4" "mp626_alu_op") +(define_bypass 4 "mp626_load1_op" "mp626_alu_op") +(define_bypass 5 "mp626_load2_op" "mp626_alu_op") +(define_bypass 6 "mp626_load3_op" "mp626_alu_op") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Branch and Call Instructions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Branch instructions are difficult to model accurately. The FMP626 +;; core can predict most branches. If the branch is predicted +;; correctly, and predicted early enough, the branch can be completely +;; eliminated from the instruction stream. Some branches can +;; therefore appear to require zero cycle to execute. We assume that +;; all branches are predicted correctly, and that the latency is +;; therefore the minimum value. + +(define_insn_reservation "mp626_branch_op" 0 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "branch")) + "fmp626_core") + +;; The latency for a call is actually the latency when the result is available. +;; i.e. R0 ready for int return value. +(define_insn_reservation "mp626_call_op" 1 + (and (eq_attr "tune" "fmp626") + (eq_attr "type" "call")) + "fmp626_core") + diff --git a/gcc-4.9/gcc/config/arm/genopt.sh b/gcc-4.9/gcc/config/arm/genopt.sh new file mode 100755 index 000000000..68fdb564c --- /dev/null +++ b/gcc-4.9/gcc/config/arm/genopt.sh @@ -0,0 +1,95 @@ +#!/bin/sh +# Generate arm-tables.opt from the lists in *.def. +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +cat <. + +Enum +Name(processor_type) Type(enum processor_type) +Known ARM CPUs (for use with the -mcpu= and -mtune= options): + +EOF + +awk -F'[(, ]+' '/^ARM_CORE/ { + name = $2 + enum = $3 + gsub("\"", "", name) + print "EnumValue" + print "Enum(processor_type) String(" name ") Value(" enum ")" + print "" +}' $1/arm-cores.def + +cat <. + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically by gentune.sh from arm-cores.def" + +allcores=`awk -F'[(, ]+' '/^ARM_CORE/ { cores = cores$3"," } END { print cores } ' $1` + +echo "(define_attr \"tune\"" +echo " \"$allcores\"" | sed -e 's/,"$/"/' | sed -e 's/\([a-z0-9_]\+,[a-z0-9_]\+,[a-z0-9_]\+,\)/\1\n\t/g' +echo " (const (symbol_ref \"((enum attr_tune) arm_tune)\")))" diff --git a/gcc-4.9/gcc/config/arm/iterators.md b/gcc-4.9/gcc/config/arm/iterators.md new file mode 100644 index 000000000..33e09e4ce --- /dev/null +++ b/gcc-4.9/gcc/config/arm/iterators.md @@ -0,0 +1,585 @@ +;; Code and mode itertator and attribute definitions for the ARM backend +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;;---------------------------------------------------------------------------- +;; Mode iterators +;;---------------------------------------------------------------------------- + +;; A list of modes that are exactly 64 bits in size. This is used to expand +;; some splits that are the same for all modes when operating on ARM +;; registers. +(define_mode_iterator ANY64 [DI DF V8QI V4HI V2SI V2SF]) + +(define_mode_iterator ANY128 [V2DI V2DF V16QI V8HI V4SI V4SF]) + +;; A list of integer modes that are up to one word long +(define_mode_iterator QHSI [QI HI SI]) + +;; A list of integer modes that are less than a word +(define_mode_iterator NARROW [QI HI]) + +;; A list of all the integer modes up to 64bit +(define_mode_iterator QHSD [QI HI SI DI]) + +;; A list of the 32bit and 64bit integer modes +(define_mode_iterator SIDI [SI DI]) + +;; A list of modes which the VFP unit can handle +(define_mode_iterator SDF [(SF "TARGET_VFP") (DF "TARGET_VFP_DOUBLE")]) + +;; Integer element sizes implemented by IWMMXT. +(define_mode_iterator VMMX [V2SI V4HI V8QI]) + +(define_mode_iterator VMMX2 [V4HI V2SI]) + +;; Integer element sizes for shifts. +(define_mode_iterator VSHFT [V4HI V2SI DI]) + +;; Integer and float modes supported by Neon and IWMMXT. +(define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) + +;; Integer and float modes supported by Neon and IWMMXT, except V2DI. 
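+;; (Editorial sketch, not part of the original header: a mode iterator such
+;; as VALLW, defined just below, is expanded by the machine-description
+;; reader into one copy of each pattern that uses it, one per listed mode.
+;; A hypothetical pattern written as
+;;
+;;   (define_insn "example_neg<mode>2"
+;;     [(set (match_operand:VALLW 0 "s_register_operand" "=w")
+;;           (neg:VALLW (match_operand:VALLW 1 "s_register_operand" "w")))]
+;;     "TARGET_NEON"
+;;     "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1")
+;;
+;; would generate one insn per mode, with <mode>, <V_s_elem> and <V_reg>
+;; substituted from the mode attributes defined further down in this file.)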
+(define_mode_iterator VALLW [V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) + +;; Integer modes supported by Neon and IWMMXT +(define_mode_iterator VINT [V2DI V2SI V4HI V8QI V4SI V8HI V16QI]) + +;; Integer modes supported by Neon and IWMMXT, except V2DI +(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI]) + +;; Double-width vector modes. +(define_mode_iterator VD [V8QI V4HI V2SI V2SF]) + +;; Double-width vector modes plus 64-bit elements. +(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI]) + +;; Double-width vector modes without floating-point elements. +(define_mode_iterator VDI [V8QI V4HI V2SI]) + +;; Quad-width vector modes. +(define_mode_iterator VQ [V16QI V8HI V4SI V4SF]) + +;; Quad-width vector modes plus 64-bit elements. +(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI]) + +;; Quad-width vector modes without floating-point elements. +(define_mode_iterator VQI [V16QI V8HI V4SI]) + +;; Quad-width vector modes, with TImode added, for moves. +(define_mode_iterator VQXMOV [V16QI V8HI V4SI V4SF V2DI TI]) + +;; Opaque structure types wider than TImode. +(define_mode_iterator VSTRUCT [EI OI CI XI]) + +;; Opaque structure types used in table lookups (except vtbl1/vtbx1). +(define_mode_iterator VTAB [TI EI OI]) + +;; Widenable modes. +(define_mode_iterator VW [V8QI V4HI V2SI]) + +;; Narrowable modes. +(define_mode_iterator VN [V8HI V4SI V2DI]) + +;; All supported vector modes (except singleton DImode). +(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DI]) + +;; All supported vector modes (except those with 64-bit integer elements). +(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF]) + +;; Supported integer vector modes (not 64 bit elements). +(define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI]) + +;; Supported integer vector modes (not singleton DI) +(define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) + +;; Vector modes, including 64-bit integer elements. +(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI]) + +;; Vector modes including 64-bit integer elements, but no floats. +(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) + +;; Vector modes for float->int conversions. +(define_mode_iterator VCVTF [V2SF V4SF]) + +;; Vector modes form int->float conversions. +(define_mode_iterator VCVTI [V2SI V4SI]) + +;; Vector modes for doubleword multiply-accumulate, etc. insns. +(define_mode_iterator VMD [V4HI V2SI V2SF]) + +;; Vector modes for quadword multiply-accumulate, etc. insns. +(define_mode_iterator VMQ [V8HI V4SI V4SF]) + +;; Above modes combined. +(define_mode_iterator VMDQ [V4HI V2SI V2SF V8HI V4SI V4SF]) + +;; As VMD, but integer modes only. +(define_mode_iterator VMDI [V4HI V2SI]) + +;; As VMQ, but integer modes only. +(define_mode_iterator VMQI [V8HI V4SI]) + +;; Above modes combined. +(define_mode_iterator VMDQI [V4HI V2SI V8HI V4SI]) + +;; Modes with 8-bit and 16-bit elements. +(define_mode_iterator VX [V8QI V4HI V16QI V8HI]) + +;; Modes with 8-bit elements. +(define_mode_iterator VE [V8QI V16QI]) + +;; Modes with 64-bit elements only. +(define_mode_iterator V64 [DI V2DI]) + +;; Modes with 32-bit elements only. +(define_mode_iterator V32 [V2SI V2SF V4SI V4SF]) + +;; Modes with 8-bit, 16-bit and 32-bit elements. +(define_mode_iterator VU [V16QI V8HI V4SI]) + +;; Iterators used for fixed-point support. 
+(define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA]) + +(define_mode_iterator ADDSUB [V4QQ V2HQ V2HA]) + +(define_mode_iterator UQADDSUB [V4UQQ V2UHQ UQQ UHQ V2UHA UHA]) + +(define_mode_iterator QADDSUB [V4QQ V2HQ QQ HQ V2HA HA SQ SA]) + +(define_mode_iterator QMUL [HQ HA]) + +;;---------------------------------------------------------------------------- +;; Code iterators +;;---------------------------------------------------------------------------- + +;; A list of condition codes used in compare instructions where +;; the carry flag from the addition is used instead of doing the +;; compare a second time. +(define_code_iterator LTUGEU [ltu geu]) + +;; A list of ... +(define_code_iterator ior_xor [ior xor]) + +;; Operations on two halves of a quadword vector. +(define_code_iterator vqh_ops [plus smin smax umin umax]) + +;; Operations on two halves of a quadword vector, +;; without unsigned variants (for use with *SFmode pattern). +(define_code_iterator vqhs_ops [plus smin smax]) + +;; A list of widening operators +(define_code_iterator SE [sign_extend zero_extend]) + +;; Right shifts +(define_code_iterator rshifts [ashiftrt lshiftrt]) + +;;---------------------------------------------------------------------------- +;; Int iterators +;;---------------------------------------------------------------------------- + +(define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM + UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) + +(define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM + UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN]) + +(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W + UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW]) + +(define_int_iterator CRYPTO_UNARY [UNSPEC_AESMC UNSPEC_AESIMC]) + +(define_int_iterator CRYPTO_BINARY [UNSPEC_AESD UNSPEC_AESE + UNSPEC_SHA1SU1 UNSPEC_SHA256SU0]) + +(define_int_iterator CRYPTO_TERNARY [UNSPEC_SHA1SU0 UNSPEC_SHA256H + UNSPEC_SHA256H2 UNSPEC_SHA256SU1]) + +(define_int_iterator CRYPTO_SELECTING [UNSPEC_SHA1C UNSPEC_SHA1M + UNSPEC_SHA1P]) + +;;---------------------------------------------------------------------------- +;; Mode attributes +;;---------------------------------------------------------------------------- + +;; Determine element size suffix from vector mode. +(define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")]) + +;; vtbl suffix for NEON vector modes. +(define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")]) + +;; (Opposite) mode to convert to/from for NEON mode conversions. +(define_mode_attr V_CVTTO [(V2SI "V2SF") (V2SF "V2SI") + (V4SI "V4SF") (V4SF "V4SI")]) + +;; As above but in lower case. +(define_mode_attr V_cvtto [(V2SI "v2sf") (V2SF "v2si") + (V4SI "v4sf") (V4SF "v4si")]) + +;; Define element mode for each vector mode. +(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") + (V4HI "HI") (V8HI "HI") + (V2SI "SI") (V4SI "SI") + (V2SF "SF") (V4SF "SF") + (DI "DI") (V2DI "DI")]) + +;; Element modes for vector extraction, padded up to register size. + +(define_mode_attr V_ext [(V8QI "SI") (V16QI "SI") + (V4HI "SI") (V8HI "SI") + (V2SI "SI") (V4SI "SI") + (V2SF "SF") (V4SF "SF") + (DI "DI") (V2DI "DI")]) + +;; Mode of pair of elements for each vector mode, to define transfer +;; size for structure lane/dup loads and stores. +(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") + (V4HI "SI") (V8HI "SI") + (V2SI "V2SI") (V4SI "V2SI") + (V2SF "V2SF") (V4SF "V2SF") + (DI "V2DI") (V2DI "V2DI")]) + +;; Similar, for three elements. 
+(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") + (V4HI "BLK") (V8HI "BLK") + (V2SI "BLK") (V4SI "BLK") + (V2SF "BLK") (V4SF "BLK") + (DI "EI") (V2DI "EI")]) + +;; Similar, for four elements. +(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") + (V4HI "V4HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V4SI") + (V2SF "V4SF") (V4SF "V4SF") + (DI "OI") (V2DI "OI")]) + +;; Register width from element mode +(define_mode_attr V_reg [(V8QI "P") (V16QI "q") + (V4HI "P") (V8HI "q") + (V2SI "P") (V4SI "q") + (V2SF "P") (V4SF "q") + (DI "P") (V2DI "q") + (SF "") (DF "P")]) + +;; Wider modes with the same number of elements. +(define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")]) + +;; Narrower modes with the same number of elements. +(define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")]) + +;; Narrower modes with double the number of elements. +(define_mode_attr V_narrow_pack [(V4SI "V8HI") (V8HI "V16QI") (V2DI "V4SI") + (V4HI "V8QI") (V2SI "V4HI") (DI "V2SI")]) + +;; Modes with half the number of equal-sized elements. +(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") + (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF") + (V2DI "DI")]) + +;; Same, but lower-case. +(define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi") + (V4SI "v2si") (V4SF "v2sf") + (V2DI "di")]) + +;; Modes with twice the number of equal-sized elements. +(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI") + (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF") + (DI "V2DI")]) + +;; Same, but lower-case. +(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi") + (V2SI "v4si") (V2SF "v4sf") + (DI "v2di")]) + +;; Modes with double-width elements. +(define_mode_attr V_double_width [(V8QI "V4HI") (V16QI "V8HI") + (V4HI "V2SI") (V8HI "V4SI") + (V2SI "DI") (V4SI "V2DI")]) + +;; Double-sized modes with the same element size. +;; Used for neon_vdup_lane, where the second operand is double-sized +;; even when the first one is quad. +(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI") + (V4SI "V2SI") (V4SF "V2SF") + (V8QI "V8QI") (V4HI "V4HI") + (V2SI "V2SI") (V2SF "V2SF")]) + +;; Mode of result of comparison operations (and bit-select operand 1). +(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") + (V4HI "V4HI") (V8HI "V8HI") + (V2SI "V2SI") (V4SI "V4SI") + (V2SF "V2SI") (V4SF "V4SI") + (DI "DI") (V2DI "V2DI")]) + +(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") + (V4HI "v4hi") (V8HI "v8hi") + (V2SI "v2si") (V4SI "v4si") + (DI "di") (V2DI "v2di") + (V2SF "v2si") (V4SF "v4si")]) + +;; Get element type from double-width mode, for operations where we +;; don't care about signedness. +(define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8") + (V4HI "i16") (V8HI "i16") + (V2SI "i32") (V4SI "i32") + (DI "i64") (V2DI "i64") + (V2SF "f32") (V4SF "f32") + (SF "f32") (DF "f64")]) + +;; Same, but for operations which work on signed values. +(define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8") + (V4HI "s16") (V8HI "s16") + (V2SI "s32") (V4SI "s32") + (DI "s64") (V2DI "s64") + (V2SF "f32") (V4SF "f32")]) + +;; Same, but for operations which work on unsigned values. +(define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8") + (V4HI "u16") (V8HI "u16") + (V2SI "u32") (V4SI "u32") + (DI "u64") (V2DI "u64") + (V2SF "f32") (V4SF "f32")]) + +;; Element types for extraction of unsigned scalars. 
+(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8") + (V4HI "u16") (V8HI "u16") + (V2SI "32") (V4SI "32") + (V2SF "32") (V4SF "32")]) + +(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8") + (V4HI "16") (V8HI "16") + (V2SI "32") (V4SI "32") + (DI "64") (V2DI "64") + (V2SF "32") (V4SF "32")]) + +(define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b") + (V4HI "h") (V8HI "h") + (V2SI "s") (V4SI "s") + (DI "d") (V2DI "d") + (V2SF "s") (V4SF "s")]) + +;; Element sizes for duplicating ARM registers to all elements of a vector. +(define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")]) + +;; Opaque integer types for results of pair-forming intrinsics (vtrn, etc.) +(define_mode_attr V_PAIR [(V8QI "TI") (V16QI "OI") + (V4HI "TI") (V8HI "OI") + (V2SI "TI") (V4SI "OI") + (V2SF "TI") (V4SF "OI") + (DI "TI") (V2DI "OI")]) + +;; Same, but lower-case. +(define_mode_attr V_pair [(V8QI "ti") (V16QI "oi") + (V4HI "ti") (V8HI "oi") + (V2SI "ti") (V4SI "oi") + (V2SF "ti") (V4SF "oi") + (DI "ti") (V2DI "oi")]) + +;; Extra suffix on some 64-bit insn names (to avoid collision with standard +;; names which we don't want to define). +(define_mode_attr V_suf64 [(V8QI "") (V16QI "") + (V4HI "") (V8HI "") + (V2SI "") (V4SI "") + (V2SF "") (V4SF "") + (DI "_neon") (V2DI "")]) + + +;; Scalars to be presented to scalar multiplication instructions +;; must satisfy the following constraints. +;; 1. If the mode specifies 16-bit elements, the scalar must be in D0-D7. +;; 2. If the mode specifies 32-bit elements, the scalar must be in D0-D15. + +;; This mode attribute is used to obtain the correct register constraints. + +(define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t") + (V8HI "x") (V4SI "t") (V4SF "t")]) + +;; Predicates used for setting type for neon instructions + +(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") + (V4HI "false") (V8HI "false") + (V2SI "false") (V4SI "false") + (V2SF "true") (V4SF "true") + (DI "false") (V2DI "false")]) + +(define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true") + (V4HI "true") (V8HI "true") + (V2SI "false") (V4SI "false") + (V2SF "false") (V4SF "false") + (DI "false") (V2DI "false")]) + + +(define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false") + (V4HI "true") (V8HI "false") + (V2SI "true") (V4SI "false") + (V2SF "true") (V4SF "false") + (DI "true") (V2DI "false")]) + +(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16") + (V4HI "4") (V8HI "8") + (V2SI "2") (V4SI "4") + (V2SF "2") (V4SF "4") + (DI "1") (V2DI "2") + (DF "1") (V2DF "2")]) + +;; Same as V_widen, but lower-case. +(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")]) + +;; Widen. Result is half the number of elements, but widened to double-width. +(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")]) + +;; Conditions to be used in extenddi patterns. +(define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")]) +(define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6") + (QI "&& arm_arch6")]) +(define_mode_attr qhs_zextenddi_op [(SI "s_register_operand") + (HI "nonimmediate_operand") + (QI "nonimmediate_operand")]) +(define_mode_attr qhs_extenddi_op [(SI "s_register_operand") + (HI "nonimmediate_operand") + (QI "arm_reg_or_extendqisi_mem_op")]) +(define_mode_attr qhs_extenddi_cstr [(SI "r,0,r,r,r") (HI "r,0,rm,rm,r") (QI "r,0,rUq,rm,r")]) +(define_mode_attr qhs_zextenddi_cstr [(SI "r,0,r,r") (HI "r,0,rm,r") (QI "r,0,rm,r")]) + +;; Mode attributes used for fixed-point support. 
+(define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16") + (V2UHA "16") (UHA "16") + (V4QQ "8") (V2HQ "16") (QQ "8") (HQ "16") + (V2HA "16") (HA "16") (SQ "") (SA "")]) + +;; Mode attribute for vshll. +(define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) + +;; Mode attributes used for VFP support. +(define_mode_attr F_constraint [(SF "t") (DF "w")]) +(define_mode_attr vfp_type [(SF "s") (DF "d")]) +(define_mode_attr vfp_double_cond [(SF "") (DF "&& TARGET_VFP_DOUBLE")]) + +;; Mode attribute used to build the "type" attribute. +(define_mode_attr q [(V8QI "") (V16QI "_q") + (V4HI "") (V8HI "_q") + (V2SI "") (V4SI "_q") + (V2SF "") (V4SF "_q") + (DI "") (V2DI "_q") + (DF "") (V2DF "_q")]) + +;;---------------------------------------------------------------------------- +;; Code attributes +;;---------------------------------------------------------------------------- + +;; Assembler mnemonics for vqh_ops and vqhs_ops iterators. +(define_code_attr VQH_mnem [(plus "vadd") (smin "vmin") (smax "vmax") + (umin "vmin") (umax "vmax")]) + +;; Type attributes for vqh_ops and vqhs_ops iterators. +(define_code_attr VQH_type [(plus "add") (smin "minmax") (smax "minmax") + (umin "minmax") (umax "minmax")]) + +;; Signs of above, where relevant. +(define_code_attr VQH_sign [(plus "i") (smin "s") (smax "s") (umin "u") + (umax "u")]) + +(define_code_attr cnb [(ltu "CC_C") (geu "CC")]) +(define_code_attr optab [(ltu "ltu") (geu "geu")]) + +;; Assembler mnemonics for signedness of widening operations. +(define_code_attr US [(sign_extend "s") (zero_extend "u")]) + +;; Right shifts +(define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")]) +(define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")]) + +;;---------------------------------------------------------------------------- +;; Int attributes +;;---------------------------------------------------------------------------- + +;; Standard names for floating point to integral rounding instructions. +(define_int_attr vrint_pattern [(UNSPEC_VRINTZ "btrunc") (UNSPEC_VRINTP "ceil") + (UNSPEC_VRINTA "round") (UNSPEC_VRINTM "floor") + (UNSPEC_VRINTR "nearbyint") (UNSPEC_VRINTX "rint")]) + +;; Suffixes for vrint instructions specifying rounding modes. +(define_int_attr vrint_variant [(UNSPEC_VRINTZ "z") (UNSPEC_VRINTP "p") + (UNSPEC_VRINTA "a") (UNSPEC_VRINTM "m") + (UNSPEC_VRINTR "r") (UNSPEC_VRINTX "x")]) + +;; Some of the vrint instuctions are predicable. 
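+;; (Editorial illustration, not part of the original file: int iterators work
+;; like mode iterators but range over unspec codes, so a hypothetical pattern
+;; such as
+;;
+;;   (define_insn "example_<vrint_pattern>sf2"
+;;     [(set (match_operand:SF 0 "s_register_operand" "=t")
+;;           (unspec:SF [(match_operand:SF 1 "s_register_operand" "t")]
+;;                      VRINT))]
+;;     "TARGET_HARD_FLOAT"  ; target condition shown only for illustration
+;;     "vrint<vrint_variant>.f32\t%0, %1"
+;;     [(set_attr "predicable" "<vrint_predicable>")])
+;;
+;; expands to one insn per unspec listed in VRINT, with the int attributes in
+;; this section -- including the one defined just below -- selecting the
+;; mnemonic suffix and predicability of each variant.)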
+(define_int_attr vrint_predicable [(UNSPEC_VRINTZ "yes") (UNSPEC_VRINTP "no") + (UNSPEC_VRINTA "no") (UNSPEC_VRINTM "no") + (UNSPEC_VRINTR "yes") (UNSPEC_VRINTX "yes")]) + +(define_int_attr vrint_conds [(UNSPEC_VRINTZ "nocond") (UNSPEC_VRINTP "unconditional") + (UNSPEC_VRINTA "unconditional") (UNSPEC_VRINTM "unconditional") + (UNSPEC_VRINTR "nocond") (UNSPEC_VRINTX "nocond")]) + +(define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p") + (UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m") + (UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")]) + +(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h") + (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32CB "crc32cb") + (UNSPEC_CRC32CH "crc32ch") (UNSPEC_CRC32CW "crc32cw")]) + +(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI") + (UNSPEC_CRC32W "SI") (UNSPEC_CRC32CB "QI") + (UNSPEC_CRC32CH "HI") (UNSPEC_CRC32CW "SI")]) + +(define_int_attr crypto_pattern [(UNSPEC_SHA1H "sha1h") (UNSPEC_AESMC "aesmc") + (UNSPEC_AESIMC "aesimc") (UNSPEC_AESD "aesd") + (UNSPEC_AESE "aese") (UNSPEC_SHA1SU1 "sha1su1") + (UNSPEC_SHA256SU0 "sha256su0") (UNSPEC_SHA1C "sha1c") + (UNSPEC_SHA1M "sha1m") (UNSPEC_SHA1P "sha1p") + (UNSPEC_SHA1SU0 "sha1su0") (UNSPEC_SHA256H "sha256h") + (UNSPEC_SHA256H2 "sha256h2") + (UNSPEC_SHA256SU1 "sha256su1")]) + +(define_int_attr crypto_type + [(UNSPEC_AESE "crypto_aes") (UNSPEC_AESD "crypto_aes") + (UNSPEC_AESMC "crypto_aes") (UNSPEC_AESIMC "crypto_aes") + (UNSPEC_SHA1C "crypto_sha1_slow") (UNSPEC_SHA1P "crypto_sha1_slow") + (UNSPEC_SHA1M "crypto_sha1_slow") (UNSPEC_SHA1SU1 "crypto_sha1_fast") + (UNSPEC_SHA1SU0 "crypto_sha1_xor") (UNSPEC_SHA256H "crypto_sha256_slow") + (UNSPEC_SHA256H2 "crypto_sha256_slow") (UNSPEC_SHA256SU0 "crypto_sha256_fast") + (UNSPEC_SHA256SU1 "crypto_sha256_slow")]) + +(define_int_attr crypto_size_sfx [(UNSPEC_SHA1H "32") (UNSPEC_AESMC "8") + (UNSPEC_AESIMC "8") (UNSPEC_AESD "8") + (UNSPEC_AESE "8") (UNSPEC_SHA1SU1 "32") + (UNSPEC_SHA256SU0 "32") (UNSPEC_SHA1C "32") + (UNSPEC_SHA1M "32") (UNSPEC_SHA1P "32") + (UNSPEC_SHA1SU0 "32") (UNSPEC_SHA256H "32") + (UNSPEC_SHA256H2 "32") (UNSPEC_SHA256SU1 "32")]) + +(define_int_attr crypto_mode [(UNSPEC_SHA1H "V4SI") (UNSPEC_AESMC "V16QI") + (UNSPEC_AESIMC "V16QI") (UNSPEC_AESD "V16QI") + (UNSPEC_AESE "V16QI") (UNSPEC_SHA1SU1 "V4SI") + (UNSPEC_SHA256SU0 "V4SI") (UNSPEC_SHA1C "V4SI") + (UNSPEC_SHA1M "V4SI") (UNSPEC_SHA1P "V4SI") + (UNSPEC_SHA1SU0 "V4SI") (UNSPEC_SHA256H "V4SI") + (UNSPEC_SHA256H2 "V4SI") (UNSPEC_SHA256SU1 "V4SI")]) + +;; Both kinds of return insn. +(define_code_iterator returns [return simple_return]) +(define_code_attr return_str [(return "") (simple_return "simple_")]) +(define_code_attr return_simple_p [(return "false") (simple_return "true")]) +(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)") + (simple_return " && use_simple_return_p ()")]) +(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)") + (simple_return " && use_simple_return_p ()")]) diff --git a/gcc-4.9/gcc/config/arm/iwmmxt.md b/gcc-4.9/gcc/config/arm/iwmmxt.md new file mode 100644 index 000000000..56ff3e9f3 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/iwmmxt.md @@ -0,0 +1,1775 @@ +;; Patterns for the Intel Wireless MMX technology architecture. +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Contributed by Red Hat. + +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3, or (at your option) any later +;; version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register numbers. Need to sync with FIRST_IWMMXT_GR_REGNUM in arm.h +(define_constants + [(WCGR0 96) + (WCGR1 97) + (WCGR2 98) + (WCGR3 99) + ] +) + +(define_insn "tbcstv8qi" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_duplicate:V8QI (match_operand:QI 1 "s_register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcstb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tbcst")] +) + +(define_insn "tbcstv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_duplicate:V4HI (match_operand:HI 1 "s_register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcsth%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tbcst")] +) + +(define_insn "tbcstv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_duplicate:V2SI (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_REALLY_IWMMXT" + "tbcstw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tbcst")] +) + +(define_insn "iwmmxt_iordi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (ior:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wor%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8") + (set_attr "type" "wmmx_wor,*,*")] +) + +(define_insn "iwmmxt_xordi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (xor:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wxor%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8") + (set_attr "type" "wmmx_wxor,*,*")] +) + +(define_insn "iwmmxt_anddi3" + [(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r") + (and:DI (match_operand:DI 1 "register_operand" "%y,0,r") + (match_operand:DI 2 "register_operand" "y,r,r")))] + "TARGET_REALLY_IWMMXT" + "@ + wand%?\\t%0, %1, %2 + # + #" + [(set_attr "predicable" "yes") + (set_attr "length" "4,8,8") + (set_attr "type" "wmmx_wand,*,*")] +) + +(define_insn "iwmmxt_nanddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (and:DI (match_operand:DI 1 "register_operand" "y") + (not:DI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wandn%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wandn")] +) + +(define_insn "*iwmmxt_arm_movdi" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,yr,y,yrUy,*w, r,*w,*w, *Uv") + (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,yr,y,yrUy,y, r,*w,*w,*Uvi,*w"))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"#\"; + case 3: case 4: + return output_move_double (operands, true, NULL); + case 5: + return \"wmov%?\\t%0,%1\"; + case 6: + return 
\"tmcrr%?\\t%0,%Q1,%R1\"; + case 7: + return \"tmrrc%?\\t%Q0,%R0,%1\"; + case 8: + return \"wldrd%?\\t%0,%1\"; + case 9: + return \"wstrd%?\\t%1,%0\"; + case 10: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 11: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 12: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 13: case 14: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set (attr "length") (cond [(eq_attr "alternative" "0,3,4") (const_int 8) + (eq_attr "alternative" "1") (const_int 12) + (eq_attr "alternative" "2") (const_int 16) + (eq_attr "alternative" "12") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "type" "*,*,*,load2,store2,*,*,*,*,*,f_mcrr,f_mrrc,\ + ffarithd,f_loadd,f_stored") + (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*") + (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")] +) + +(define_insn "*iwmmxt_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk, m,z,r,?z,?Uy,*t, r,*t,*t ,*Uv") + (match_operand:SI 1 "general_operand" " rk,I,K,j,mi,rk,r,z,Uy, z, r,*t,*t,*Uvi, *t"))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: return \"mov\\t%0, %1\"; + case 1: return \"mov\\t%0, %1\"; + case 2: return \"mvn\\t%0, #%B1\"; + case 3: return \"movw\\t%0, %1\"; + case 4: return \"ldr\\t%0, %1\"; + case 5: return \"str\\t%1, %0\"; + case 6: return \"tmcr\\t%0, %1\"; + case 7: return \"tmrc\\t%0, %1\"; + case 8: return arm_output_load_gr (operands); + case 9: return \"wstrw\\t%1, %0\"; + case 10:return \"fmsr\\t%0, %1\"; + case 11:return \"fmrs\\t%0, %1\"; + case 12:return \"fcpys\\t%0, %1\\t%@ int\"; + case 13: case 14: + return output_move_vfp (operands); + default: + gcc_unreachable (); + }" + [(set_attr "type" "*,*,*,*,load1,store1,*,*,*,*,f_mcr,f_mrc,\ + fmov,f_loads,f_stores") + (set_attr "length" "*,*,*,*,*, *,*,*, 16, *,*,*,*,*,*") + (set_attr "pool_range" "*,*,*,*,4096, *,*,*,1024, *,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,*,4084, *,*,*, *, 1012,*,*,*,1008,*") + ;; Note - the "predicable" attribute is not allowed to have alternatives. + ;; Since the wSTRw wCx instruction is not predicable, we cannot support + ;; predicating any of the alternatives in this template. Instead, + ;; we do the predication ourselves, in cond_iwmmxt_movsi_insn. + (set_attr "predicable" "no") + ;; Also - we have to pretend that these insns clobber the condition code + ;; bits as otherwise arm_final_prescan_insn() will try to conditionalize + ;; them. + (set_attr "conds" "clob")] +) + +;; Because iwmmxt_movsi_insn is not predicable, we provide the +;; cond_exec version explicitly, with appropriate constraints. 
+ +(define_insn "*cond_iwmmxt_movsi_insn" + [(cond_exec + (match_operator 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") + (const_int 0)]) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m,z,r") + (match_operand:SI 1 "general_operand" "rI,K,mi,r,r,z")))] + "TARGET_REALLY_IWMMXT + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: return \"mov%?\\t%0, %1\"; + case 1: return \"mvn%?\\t%0, #%B1\"; + case 2: return \"ldr%?\\t%0, %1\"; + case 3: return \"str%?\\t%1, %0\"; + case 4: return \"tmcr%?\\t%0, %1\"; + default: return \"tmrc%?\\t%0, %1\"; + }" + [(set_attr "type" "*,*,load1,store1,*,*") + (set_attr "pool_range" "*,*,4096, *,*,*") + (set_attr "neg_pool_range" "*,*,4084, *,*,*")] +) + +(define_insn "mov_internal" + [(set (match_operand:VMMX 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r,?r,?m") + (match_operand:VMMX 1 "general_operand" "y,y,mi,y,r,r,mi,r"))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: return \"wmov%?\\t%0, %1\"; + case 1: return \"wstrd%?\\t%1, %0\"; + case 2: return \"wldrd%?\\t%0, %1\"; + case 3: return \"tmrrc%?\\t%Q0, %R0, %1\"; + case 4: return \"tmcrr%?\\t%0, %Q1, %R1\"; + case 5: return \"#\"; + default: return output_move_double (operands, true, NULL); + }" + [(set_attr "predicable" "yes") + (set_attr "length" "4, 4, 4,4,4,8, 8,8") + (set_attr "type" "wmmx_wmov,wmmx_wstr,wmmx_wldr,wmmx_tmrrc,wmmx_tmcrr,*,load1,store1") + (set_attr "pool_range" "*, *, 256,*,*,*, 256,*") + (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")] +) + +(define_expand "iwmmxt_setwcgr0" + [(set (reg:SI WCGR0) + (match_operand:SI 0 "register_operand" ""))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_setwcgr1" + [(set (reg:SI WCGR1) + (match_operand:SI 0 "register_operand" ""))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_setwcgr2" + [(set (reg:SI WCGR2) + (match_operand:SI 0 "register_operand" ""))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_setwcgr3" + [(set (reg:SI WCGR3) + (match_operand:SI 0 "register_operand" ""))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_getwcgr0" + [(set (match_operand:SI 0 "register_operand" "") + (reg:SI WCGR0))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_getwcgr1" + [(set (match_operand:SI 0 "register_operand" "") + (reg:SI WCGR1))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_getwcgr2" + [(set (match_operand:SI 0 "register_operand" "") + (reg:SI WCGR2))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_expand "iwmmxt_getwcgr3" + [(set (match_operand:SI 0 "register_operand" "") + (reg:SI WCGR3))] + "TARGET_REALLY_IWMMXT" + {} +) + +(define_insn "*and3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (and:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wand\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wand")] +) + +(define_insn "*ior3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (ior:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wor\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wor")] +) + +(define_insn "*xor3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (xor:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + 
"TARGET_REALLY_IWMMXT" + "wxor\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + + +;; Vector add/subtract + +(define_insn "*add3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (plus:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wadd%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "ssaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddbss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "ssaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "ssaddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ss_plus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "usaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddbus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "usaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "usaddv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (us_plus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "waddwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "*sub3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (minus:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "sssubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubbss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "sssubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "sssubv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ss_minus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + 
"wsubwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "ussubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubbus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "ussubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "ussubv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (us_minus:V2SI (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wsubwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsub")] +) + +(define_insn "*mulv4hi3_iwmmxt" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (mult:V4HI (match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmulul%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "smulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulsm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "umulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulum%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "iwmmxt_wmacs" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:V4HI 2 "register_operand" "y") + (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACS))] + "TARGET_REALLY_IWMMXT" + "wmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmac")] +) + +(define_insn "iwmmxt_wmacsz" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACSZ))] + "TARGET_REALLY_IWMMXT" + "wmacsz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmac")] +) + +(define_insn "iwmmxt_wmacu" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:DI 1 "register_operand" "0") + (match_operand:V4HI 2 "register_operand" "y") + (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACU))] + "TARGET_REALLY_IWMMXT" + "wmacu%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmac")] +) + +(define_insn "iwmmxt_wmacuz" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACUZ))] + "TARGET_REALLY_IWMMXT" + "wmacuz%?\\t%0, %1, %2" + [(set_attr 
"predicable" "yes") + (set_attr "type" "wmmx_wmac")] +) + +;; Same as xordi3, but don't show input operands so that we don't think +;; they are live. +(define_insn "iwmmxt_clrdi" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(const_int 0)] UNSPEC_CLRDI))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + +;; Seems like cse likes to generate these, so we have to support them. + +(define_insn "iwmmxt_clrv8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=y") + (const_vector:V8QI [(const_int 0) (const_int 0) + (const_int 0) (const_int 0) + (const_int 0) (const_int 0) + (const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + +(define_insn "iwmmxt_clrv4hi" + [(set (match_operand:V4HI 0 "s_register_operand" "=y") + (const_vector:V4HI [(const_int 0) (const_int 0) + (const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + +(define_insn "iwmmxt_clrv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (const_vector:V2SI [(const_int 0) (const_int 0)]))] + "TARGET_REALLY_IWMMXT" + "wxor%?\\t%0, %0, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wxor")] +) + +;; Unsigned averages/sum of absolute differences + +(define_insn "iwmmxt_uavgrndv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (lshiftrt:V8HI + (plus:V8HI + (plus:V8HI (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))) + (const_vector:V8HI [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)])) + (const_int 1))))] + "TARGET_REALLY_IWMMXT" + "wavg2br%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg2")] +) + +(define_insn "iwmmxt_uavgrndv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (plus:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_vector:V4SI [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)])) + (const_int 1))))] + "TARGET_REALLY_IWMMXT" + "wavg2hr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg2")] +) + +(define_insn "iwmmxt_uavgv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (lshiftrt:V8HI + (plus:V8HI (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))) + (const_int 1))))] + "TARGET_REALLY_IWMMXT" + "wavg2b%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg2")] +) + +(define_insn "iwmmxt_uavgv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_int 1))))] + "TARGET_REALLY_IWMMXT" + "wavg2h%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg2")] +) + +;; Insert/extract/shuffle + +(define_insn "iwmmxt_tinsrb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_duplicate:V8QI + (truncate:QI (match_operand:SI 2 "nonimmediate_operand" 
"r"))) + (match_operand:V8QI 1 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "* + { + return arm_output_iwmmxt_tinsr (operands); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tinsr")] +) + +(define_insn "iwmmxt_tinsrh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_duplicate:V4HI + (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "r"))) + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "* + { + return arm_output_iwmmxt_tinsr (operands); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tinsr")] +) + +(define_insn "iwmmxt_tinsrw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_duplicate:V2SI + (match_operand:SI 2 "nonimmediate_operand" "r")) + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_REALLY_IWMMXT" + "* + { + return arm_output_iwmmxt_tinsr (operands); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tinsr")] +) + +(define_insn "iwmmxt_textrmub" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +(define_insn "iwmmxt_textrmsb" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmsb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +(define_insn "iwmmxt_textrmuh" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmuh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +(define_insn "iwmmxt_textrmsh" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_REALLY_IWMMXT" + "textrmsh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +;; There are signed/unsigned variants of this instruction, but they are +;; pointless. 
+(define_insn "iwmmxt_textrmw" + [(set (match_operand:SI 0 "register_operand" "=r") + (vec_select:SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + "TARGET_REALLY_IWMMXT" + "textrmsw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrm")] +) + +(define_insn "iwmmxt_wshufh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_WSHUFH))] + "TARGET_REALLY_IWMMXT" + "wshufh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wshufh")] +) + +;; Mask-generating comparisons +;; +;; Note - you cannot use patterns like these here: +;; +;; (set (match:) (: (match:) (match:))) +;; +;; Because GCC will assume that the truth value (1 or 0) is installed +;; into the entire destination vector, (with the '1' going into the least +;; significant element of the vector). This is not how these instructions +;; behave. + +(define_insn "eqv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ))] + "TARGET_REALLY_IWMMXT" + "wcmpeqb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpeq")] +) + +(define_insn "eqv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ))] + "TARGET_REALLY_IWMMXT" + "wcmpeqh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpeq")] +) + +(define_insn "eqv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec_volatile:V2SI + [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_EQ))] + "TARGET_REALLY_IWMMXT" + "wcmpeqw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpeq")] +) + +(define_insn "gtuv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU))] + "TARGET_REALLY_IWMMXT" + "wcmpgtub%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtuv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU))] + "TARGET_REALLY_IWMMXT" + "wcmpgtuh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtuv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec_volatile:V2SI [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_GTU))] + "TARGET_REALLY_IWMMXT" + "wcmpgtuw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT))] + "TARGET_REALLY_IWMMXT" + "wcmpgtsb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + 
(unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT))] + "TARGET_REALLY_IWMMXT" + "wcmpgtsh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +(define_insn "gtv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec_volatile:V2SI [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] + VUNSPEC_WCMP_GT))] + "TARGET_REALLY_IWMMXT" + "wcmpgtsw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wcmpgt")] +) + +;; Max/min insns + +(define_insn "*smax<mode>3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (smax:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmaxs<MMX_char>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmax")] +) + +(define_insn "*umax<mode>3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (umax:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmaxu<MMX_char>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmax")] +) + +(define_insn "*smin<mode>3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (smin:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmins<MMX_char>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmin")] +) + +(define_insn "*umin<mode>3_iwmmxt" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (umin:VMMX (match_operand:VMMX 1 "register_operand" "y") + (match_operand:VMMX 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wminu<MMX_char>%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmin")] +) + +;; Pack/unpack insns.
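+;;
+;; The wpack* patterns narrow two source vectors with signed (ss_truncate)
+;; or unsigned (us_truncate) saturation and concatenate the results.
+;; wunpckih*/wunpckil* interleave the high or low elements of two vectors,
+;; while wunpckeh*/wunpckel* widen the high or low half of a single vector
+;; with zero (u) or sign (s) extension.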
+ +(define_insn "iwmmxt_wpackhss" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y")) + (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackhss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackwss" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y")) + (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackwss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackdss" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (ss_truncate:SI (match_operand:DI 1 "register_operand" "y")) + (ss_truncate:SI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackdss%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackhus" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y")) + (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackhus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackwus" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (us_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y")) + (us_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackwus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wpackdus" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_concat:V2SI + (us_truncate:SI (match_operand:DI 1 "register_operand" "y")) + (us_truncate:SI (match_operand:DI 2 "register_operand" "y"))))] + "TARGET_REALLY_IWMMXT" + "wpackdus%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wpack")] +) + +(define_insn "iwmmxt_wunpckihb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (const_int 85)))] + "TARGET_REALLY_IWMMXT" + "wunpckihb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckih")] +) + +(define_insn "iwmmxt_wunpckihh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] + "TARGET_REALLY_IWMMXT" + "wunpckihh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckih")] +) + +(define_insn "iwmmxt_wunpckihw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_select:V2SI (match_operand:V2SI 1 
"register_operand" "y") + (parallel [(const_int 1) + (const_int 0)])) + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 1)])) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wunpckihw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckih")] +) + +(define_insn "iwmmxt_wunpckilb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (const_int 85)))] + "TARGET_REALLY_IWMMXT" + "wunpckilb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckil")] +) + +(define_insn "iwmmxt_wunpckilh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] + "TARGET_REALLY_IWMMXT" + "wunpckilh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckil")] +) + +(define_insn "iwmmxt_wunpckilw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y") + (parallel [(const_int 0) + (const_int 1)])) + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 1) + (const_int 0)])) + (const_int 1)))] + "TARGET_REALLY_IWMMXT" + "wunpckilw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckil")] +) + +(define_insn "iwmmxt_wunpckehub" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehub%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehuh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehuh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehuw" + [(set (match_operand:DI 0 "register_operand" "=y") + (vec_select:DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (parallel [(const_int 1)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehuw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehsb" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (sign_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehsb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehsh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + 
(sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehsh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckehsw" + [(set (match_operand:DI 0 "register_operand" "=y") + (vec_select:DI + (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (parallel [(const_int 1)])))] + "TARGET_REALLY_IWMMXT" + "wunpckehsw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckeh")] +) + +(define_insn "iwmmxt_wunpckelub" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_REALLY_IWMMXT" + "wunpckelub%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckeluh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_REALLY_IWMMXT" + "wunpckeluh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckeluw" + [(set (match_operand:DI 0 "register_operand" "=y") + (vec_select:DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (parallel [(const_int 0)])))] + "TARGET_REALLY_IWMMXT" + "wunpckeluw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckelsb" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_select:V4HI + (sign_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_REALLY_IWMMXT" + "wunpckelsb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckelsh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_REALLY_IWMMXT" + "wunpckelsh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +(define_insn "iwmmxt_wunpckelsw" + [(set (match_operand:DI 0 "register_operand" "=y") + (vec_select:DI + (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (parallel [(const_int 0)])))] + "TARGET_REALLY_IWMMXT" + "wunpckelsw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wunpckel")] +) + +;; Shifts + +(define_insn "ror3" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (rotatert:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:SI 2 "imm_or_reg_operand" "z,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wrorg%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wror\", operands, true); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wror, wmmx_wror")] +) + +(define_insn "ashr3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:SI 2 "imm_or_reg_operand" "z,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsrag%?\\t%0, %1, %2\"; + case 1: 
+ return arm_output_iwmmxt_shift_immediate (\"wsra\", operands, true); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsra, wmmx_wsra")] +) + +(define_insn "lshr3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:SI 2 "imm_or_reg_operand" "z,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsrlg%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsrl\", operands, false); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsrl, wmmx_wsrl")] +) + +(define_insn "ashl3_iwmmxt" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:SI 2 "imm_or_reg_operand" "z,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsllg%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsll\", operands, false); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsll, wmmx_wsll")] +) + +(define_insn "ror3_di" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (rotatert:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:DI 2 "imm_or_reg_operand" "y,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wror%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wror\", operands, true); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wror, wmmx_wror")] +) + +(define_insn "ashr3_di" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:DI 2 "imm_or_reg_operand" "y,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsra%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsra\", operands, true); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsra, wmmx_wsra")] +) + +(define_insn "lshr3_di" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:DI 2 "register_operand" "y,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsrl%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsrl\", operands, false); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsrl, wmmx_wsrl")] +) + +(define_insn "ashl3_di" + [(set (match_operand:VSHFT 0 "register_operand" "=y,y") + (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y") + (match_operand:DI 2 "imm_or_reg_operand" "y,i")))] + "TARGET_REALLY_IWMMXT" + "* + switch (which_alternative) + { + case 0: + return \"wsll%?\\t%0, %1, %2\"; + case 1: + return arm_output_iwmmxt_shift_immediate (\"wsll\", operands, false); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "arch" "*, iwmmxt2") + (set_attr "type" "wmmx_wsll, wmmx_wsll")] +) + +(define_insn "iwmmxt_wmadds" + [(set 
(match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)]))) + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_dup 1)) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (sign_extend:V4SI (match_dup 2)) + (parallel [(const_int 0) (const_int 2)])))))] + "TARGET_REALLY_IWMMXT" + "wmadds%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmaddu" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)]))) + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_dup 1)) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (zero_extend:V4SI (match_dup 2)) + (parallel [(const_int 0) (const_int 2)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddu%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_tmia" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (match_operand:SI 2 "register_operand" "r")) + (sign_extend:DI + (match_operand:SI 3 "register_operand" "r")))))] + "TARGET_REALLY_IWMMXT" + "tmia%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmia")] +) + +(define_insn "iwmmxt_tmiaph" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI (sign_extend:DI + (truncate:HI (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI (match_operand:SI 3 "register_operand" "r")))) + (mult:DI (sign_extend:DI + (truncate:HI (ashiftrt:SI (match_dup 2) (const_int 16)))) + (sign_extend:DI + (truncate:HI (ashiftrt:SI (match_dup 3) (const_int 16))))))))] + "TARGET_REALLY_IWMMXT" + "tmiaph%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaph")] +) + +(define_insn "iwmmxt_tmiabb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))] + "TARGET_REALLY_IWMMXT" + "tmiabb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaxy")] +) + +(define_insn "iwmmxt_tmiatb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "r") + (const_int 16)))) + (sign_extend:DI + (truncate:HI + (match_operand:SI 3 "register_operand" "r"))))))] + "TARGET_REALLY_IWMMXT" + "tmiatb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaxy")] +) + +(define_insn "iwmmxt_tmiabt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI + (match_operand:SI 2 "register_operand" "r"))) + (sign_extend:DI + (truncate:HI + (ashiftrt:SI + 
(match_operand:SI 3 "register_operand" "r") + (const_int 16)))))))] + "TARGET_REALLY_IWMMXT" + "tmiabt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaxy")] +) + +(define_insn "iwmmxt_tmiatt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (mult:DI (sign_extend:DI + (truncate:HI + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "r") + (const_int 16)))) + (sign_extend:DI + (truncate:HI + (ashiftrt:SI + (match_operand:SI 3 "register_operand" "r") + (const_int 16)))))))] + "TARGET_REALLY_IWMMXT" + "tmiatt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmiaxy")] +) + +(define_insn "iwmmxt_tmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmovmsk")] +) + +(define_insn "iwmmxt_tmovmskh" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskh%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmovmsk")] +) + +(define_insn "iwmmxt_tmovmskw" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_TMOVMSK))] + "TARGET_REALLY_IWMMXT" + "tmovmskw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tmovmsk")] +) + +(define_insn "iwmmxt_waccb" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "waccb%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wacc")] +) + +(define_insn "iwmmxt_wacch" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "wacch%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wacc")] +) + +(define_insn "iwmmxt_waccw" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_WACC))] + "TARGET_REALLY_IWMMXT" + "waccw%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wacc")] +) + +;; use unspec here to prevent 8 * imm to be optimized by cse +(define_insn "iwmmxt_waligni" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (unspec:V8QI [(subreg:V8QI + (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (match_operand:SI 3 "immediate_operand" "i") + (const_int 8))) 0)] UNSPEC_WALIGNI))] + "TARGET_REALLY_IWMMXT" + "waligni%?\\t%0, %1, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_waligni")] +) + +(define_insn "iwmmxt_walignr" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (match_operand:SI 3 "register_operand" "z") (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr%U3%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_walignr0" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + 
(subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (reg:SI WCGR0) (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr0%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_walignr1" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (reg:SI WCGR1) (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr1%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_walignr2" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (reg:SI WCGR2) (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr2%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_walignr3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (subreg:V8QI (ashiftrt:TI + (subreg:TI (vec_concat:V16QI + (match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")) 0) + (mult:SI + (zero_extract:SI (reg:SI WCGR3) (const_int 3) (const_int 0)) + (const_int 8))) 0))] + "TARGET_REALLY_IWMMXT" + "walignr3%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_walignr")] +) + +(define_insn "iwmmxt_wsadb" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [ + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V8QI 2 "register_operand" "y") + (match_operand:V8QI 3 "register_operand" "y")] UNSPEC_WSAD))] + "TARGET_REALLY_IWMMXT" + "wsadb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsad")] +) + +(define_insn "iwmmxt_wsadh" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [ + (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V4HI 2 "register_operand" "y") + (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WSAD))] + "TARGET_REALLY_IWMMXT" + "wsadh%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsad")] +) + +(define_insn "iwmmxt_wsadbz" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSADZ))] + "TARGET_REALLY_IWMMXT" + "wsadbz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsad")] +) + +(define_insn "iwmmxt_wsadhz" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSADZ))] + "TARGET_REALLY_IWMMXT" + "wsadhz%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsad")] +) + +(include "iwmmxt2.md") diff --git a/gcc-4.9/gcc/config/arm/iwmmxt2.md b/gcc-4.9/gcc/config/arm/iwmmxt2.md new file mode 100644 index 000000000..b6e4b2476 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/iwmmxt2.md @@ -0,0 +1,903 @@ +;; Patterns for the Intel Wireless MMX technology architecture. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. 
+;; Written by Marvell, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_insn "iwmmxt_wabs3" + [(set (match_operand:VMMX 0 "register_operand" "=y") + (unspec:VMMX [(match_operand:VMMX 1 "register_operand" "y")] UNSPEC_WABS))] + "TARGET_REALLY_IWMMXT" + "wabs%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wabs")] +) + +(define_insn "iwmmxt_wabsdiffb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (abs:V8HI + (minus:V8HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))))))] + "TARGET_REALLY_IWMMXT" + "wabsdiffb%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wabsdiff")] +) + +(define_insn "iwmmxt_wabsdiffh" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate: V4HI + (abs:V4SI + (minus:V4SI + (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))))))] + "TARGET_REALLY_IWMMXT" + "wabsdiffh%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wabsdiff")] +) + +(define_insn "iwmmxt_wabsdiffw" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate: V2SI + (abs:V2DI + (minus:V2DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))))))] + "TARGET_REALLY_IWMMXT" + "wabsdiffw%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wabsdiff")] +) + +(define_insn "iwmmxt_waddsubhx" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (ss_minus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)]))) + (ss_plus:V4HI + (match_dup 1) + (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)]))) + (const_int 10)))] + "TARGET_REALLY_IWMMXT" + "waddsubhx%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_waddsubhx")] +) + +(define_insn "iwmmxt_wsubaddhx" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (ss_plus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)]))) + (ss_minus:V4HI + (match_dup 1) + (vec_select:V4HI (match_dup 2) + (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)]))) + (const_int 10)))] + "TARGET_REALLY_IWMMXT" + "wsubaddhx%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wsubaddhx")] +) + +(define_insn "addc3" + [(set (match_operand:VMMX2 0 "register_operand" "=y") + (unspec:VMMX2 + [(plus:VMMX2 + (match_operand:VMMX2 1 "register_operand" "y") + (match_operand:VMMX2 2 "register_operand" "y"))] 
UNSPEC_WADDC))] + "TARGET_REALLY_IWMMXT" + "waddc%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wadd")] +) + +(define_insn "iwmmxt_avg4" +[(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (vec_select:V8HI + (vec_merge:V8HI + (lshiftrt:V8HI + (plus:V8HI + (plus:V8HI + (plus:V8HI + (plus:V8HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))) + (vec_select:V8HI (zero_extend:V8HI (match_dup 1)) + (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2) + (const_int 3) (const_int 4) (const_int 5) (const_int 6)]))) + (vec_select:V8HI (zero_extend:V8HI (match_dup 2)) + (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2) + (const_int 3) (const_int 4) (const_int 5) (const_int 6)]))) + (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) + (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) + (const_int 2)) + (const_vector:V8HI [(const_int 0) (const_int 0) (const_int 0) (const_int 0) + (const_int 0) (const_int 0) (const_int 0) (const_int 0)]) + (const_int 254)) + (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 4) + (const_int 5) (const_int 6) (const_int 7) (const_int 0)]))))] + "TARGET_REALLY_IWMMXT" + "wavg4%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg4")] +) + +(define_insn "iwmmxt_avg4r" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (truncate:V8QI + (vec_select:V8HI + (vec_merge:V8HI + (lshiftrt:V8HI + (plus:V8HI + (plus:V8HI + (plus:V8HI + (plus:V8HI + (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y")) + (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))) + (vec_select:V8HI (zero_extend:V8HI (match_dup 1)) + (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2) + (const_int 3) (const_int 4) (const_int 5) (const_int 6)]))) + (vec_select:V8HI (zero_extend:V8HI (match_dup 2)) + (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2) + (const_int 3) (const_int 4) (const_int 5) (const_int 6)]))) + (const_vector:V8HI [(const_int 2) (const_int 2) (const_int 2) (const_int 2) + (const_int 2) (const_int 2) (const_int 2) (const_int 2)])) + (const_int 2)) + (const_vector:V8HI [(const_int 0) (const_int 0) (const_int 0) (const_int 0) + (const_int 0) (const_int 0) (const_int 0) (const_int 0)]) + (const_int 254)) + (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 4) + (const_int 5) (const_int 6) (const_int 7) (const_int 0)]))))] + "TARGET_REALLY_IWMMXT" + "wavg4r%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wavg4")] +) + +(define_insn "iwmmxt_wmaddsx" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)]))) + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_dup 1)) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (sign_extend:V4SI (match_dup 2)) + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddsx%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmaddux" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI 
(match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)]))) + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_dup 1)) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (zero_extend:V4SI (match_dup 2)) + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddux%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmaddsn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (minus:V2SI + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)]))) + (mult:V2SI + (vec_select:V2SI (sign_extend:V4SI (match_dup 1)) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (sign_extend:V4SI (match_dup 2)) + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddsn%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmaddun" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (minus:V2SI + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)])) + (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")) + (parallel [(const_int 0) (const_int 2)]))) + (mult:V2SI + (vec_select:V2SI (zero_extend:V4SI (match_dup 1)) + (parallel [(const_int 1) (const_int 3)])) + (vec_select:V2SI (zero_extend:V4SI (match_dup 2)) + (parallel [(const_int 1) (const_int 3)])))))] + "TARGET_REALLY_IWMMXT" + "wmaddun%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmadd")] +) + +(define_insn "iwmmxt_wmulwsm" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate:V2SI + (ashiftrt:V2DI + (mult:V2DI + (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (sign_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))) + (const_int 32))))] + "TARGET_REALLY_IWMMXT" + "wmulwsm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmulw")] +) + +(define_insn "iwmmxt_wmulwum" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate:V2SI + (lshiftrt:V2DI + (mult:V2DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))) + (const_int 32))))] + "TARGET_REALLY_IWMMXT" + "wmulwum%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmulw")] +) + +(define_insn "iwmmxt_wmulsmr" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (ashiftrt:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))) + (const_vector:V4SI [(const_int 32768) + (const_int 32768) + (const_int 32768)])) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulsmr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "iwmmxt_wmulumr" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (plus:V4SI + (mult:V4SI + (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y")) + (zero_extend:V4SI (match_operand:V4HI 2 
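+;; Note on the rounding forms (wmulsmr, wmulumr, wmulwsmr, wmulwumr): as the
+;; RTL above and below shows, they add half of the final weight to the
+;; double-width product (0x8000 for the halfword forms, 0x80000000 for the
+;; word forms) before shifting right by 16 or 32, so the returned high half
+;; is rounded to nearest instead of truncated.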
"register_operand" "y"))) + (const_vector:V4SI [(const_int 32768) + (const_int 32768) + (const_int 32768) + (const_int 32768)])) + (const_int 16))))] + "TARGET_REALLY_IWMMXT" + "wmulumr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "iwmmxt_wmulwsmr" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate:V2SI + (ashiftrt:V2DI + (plus:V2DI + (mult:V2DI + (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (sign_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))) + (const_vector:V2DI [(const_int 2147483648) + (const_int 2147483648)])) + (const_int 32))))] + "TARGET_REALLY_IWMMXT" + "wmulwsmr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmul")] +) + +(define_insn "iwmmxt_wmulwumr" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (truncate:V2SI + (lshiftrt:V2DI + (plus:V2DI + (mult:V2DI + (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y")) + (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))) + (const_vector:V2DI [(const_int 2147483648) + (const_int 2147483648)])) + (const_int 32))))] + "TARGET_REALLY_IWMMXT" + "wmulwumr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmulw")] +) + +(define_insn "iwmmxt_wmulwl" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (mult:V2SI + (match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")))] + "TARGET_REALLY_IWMMXT" + "wmulwl%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmulw")] +) + +(define_insn "iwmmxt_wqmulm" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WQMULM))] + "TARGET_REALLY_IWMMXT" + "wqmulm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmulm")] +) + +(define_insn "iwmmxt_wqmulwm" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] UNSPEC_WQMULWM))] + "TARGET_REALLY_IWMMXT" + "wqmulwm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmulwm")] +) + +(define_insn "iwmmxt_wqmulmr" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y") + (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WQMULMR))] + "TARGET_REALLY_IWMMXT" + "wqmulmr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmulm")] +) + +(define_insn "iwmmxt_wqmulwmr" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "y") + (match_operand:V2SI 2 "register_operand" "y")] UNSPEC_WQMULWMR))] + "TARGET_REALLY_IWMMXT" + "wqmulwmr%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmulwm")] +) + +(define_insn "iwmmxt_waddbhusm" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (const_vector:V4QI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)]) + (us_truncate:V4QI + (ss_plus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))))))] + "TARGET_REALLY_IWMMXT" + "waddbhusm%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_waddbhus")] +) + +(define_insn "iwmmxt_waddbhusl" + 
[(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (us_truncate:V4QI + (ss_plus:V4HI + (match_operand:V4HI 1 "register_operand" "y") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))) + (const_vector:V4QI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))] + "TARGET_REALLY_IWMMXT" + "waddbhusl%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_waddbhus")] +) + +(define_insn "iwmmxt_wqmiabb" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxy))] + "TARGET_REALLY_IWMMXT" + "wqmiabb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiabt" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxy))] + "TARGET_REALLY_IWMMXT" + "wqmiabt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiatb" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxy))] + "TARGET_REALLY_IWMMXT" + "wqmiatb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiatt" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxy))] + "TARGET_REALLY_IWMMXT" + "wqmiatt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiabbn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxyn))] + "TARGET_REALLY_IWMMXT" + "wqmiabbn%?\\t%0, %2, 
%3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiabtn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxyn))] + "TARGET_REALLY_IWMMXT" + "wqmiabtn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiatbn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxyn))] + "TARGET_REALLY_IWMMXT" + "wqmiatbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wqmiattn" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0") + (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48)) + (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16)) + (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxyn))] + "TARGET_REALLY_IWMMXT" + "wqmiattn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wqmiaxy")] +) + +(define_insn "iwmmxt_wmiabb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 0)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 2)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 2)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiabb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiabt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 1)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 2)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 3)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiabt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiatb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" 
"y") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 0)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 3)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 2)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiatb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiatt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 1)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 3)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 3)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiatt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiabbn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 0)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 2)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 2)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiabbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiabtn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 1)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 2)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 3)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiabtn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiatbn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + (parallel [(const_int 0)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 3)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 2)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiatbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiattn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI (match_operand:DI 1 "register_operand" "0") + (plus:DI + (mult:DI + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 1)]))) + (sign_extend:DI + (vec_select:HI (match_operand:V4HI 3 "register_operand" "y") + 
(parallel [(const_int 1)])))) + (mult:DI + (sign_extend:DI + (vec_select:HI (match_dup 2) + (parallel [(const_int 3)]))) + (sign_extend:DI + (vec_select:HI (match_dup 3) + (parallel [(const_int 3)])))))))] + "TARGET_REALLY_IWMMXT" + "wmiattn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiaxy")] +) + +(define_insn "iwmmxt_wmiawbb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawbb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawbt" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawbt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawtb" + [(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawtb%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawtt" +[(set (match_operand:DI 0 "register_operand" "=y") + (plus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawtt%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawbbn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawbbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawbtn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawbtn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawtbn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" 
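+;; The wmiaw* patterns do the same at 32-bit granularity: one signed 32-bit
+;; element of operand 2 times one of operand 3 is added to (or, for the "n"
+;; forms, subtracted from) the 64-bit accumulator in operand 1.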
"y") (parallel [(const_int 1)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawtbn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmiawttn" + [(set (match_operand:DI 0 "register_operand" "=y") + (minus:DI + (match_operand:DI 1 "register_operand" "0") + (mult:DI + (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)]))) + (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))] + "TARGET_REALLY_IWMMXT" + "wmiawttn%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmiawxy")] +) + +(define_insn "iwmmxt_wmerge" + [(set (match_operand:DI 0 "register_operand" "=y") + (ior:DI + (ashift:DI + (match_operand:DI 2 "register_operand" "y") + (minus:SI + (const_int 64) + (mult:SI + (match_operand:SI 3 "immediate_operand" "i") + (const_int 8)))) + (lshiftrt:DI + (ashift:DI + (match_operand:DI 1 "register_operand" "y") + (mult:SI + (match_dup 3) + (const_int 8))) + (mult:SI + (match_dup 3) + (const_int 8)))))] + "TARGET_REALLY_IWMMXT" + "wmerge%?\\t%0, %1, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_wmerge")] +) + +(define_insn "iwmmxt_tandc3" + [(set (reg:CC CC_REGNUM) + (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TANDC) 0)) + (unspec:CC [(reg:SI 15)] UNSPEC_TANDC)] + "TARGET_REALLY_IWMMXT" + "tandc%?\\t r15" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_tandc")] +) + +(define_insn "iwmmxt_torc3" + [(set (reg:CC CC_REGNUM) + (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TORC) 0)) + (unspec:CC [(reg:SI 15)] UNSPEC_TORC)] + "TARGET_REALLY_IWMMXT" + "torc%?\\t r15" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_torc")] +) + +(define_insn "iwmmxt_torvsc3" + [(set (reg:CC CC_REGNUM) + (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TORVSC) 0)) + (unspec:CC [(reg:SI 15)] UNSPEC_TORVSC)] + "TARGET_REALLY_IWMMXT" + "torvsc%?\\t r15" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_torvsc")] +) + +(define_insn "iwmmxt_textrc3" + [(set (reg:CC CC_REGNUM) + (subreg:CC (unspec:VMMX [(const_int 0) + (match_operand:SI 0 "immediate_operand" "i")] UNSPEC_TEXTRC) 0)) + (unspec:CC [(reg:SI 15)] UNSPEC_TEXTRC)] + "TARGET_REALLY_IWMMXT" + "textrc%?\\t r15, %0" + [(set_attr "predicable" "yes") + (set_attr "type" "wmmx_textrc")] +) diff --git a/gcc-4.9/gcc/config/arm/ldmstm.md b/gcc-4.9/gcc/config/arm/ldmstm.md new file mode 100644 index 000000000..1a2429071 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/ldmstm.md @@ -0,0 +1,1225 @@ +/* ARM ldm/stm instruction patterns. This file was automatically generated + using arm-ldmstm.ml. Please do not edit manually. + + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +(define_insn "*ldm4_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_operand:SI 5 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm4_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_operand:SI 5 "s_register_operand" "l"))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4")]) + +(define_insn "*ldm4_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 5))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm4_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&l") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_dup 5))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 3 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 4 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5" + "ldm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4")]) + +(define_insn "*stm4_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 5 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 
"arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm4_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (match_dup 5)) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "stm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_stm4_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&l") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (match_dup 5)) + (match_operand:SI 1 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 2 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 3 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 4 "low_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 5" + "stm%(ia%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4")]) + +(define_insn "*ldm4_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 16))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(ib%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 12)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int 16))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "ldm%(ib%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 
"s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 16))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(ib%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int 16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 12))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int 16))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "stm%(ib%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_da" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 5)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(da%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 5)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "ldm%(da%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -12))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 5)) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(da%)\t%5, {%1, %2, %3, %4}" 
+ [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*stm4_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 5)) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 5" + "stm%(da%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm4_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") + (const_int -16)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(db%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*ldm4_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -16)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -12)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -8)))) + (set (match_operand:SI 4 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 5) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "ldm%(db%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "load4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm4_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 5 "s_register_operand" "rk") (const_int -16))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(db%)\t%5, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm4_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 5 "s_register_operand" "+&rk") + (plus:SI (match_dup 5) (const_int -16))) + (set (mem:SI (plus:SI (match_dup 5) (const_int -16))) + (match_operand:SI 1 
"arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -12))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -8))) + (match_operand:SI 3 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 5) (const_int -4))) + (match_operand:SI 4 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 5" + "stm%(db%)\t%5!, {%1, %2, %3, %4}" + [(set_attr "type" "store4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 6 "memory_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 7 "memory_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 4, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (parallel + [(set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 6 "memory_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 7 "memory_operand" ""))])] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 4, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 9 "const_int_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 10 "const_int_operand" "")) + (set (match_operand:SI 6 "memory_operand" "") + (match_dup 2)) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 11 "const_int_operand" "")) + (set (match_operand:SI 7 "memory_operand" "") + (match_dup 3))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 9 "const_int_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 10 "const_int_operand" "")) + (set (match_operand:SI 3 "s_register_operand" "") + (match_operand:SI 11 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 6 "memory_operand" "") + (match_dup 2)) + (set (match_operand:SI 7 "memory_operand" "") + (match_dup 3))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 4 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" "")) + (set (match_operand:SI 6 "memory_operand" "") + (match_operand:SI 2 "s_register_operand" "")) + (set (match_operand:SI 7 "memory_operand" "") + (match_operand:SI 3 "s_register_operand" 
""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 4)) + DONE; + else + FAIL; +}) + +(define_insn "*ldm3_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_operand:SI 4 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm3_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_operand:SI 4 "s_register_operand" "l"))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3")]) + +(define_insn "*ldm3_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 4))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm3_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&l") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_dup 4))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 3 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "ldm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3")]) + +(define_insn "*stm3_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 4 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm3_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (match_dup 4)) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + 
(set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_stm3_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&l") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (match_dup 4)) + (match_operand:SI 1 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 2 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 3 "low_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 4" + "stm%(ia%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3")]) + +(define_insn "*ldm3_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 12))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(ib%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int 12))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(ib%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 12))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(ib%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int 12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int 12))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(ib%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_da" + [(match_parallel 0 
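+;; Only ARM state has the ib and da address modes, so those patterns are
+;; guarded by TARGET_ARM; the ia and db forms are available to all 32-bit
+;; cores (TARGET_32BIT) and have separate low-register variants for Thumb-1
+;; (TARGET_THUMB1).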
"load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 4)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(da%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 4)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "ldm%(da%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -8))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 4)) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(da%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*stm3_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 4)) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 4" + "stm%(da%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm3_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 3 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(db%)\t%4, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*ldm3_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -12)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -8)))) + (set (match_operand:SI 3 
"arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 4) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "ldm%(db%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "load3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm3_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 4 "s_register_operand" "rk") (const_int -12))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(db%)\t%4, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm3_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 4 "s_register_operand" "+&rk") + (plus:SI (match_dup 4) (const_int -12))) + (set (mem:SI (plus:SI (match_dup 4) (const_int -12))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -8))) + (match_operand:SI 2 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 4) (const_int -4))) + (match_operand:SI 3 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 4" + "stm%(db%)\t%4!, {%1, %2, %3}" + [(set_attr "type" "store3") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 3, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 4 "memory_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 5 "memory_operand" ""))])] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 3, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 7 "const_int_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 1)) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_dup 2))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 6 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 7 "const_int_operand" "")) + (set (match_operand:SI 2 "s_register_operand" "") + (match_operand:SI 8 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 4 "memory_operand" "") + (match_dup 1)) + (set 
(match_operand:SI 5 "memory_operand" "") + (match_dup 2))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 4 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" "")) + (set (match_operand:SI 5 "memory_operand" "") + (match_operand:SI 2 "s_register_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 3)) + DONE; + else + FAIL; +}) + +(define_insn "*ldm2_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_operand:SI 3 "s_register_operand" "rk"))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "ldm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm2_ia" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_operand:SI 3 "s_register_operand" "l"))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 2" + "ldm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "load2")]) + +(define_insn "*ldm2_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 3))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_ldm2_ia_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&l") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "low_register_operand" "") + (mem:SI (match_dup 3))) + (set (match_operand:SI 2 "low_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4))))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "ldm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "load2")]) + +(define_insn "*stm2_ia" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (match_operand:SI 3 "s_register_operand" "rk")) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "stm%(ia%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm2_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (match_dup 3)) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" 
"yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*thumb_stm2_ia_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&l") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (match_dup 3)) + (match_operand:SI 1 "low_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 2 "low_register_operand" ""))])] + "TARGET_THUMB1 && XVECLEN (operands[0], 0) == 3" + "stm%(ia%)\t%3!, {%1, %2}" + [(set_attr "type" "store2")]) + +(define_insn "*ldm2_ib" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 8))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "ldm%(ib%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_ib_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int 8))))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(ib%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_ib" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "stm%(ib%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_ib_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int 8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int 4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int 8))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(ib%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_da" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int -4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 3)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "ldm%(da%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_da_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (match_dup 3)))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "ldm%(da%)\t%3!, 
{%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_da" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 3)) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 2" + "stm%(da%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*stm2_da_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (match_dup 3)) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_ARM && XVECLEN (operands[0], 0) == 3" + "stm%(da%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes")]) + +(define_insn "*ldm2_db" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "ldm%(db%)\t%3, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*ldm2_db_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (match_operand:SI 1 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -8)))) + (set (match_operand:SI 2 "arm_hard_general_register_operand" "") + (mem:SI (plus:SI (match_dup 3) + (const_int -4))))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "ldm%(db%)\t%3!, {%1, %2}" + [(set_attr "type" "load2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm2_db" + [(match_parallel 0 "store_multiple_operation" + [(set (mem:SI (plus:SI (match_operand:SI 3 "s_register_operand" "rk") (const_int -8))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 2" + "stm%(db%)\t%3, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "*stm2_db_update" + [(match_parallel 0 "store_multiple_operation" + [(set (match_operand:SI 3 "s_register_operand" "+&rk") + (plus:SI (match_dup 3) (const_int -8))) + (set (mem:SI (plus:SI (match_dup 3) (const_int -8))) + (match_operand:SI 1 "arm_hard_general_register_operand" "")) + (set (mem:SI (plus:SI (match_dup 3) (const_int -4))) + (match_operand:SI 2 "arm_hard_general_register_operand" ""))])] + "TARGET_32BIT && XVECLEN (operands[0], 0) == 3" + "stm%(db%)\t%3!, {%1, %2}" + [(set_attr "type" "store2") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" 
""))] + "" + [(const_int 0)] +{ + if (gen_ldm_seq (operands, 2, false)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "" + [(const_int 0)] +{ + if (gen_const_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "s_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "s_register_operand" ""))] + "" + [(const_int 0)] +{ + if (gen_stm_seq (operands, 2)) + DONE; + else + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 4 "s_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand:SI 6 "s_register_operand" "") + (match_operand:SI 7 "s_register_operand" "")])) + (clobber (reg:CC CC_REGNUM))])] + "((((REGNO (operands[6]) == REGNO (operands[0])) + && (REGNO (operands[7]) == REGNO (operands[1]))) + || ((REGNO (operands[7]) == REGNO (operands[0])) + && (REGNO (operands[6]) == REGNO (operands[1])))) + && (peep2_regno_dead_p (3, REGNO (operands[0])) + || (REGNO (operands[0]) == REGNO (operands[4]))) + && (peep2_regno_dead_p (3, REGNO (operands[1])) + || (REGNO (operands[1]) == REGNO (operands[4]))))" + [(parallel + [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)])) + (clobber (reg:CC CC_REGNUM))])] +{ + if (!gen_ldm_seq (operands, 2, true)) + FAIL; +}) + +(define_peephole2 + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "s_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "s_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand:SI 6 "s_register_operand" "") + (match_operand:SI 7 "s_register_operand" "")]))] + "((((REGNO (operands[6]) == REGNO (operands[0])) + && (REGNO (operands[7]) == REGNO (operands[1]))) + || ((REGNO (operands[7]) == REGNO (operands[0])) + && (REGNO (operands[6]) == REGNO (operands[1])))) + && (peep2_regno_dead_p (3, REGNO (operands[0])) + || (REGNO (operands[0]) == REGNO (operands[4]))) + && (peep2_regno_dead_p (3, REGNO (operands[1])) + || (REGNO (operands[1]) == REGNO (operands[4]))))" + [(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] +{ + if (!gen_ldm_seq (operands, 2, true)) + FAIL; +}) + diff --git a/gcc-4.9/gcc/config/arm/ldrdstrd.md b/gcc-4.9/gcc/config/arm/ldrdstrd.md new file mode 100644 index 000000000..064033aaa --- /dev/null +++ b/gcc-4.9/gcc/config/arm/ldrdstrd.md @@ -0,0 +1,260 @@ +;; ARM ldrd/strd peephole optimizations. 
+;; +;; Copyright (C) 2013-2014 Free Software Foundation, Inc. +;; +;; Written by Greta Yorsh + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The following peephole optimizations identify consecutive memory +;; accesses, and try to rearrange the operands to enable generation of +;; ldrd/strd. + +(define_peephole2 ; ldrd + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, true, false, false)) + FAIL; + else if (TARGET_ARM) + { + /* In ARM state, the destination registers of LDRD/STRD must be + consecutive. We emit DImode access. */ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit [(set (match_dup 0) (match_dup 2))] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(parallel [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))])] */ + rtx t1 = gen_rtx_SET (VOIDmode, operands[0], operands[2]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[1], operands[3]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +(define_peephole2 ; strd + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "arm_general_register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "arm_general_register_operand" ""))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, false, false)) + FAIL; + else if (TARGET_ARM) + { + /* In ARM state, the destination registers of LDRD/STRD must be + consecutive. We emit DImode access. */ + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit [(set (match_dup 2) (match_dup 0))] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[2], operands[0])); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +;; The following peepholes reorder registers to enable LDRD/STRD. 
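[Editor's note - illustrative example, not part of the patch.] The peepholes in this file fire on two adjacent SImode memory accesses when the selected tuning sets current_tune->prefer_ldrd_strd and the function is not optimized for size. A hypothetical C fragment of the kind that yields such adjacent accesses is sketched below (assuming a tuning that prefers LDRD/STRD, e.g. -mtune=cortex-a15, and -O2); whether the accesses are actually combined still depends on register allocation and address offsets.

    /* Editor's sketch: adjacent word loads/stores that the ldrd/strd
       peepholes above can combine into LDRD/STRD.  */
    struct pair { int lo; int hi; };

    void
    copy_pair (struct pair *dst, const struct pair *src)
    {
      int a = src->lo;   /* two adjacent SImode loads ...  */
      int b = src->hi;
      dst->lo = a;       /* ... and two adjacent SImode stores.  */
      dst->hi = b;
    }
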
+(define_peephole2 ; strd of constants + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, true, false)) + FAIL; + else if (TARGET_ARM) + { + rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (set (match_dup 2) tmp)] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp)); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +(define_peephole2 ; strd of constants + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 4 "const_int_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 5 "const_int_operand" "")) + (set (match_operand:SI 2 "memory_operand" "") + (match_dup 0)) + (set (match_operand:SI 3 "memory_operand" "") + (match_dup 1))] + "TARGET_LDRD + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)" + [(const_int 0)] +{ + if (!gen_operands_ldrd_strd (operands, false, true, false)) + FAIL; + else if (TARGET_ARM) + { + rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + /* Emit the pattern + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (set (match_dup 2) tmp)] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + emit_insn (gen_rtx_SET (VOIDmode, operands[2], tmp)); + DONE; + } + else if (TARGET_THUMB2) + { + /* Emit the pattern: + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 1) (match_dup 5)) + (parallel [(set (match_dup 2) (match_dup 0)) + (set (match_dup 3) (match_dup 1))])] */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[1], operands[5])); + rtx t1 = gen_rtx_SET (VOIDmode, operands[2], operands[0]); + rtx t2 = gen_rtx_SET (VOIDmode, operands[3], operands[1]); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2))); + DONE; + } +}) + +;; The following two peephole optimizations are only relevant for ARM +;; mode where LDRD/STRD require consecutive registers. + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation. 
+ [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "arm_general_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand 6 "arm_general_register_operand" "") + (match_operand 7 "arm_general_register_operand" "") ]))] + "TARGET_LDRD && TARGET_ARM + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun) + && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7]))) + ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] + { + if (!gen_operands_ldrd_strd (operands, true, false, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation that sets the flags. + [(set (match_operand:SI 0 "arm_general_register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "arm_general_register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (parallel + [(set (match_operand:SI 4 "arm_general_register_operand" "") + (match_operator:SI 5 "commutative_binary_operator" + [(match_operand 6 "arm_general_register_operand" "") + (match_operand 7 "arm_general_register_operand" "") ])) + (clobber (reg:CC CC_REGNUM))])] + "TARGET_LDRD && TARGET_ARM + && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun) + && ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7]))) + ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (parallel + [(set (match_dup 4) + (match_op_dup 5 [(match_dup 6) (match_dup 7)])) + (clobber (reg:CC CC_REGNUM))])] + { + if (!gen_operands_ldrd_strd (operands, true, false, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + +;; TODO: Handle LDRD/STRD with writeback: +;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY +;; (b) Patterns may be followed by an update of the base address. diff --git a/gcc-4.9/gcc/config/arm/linux-eabi.h b/gcc-4.9/gcc/config/arm/linux-eabi.h new file mode 100644 index 000000000..f1f3448f1 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-eabi.h @@ -0,0 +1,122 @@ +/* Configuration file for ARM GNU/Linux EABI targets. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* On EABI GNU/Linux, we want both the BPABI builtins and the + GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + TARGET_BPABI_CPP_BUILTINS(); \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + ANDROID_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (false) + +/* We default to a soft-float ABI so that binaries can run on all + target hardware. If you override this to use the hard-float ABI then + change the setting of GLIBC_DYNAMIC_LINKER_DEFAULT as well. */ +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT + +/* We default to the "aapcs-linux" ABI so that enums are int-sized by + default. */ +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX + +/* Default to armv5t so that thumb shared libraries work. + The ARM10TDMI core is the default for armv5t, so set + SUBTARGET_CPU_DEFAULT to achieve this. */ +#undef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm10tdmi + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#undef TARGET_LINKER_EMULATION +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_LINKER_EMULATION "armelfb_linux_eabi" +#else +#define TARGET_LINKER_EMULATION "armelf_linux_eabi" +#endif + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION + +/* GNU/Linux on ARM currently supports three dynamic linkers: + - ld-linux.so.2 - for the legacy ABI + - ld-linux.so.3 - for the EABI-derived soft-float ABI + - ld-linux-armhf.so.3 - for the EABI-derived hard-float ABI. + All the dynamic linkers live in /lib. + We default to soft-float, but this can be overridden by changing both + GLIBC_DYNAMIC_LINKER_DEFAULT and TARGET_DEFAULT_FLOAT_ABI. */ + +#undef GLIBC_DYNAMIC_LINKER +#define GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "/lib/ld-linux.so.3" +#define GLIBC_DYNAMIC_LINKER_HARD_FLOAT "/lib/ld-linux-armhf.so.3" +#define GLIBC_DYNAMIC_LINKER_DEFAULT GLIBC_DYNAMIC_LINKER_SOFT_FLOAT + +#define GLIBC_DYNAMIC_LINKER \ + "%{mfloat-abi=hard:" GLIBC_DYNAMIC_LINKER_HARD_FLOAT "} \ + %{mfloat-abi=soft*:" GLIBC_DYNAMIC_LINKER_SOFT_FLOAT "} \ + %{!mfloat-abi=*:" GLIBC_DYNAMIC_LINKER_DEFAULT "}" + +/* At this point, bpabi.h will have clobbered LINK_SPEC. We want to + use the GNU/Linux version, not the generic BPABI version. 
*/ +#undef LINK_SPEC +#define LINK_SPEC EABI_LINK_SPEC \ + LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \ + LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) + +#undef ASAN_CC1_SPEC +#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}" + +#undef CC1_SPEC +#define CC1_SPEC \ + LINUX_OR_ANDROID_CC (GNU_USER_TARGET_CC1_SPEC " " ASAN_CC1_SPEC, \ + GNU_USER_TARGET_CC1_SPEC " " ASAN_CC1_SPEC " " \ + ANDROID_CC1_SPEC) + +#define CC1PLUS_SPEC \ + LINUX_OR_ANDROID_CC ("", ANDROID_CC1PLUS_SPEC) + +#undef LIB_SPEC +#define LIB_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \ + GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC) + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC) + +/* Use the default LIBGCC_SPEC, not the version in linux-elf.h, as we + do not use -lfloat. */ +#undef LIBGCC_SPEC + +/* Clear the instruction cache from `beg' to `end'. This is + implemented in lib1funcs.S, so ensure an error if this definition + is used. */ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(BEG, END) not_used + +#define ARM_TARGET2_DWARF_FORMAT (DW_EH_PE_pcrel | DW_EH_PE_indirect) diff --git a/gcc-4.9/gcc/config/arm/linux-elf.h b/gcc-4.9/gcc/config/arm/linux-elf.h new file mode 100644 index 000000000..5dc3328e8 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-elf.h @@ -0,0 +1,115 @@ +/* Definitions for ARM running Linux-based GNU systems using ELF + Copyright (C) 1993-2014 Free Software Foundation, Inc. + Contributed by Philip Blundell + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* elfos.h should have already been included. Now just override + any conflicting definitions and add any extras. */ + +/* Run-time Target Specification. */ +#undef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD + +/* TARGET_BIG_ENDIAN_DEFAULT is set in + config.gcc for big endian configurations. */ +#if TARGET_BIG_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT MASK_BIG_END +#define TARGET_ENDIAN_OPTION "mbig-endian" +#define TARGET_LINKER_EMULATION "armelfb_linux" +#else +#define TARGET_ENDIAN_DEFAULT 0 +#define TARGET_ENDIAN_OPTION "mlittle-endian" +#define TARGET_LINKER_EMULATION "armelf_linux" +#endif + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (TARGET_ENDIAN_DEFAULT) + +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm6 + +#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION " -p" + +/* We do not have any MULTILIB_OPTIONS specified, so there are no + MULTILIB_DEFAULTS. */ +#undef MULTILIB_DEFAULTS + +/* Now we define the strings used to build the spec file. 
*/ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define LIBGCC_SPEC "%{mfloat-abi=soft*:-lfloat} -lgcc" + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" + +#define LINUX_TARGET_LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ + %{shared:-shared} \ + %{symbolic:-Bsymbolic} \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \ + -X \ + %{mbig-endian:-EB} %{mlittle-endian:-EL}" \ + SUBTARGET_EXTRA_LINK_SPEC + +#undef LINK_SPEC +#define LINK_SPEC LINUX_TARGET_LINK_SPEC + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + } \ + while (0) + +/* This is how we tell the assembler that two symbols have the same value. */ +#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \ + do \ + { \ + assemble_name (FILE, NAME1); \ + fputs (" = ", FILE); \ + assemble_name (FILE, NAME2); \ + fputc ('\n', FILE); \ + } \ + while (0) + +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "vfp" + +/* Call the function profiler with a given profile label. */ +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + fprintf (STREAM, "\tbl\tmcount%s\n", \ + (TARGET_ARM && NEED_PLT_RELOC) ? "(PLT)" : ""); \ +} + +/* The GNU/Linux profiler clobbers the link register. Make sure the + prologue knows to save it. */ +#define PROFILE_HOOK(X) \ + emit_clobber (gen_rtx_REG (SImode, LR_REGNUM)) + +/* The GNU/Linux profiler needs a frame pointer. */ +#define SUBTARGET_FRAME_POINTER_REQUIRED crtl->profile + +/* Add .note.GNU-stack. */ +#undef NEED_INDICATE_EXEC_STACK +#define NEED_INDICATE_EXEC_STACK 1 diff --git a/gcc-4.9/gcc/config/arm/linux-gas.h b/gcc-4.9/gcc/config/arm/linux-gas.h new file mode 100644 index 000000000..52a739c26 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/linux-gas.h @@ -0,0 +1,55 @@ +/* Definitions of target machine for GNU compiler. + ARM Linux-based GNU systems version. + Copyright (C) 1997-2014 Free Software Foundation, Inc. + Contributed by Russell King . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This is how we tell the assembler that a symbol is weak. + GAS always supports weak symbols. */ + +/* Unsigned chars produces much better code than signed. */ +#define DEFAULT_SIGNED_CHAR 0 + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Use the AAPCS type for wchar_t, or the previous Linux default for + non-AAPCS. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE (TARGET_AAPCS_BASED ? "unsigned int" : "long int") + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. 
*/ +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \ + register unsigned long _end __asm ("a2") = (unsigned long) (END); \ + register unsigned long _flg __asm ("a3") = 0; \ + __asm __volatile ("swi 0x9f0002 @ sys_cacheflush" \ + : "=r" (_beg) \ + : "0" (_beg), "r" (_end), "r" (_flg)); \ +} diff --git a/gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md b/gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md new file mode 100644 index 000000000..9968803ca --- /dev/null +++ b/gcc-4.9/gcc/config/arm/marvell-f-iwmmxt.md @@ -0,0 +1,189 @@ +;; Marvell WMMX2 pipeline description +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; Written by Marvell, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +(define_automaton "marvell_f_iwmmxt") + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Pipelines +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; This is a 7-stage pipelines: +;; +;; MD | MI | ME1 | ME2 | ME3 | ME4 | MW +;; +;; There are various bypasses modelled to a greater or lesser extent. +;; +;; Latencies in this file correspond to the number of cycles after +;; the issue stage that it takes for the result of the instruction to +;; be computed, or for its side-effects to occur. + +(define_cpu_unit "mf_iwmmxt_MD" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_MI" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_ME1" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_ME2" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_ME3" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_ME4" "marvell_f_iwmmxt") +(define_cpu_unit "mf_iwmmxt_MW" "marvell_f_iwmmxt") + +(define_reservation "mf_iwmmxt_ME" + "mf_iwmmxt_ME1,mf_iwmmxt_ME2,mf_iwmmxt_ME3,mf_iwmmxt_ME4" +) + +(define_reservation "mf_iwmmxt_pipeline" + "mf_iwmmxt_MD, mf_iwmmxt_MI, mf_iwmmxt_ME, mf_iwmmxt_MW" +) + +;; An attribute to indicate whether our reservations are applicable. 
+(define_attr "marvell_f_iwmmxt" "yes,no" + (const (if_then_else (symbol_ref "arm_arch_iwmmxt") + (const_string "yes") (const_string "no")))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; instruction classes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; An attribute appended to instructions for classification + +(define_attr "wmmxt_shift" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wror, wmmx_wsll, wmmx_wsra, wmmx_wsrl") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_pack" "yes,no" + (if_then_else (eq_attr "type" "wmmx_waligni, wmmx_walignr, wmmx_wmerge,\ + wmmx_wpack, wmmx_wshufh, wmmx_wunpckeh,\ + wmmx_wunpckih, wmmx_wunpckel, wmmx_wunpckil") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_mult_c1" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wmac, wmmx_wmadd, wmmx_wmiaxy,\ + wmmx_wmiawxy, wmmx_wmulw, wmmx_wqmiaxy,\ + wmmx_wqmulwm") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_mult_c2" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wmul, wmmx_wqmulm") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_alu_c1" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wabs, wmmx_wabsdiff, wmmx_wand,\ + wmmx_wandn, wmmx_wmov, wmmx_wor, wmmx_wxor") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_alu_c2" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wacc, wmmx_wadd, wmmx_waddsubhx,\ + wmmx_wavg2, wmmx_wavg4, wmmx_wcmpeq,\ + wmmx_wcmpgt, wmmx_wmax, wmmx_wmin,\ + wmmx_wsub, wmmx_waddbhus, wmmx_wsubaddhx") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_alu_c3" "yes,no" + (if_then_else (eq_attr "type" "wmmx_wsad") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_transfer_c1" "yes,no" + (if_then_else (eq_attr "type" "wmmx_tbcst, wmmx_tinsr,\ + wmmx_tmcr, wmmx_tmcrr") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_transfer_c2" "yes,no" + (if_then_else (eq_attr "type" "wmmx_textrm, wmmx_tmovmsk,\ + wmmx_tmrc, wmmx_tmrrc") + (const_string "yes") (const_string "no")) +) + +(define_attr "wmmxt_transfer_c3" "yes,no" + (if_then_else (eq_attr "type" "wmmx_tmia, wmmx_tmiaph, wmmx_tmiaxy") + (const_string "yes") (const_string "no")) +) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Main description +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_reservation "marvell_f_iwmmxt_alu_c1" 1 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_alu_c1" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_pack" 1 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_pack" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_shift" 1 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_shift" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_transfer_c1" 1 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_transfer_c1" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_transfer_c2" 5 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_transfer_c2" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_alu_c2" 2 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_alu_c2" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_alu_c3" 3 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_alu_c3" "yes")) + 
"mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_transfer_c3" 4 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_transfer_c3" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_mult_c1" 4 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_mult_c1" "yes")) + "mf_iwmmxt_pipeline") + +;There is a forwarding path from ME3 stage +(define_insn_reservation "marvell_f_iwmmxt_mult_c2" 3 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "wmmxt_mult_c2" "yes")) + "mf_iwmmxt_pipeline") + +(define_insn_reservation "marvell_f_iwmmxt_wstr" 0 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "type" "wmmx_wstr")) + "mf_iwmmxt_pipeline") + +;There is a forwarding path from MW stage +(define_insn_reservation "marvell_f_iwmmxt_wldr" 5 + (and (eq_attr "marvell_f_iwmmxt" "yes") + (eq_attr "type" "wmmx_wldr")) + "mf_iwmmxt_pipeline") diff --git a/gcc-4.9/gcc/config/arm/marvell-pj4.md b/gcc-4.9/gcc/config/arm/marvell-pj4.md new file mode 100644 index 000000000..0b9d6ebad --- /dev/null +++ b/gcc-4.9/gcc/config/arm/marvell-pj4.md @@ -0,0 +1,232 @@ +;; Marvell ARM Processor Pipeline Description +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Marvell. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Pipeline description for the Marvell PJ4, aka "Flareon". 
+(define_automaton "pj4") + +;; Issue resources +(define_cpu_unit "pj4_is1,pj4_is2" "pj4") +(define_reservation "pj4_is" "(pj4_is1|pj4_is2)") +(define_reservation "pj4_isb" "(pj4_is1+pj4_is2)") + +;; Functional units +(define_cpu_unit "pj4_alu1,pj4_alu2,pj4_mul,pj4_div" "pj4") + +;; Completion ports +(define_cpu_unit "pj4_w1,pj4_w2" "pj4") + +;; Complete/Retire control +(define_cpu_unit "pj4_c1,pj4_c2" "pj4") +(define_reservation "pj4_cp" "(pj4_c1|pj4_c2)") +(define_reservation "pj4_cpb" "(pj4_c1+pj4_c2)") + +;; Integer arithmetic instructions + +(define_insn_reservation "pj4_alu_e1" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "mov_imm,mov_reg,mvn_imm,mvn_reg") + (not (eq_attr "conds" "set"))) + "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu_e1_conds" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "mov_imm,mov_reg,mvn_imm,mvn_reg") + (eq_attr "conds" "set")) + "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg") + (not (eq_attr "conds" "set"))) + "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu_conds" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "alu_imm,alus_imm,alu_reg,alus_reg,\ + logic_imm,logics_imm,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg") + (eq_attr "conds" "set")) + "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_shift" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") + (not (eq_attr "conds" "set")) + (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_shift_conds" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg") + (eq_attr "conds" "set") + (eq_attr "shift" "1")) "pj4_is,(pj4_alu1,pj4_w1+pj4_cp)|(pj4_alu2,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu_shift" 1 + (and (eq_attr "tune" "marvell_pj4") + (not (eq_attr "conds" "set")) + (eq_attr "type" "alu_shift_imm,logic_shift_imm,\ + alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,\ + alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) + "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") + +(define_insn_reservation "pj4_alu_shift_conds" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "conds" "set") + (eq_attr "type" "alu_shift_imm,logic_shift_imm,alus_shift_imm,logics_shift_imm,\ + alu_shift_reg,logic_shift_reg,alus_shift_reg,logics_shift_reg,\ + extend,\ + mov_shift,mvn_shift,mov_shift_reg,mvn_shift_reg")) + "pj4_is,(pj4_alu1,nothing,pj4_w1+pj4_cp)|(pj4_alu2,nothing,pj4_w2+pj4_cp)") + +(define_bypass 2 "pj4_alu_shift,pj4_shift" + "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") + +(define_insn_reservation "pj4_ir_mul" 3 + (and (eq_attr "tune" "marvell_pj4") + (ior (eq_attr 
"mul32" "yes") + (eq_attr "mul64" "yes"))) + "pj4_is,pj4_mul,nothing*2,pj4_cp") + +(define_insn_reservation "pj4_ir_div" 20 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp") + +;; Branches and calls. + +(define_insn_reservation "pj4_branches" 0 + (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "branch")) "pj4_is") + +(define_insn_reservation "pj4_calls" 32 + (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "call")) "pj4_is") + +;; Load/store instructions + +(define_insn_reservation "pj4_ldr" 3 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "load_byte,load1")) + "pj4_is,pj4_alu1,nothing*2,pj4_cp") + +(define_insn_reservation "pj4_ldrd" 3 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "load2")) + "pj4_is,pj4_alu1,nothing*2,pj4_cpb") + +(define_insn_reservation "pj4_str" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "store1")) + "pj4_is,pj4_alu1,nothing*2,pj4_cp") + +(define_insn_reservation "pj4_strd" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "store2")) + "pj4_is,pj4_alu1,nothing*2,pj4_cpb") + +(define_insn_reservation "pj4_ldm" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "load3,load4")) "pj4_isb,pj4_isb+pj4_alu1,pj4_alu1,nothing,pj4_cp,pj4_cp") + +(define_insn_reservation "pj4_stm" 2 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "store3,store4")) "pj4_isb,pj4_isb+pj4_alu1,pj4_alu1,nothing,pj4_cp,pj4_cp") + +;; Loads forward at WR-stage to ALU pipes +(define_bypass 2 "pj4_ldr,pj4_ldrd" "pj4_alu") +(define_bypass 2 "pj4_ldr,pj4_ldrd" "pj4_alu_shift" "arm_no_early_alu_shift_dep") + +(define_bypass 4 "pj4_ldr,pj4_ldrd" "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") +(define_bypass 5 "pj4_ldm" "pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp") + +;; Loads to stores can back-to-back forward +(define_bypass 1 "pj4_ldr,pj4_ldrd" "pj4_str,pj4_strd" "arm_no_early_store_addr_dep") + +;; PJ4 VFP floating point unit +(define_automaton "pj4_vfp") + +(define_cpu_unit "vissue" "pj4_vfp") +(define_cpu_unit "vadd" "pj4_vfp") +(define_cpu_unit "vmul" "pj4_vfp") +(define_cpu_unit "vdiv" "pj4_vfp") +(define_cpu_unit "vfast" "pj4_vfp") + +(define_insn_reservation "pj4_vfp_add" 5 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fadds,faddd")) "pj4_is,nothing*2,vissue,vadd,nothing*3") + +(define_insn_reservation "pj4_vfp_mul" 6 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fmuls,fmuld")) "pj4_is,nothing*2,vissue,vmul,nothing*4") + +(define_insn_reservation "pj4_vfp_divs" 20 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fdivs, fsqrts")) "pj4_is,nothing*2,vissue,vdiv*18,nothing") + +(define_insn_reservation "pj4_vfp_divd" 34 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fdivd, fsqrtd")) "pj4_is,nothing*2,vissue,vdiv*32,nothing") + +(define_insn_reservation "pj4_vfp_mac" 9 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fmacs,fmacd")) + "pj4_is,nothing*2,vissue,vmul,nothing*3,vadd,nothing*3") + +(define_bypass 5 "pj4_vfp_mac" "pj4_vfp_mac" "arm_no_early_mul_dep") + +(define_insn_reservation "pj4_vfp_cpy" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "fmov,ffariths,ffarithd,fconsts,fconstd,\ + fcmps,fcmpd,f_cvt,f_cvtf2i,f_cvti2f")) +"pj4_is,nothing*2,vissue,vfast,nothing*2") + +;; Enlarge latency, and wish that more nondependent insns are +;; scheduled immediately after VFP load. 
+(define_insn_reservation "pj4_vfp_load" 4 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "f_loads,f_loadd")) "pj4_isb,pj4_alu1,nothing,vissue,pj4_cp") + +(define_insn_reservation "pj4_vfp_store" 1 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "f_stores,f_stored")) "pj4_isb,pj4_alu1,nothing,vissue,pj4_cp") + +(define_insn_reservation "pj4_vfp_to_core" 7 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "f_mrc,f_mrrc,f_flag")) "pj4_isb,nothing,nothing,vissue,vfast,nothing*2") + +(define_insn_reservation "pj4_core_to_vfp" 2 + (and (eq_attr "tune" "marvell_pj4") + (eq_attr "type" "f_mcr,f_mcrr")) "pj4_isb,pj4_alu1,pj4_w1,vissue,pj4_cp") + diff --git a/gcc-4.9/gcc/config/arm/mmintrin.h b/gcc-4.9/gcc/config/arm/mmintrin.h new file mode 100644 index 000000000..b906faca4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/mmintrin.h @@ -0,0 +1,1836 @@ +/* Copyright (C) 2002-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _MMINTRIN_H_INCLUDED +#define _MMINTRIN_H_INCLUDED + +#ifndef __IWMMXT__ +#error mmintrin.h included without enabling WMMX/WMMX2 instructions (e.g. -march=iwmmxt or -march=iwmmxt2) +#endif + + +#if defined __cplusplus +extern "C" { +/* Intrinsics use C name-mangling. */ +#endif /* __cplusplus */ + +/* The data type intended for user use. */ +typedef unsigned long long __m64, __int64; + +/* Internal data types for implementing the intrinsics. */ +typedef int __v2si __attribute__ ((vector_size (8))); +typedef short __v4hi __attribute__ ((vector_size (8))); +typedef signed char __v8qi __attribute__ ((vector_size (8))); + +/* Provided for source compatibility with MMX. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_empty (void) +{ +} + +/* "Convert" __m64 and __int64 into each other. */ +static __inline __m64 +_mm_cvtsi64_m64 (__int64 __i) +{ + return __i; +} + +static __inline __int64 +_mm_cvtm64_si64 (__m64 __i) +{ + return __i; +} + +static __inline int +_mm_cvtsi64_si32 (__int64 __i) +{ + return __i; +} + +static __inline __int64 +_mm_cvtsi32_si64 (int __i) +{ + return (__i & 0xffffffff); +} + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with signed saturation. */ +static __inline __m64 +_mm_packs_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of + the result, and the two 32-bit values from M2 into the upper two 16-bit + values of the result, all with signed saturation. 
*/ +static __inline __m64 +_mm_packs_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and + the 64-bit value from M2 into the upper 32-bits of the result, all with + signed saturation for values that do not fit exactly into 32-bits. */ +static __inline __m64 +_mm_packs_pi64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackdss ((long long)__m1, (long long)__m2); +} + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with unsigned saturation. */ +static __inline __m64 +_mm_packs_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Pack the two 32-bit values from M1 into the lower two 16-bit values of + the result, and the two 32-bit values from M2 into the upper two 16-bit + values of the result, all with unsigned saturation. */ +static __inline __m64 +_mm_packs_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and + the 64-bit value from M2 into the upper 32-bits of the result, all with + unsigned saturation for values that do not fit exactly into 32-bits. */ +static __inline __m64 +_mm_packs_pu64 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wpackdus ((long long)__m1, (long long)__m2); +} + +/* Interleave the four 8-bit values from the high half of M1 with the four + 8-bit values from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Interleave the two 16-bit values from the high half of M1 with the two + 16-bit values from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Interleave the 32-bit value from the high half of M1 with the 32-bit + value from the high half of M2. */ +static __inline __m64 +_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckihw ((__v2si)__m1, (__v2si)__m2); +} + +/* Interleave the four 8-bit values from the low half of M1 with the four + 8-bit values from the low half of M2. */ +static __inline __m64 +_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Interleave the two 16-bit values from the low half of M1 with the two + 16-bit values from the low half of M2. */ +static __inline __m64 +_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Interleave the 32-bit value from the low half of M1 with the 32-bit + value from the low half of M2. */ +static __inline __m64 +_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wunpckilw ((__v2si)__m1, (__v2si)__m2); +} + +/* Take the four 8-bit values from the low half of M1, sign extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackel_pi8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsb ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the low half of M1, sign extend them, + and return the result as a vector of two 32-bit quantities. 
*/ +static __inline __m64 +_mm_unpackel_pi16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the low half of M1, and return it sign extended + to 64 bits. */ +static __inline __m64 +_mm_unpackel_pi32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelsw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the high half of M1, sign extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pi8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsb ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the high half of M1, sign extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pi16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the high half of M1, and return it sign extended + to 64 bits. */ +static __inline __m64 +_mm_unpackeh_pi32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehsw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the low half of M1, zero extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackel_pu8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckelub ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the low half of M1, zero extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackel_pu16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckeluh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the low half of M1, and return it zero extended + to 64 bits. */ +static __inline __m64 +_mm_unpackel_pu32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckeluw ((__v2si)__m1); +} + +/* Take the four 8-bit values from the high half of M1, zero extend them, + and return the result as a vector of four 16-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pu8 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehub ((__v8qi)__m1); +} + +/* Take the two 16-bit values from the high half of M1, zero extend them, + and return the result as a vector of two 32-bit quantities. */ +static __inline __m64 +_mm_unpackeh_pu16 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehuh ((__v4hi)__m1); +} + +/* Take the 32-bit value from the high half of M1, and return it zero extended + to 64 bits. */ +static __inline __m64 +_mm_unpackeh_pu32 (__m64 __m1) +{ + return (__m64) __builtin_arm_wunpckehuw ((__v2si)__m1); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2. */ +static __inline __m64 +_mm_add_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2. */ +static __inline __m64 +_mm_add_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2. */ +static __inline __m64 +_mm_add_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddw ((__v2si)__m1, (__v2si)__m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddbss ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed + saturated arithmetic. 
*/ +static __inline __m64 +_mm_adds_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2 using signed + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddbus ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2 using unsigned + saturated arithmetic. */ +static __inline __m64 +_mm_adds_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_waddwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ +static __inline __m64 +_mm_sub_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubb ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ +static __inline __m64 +_mm_sub_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ +static __inline __m64 +_mm_sub_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubw ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed + saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubbss ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + signed saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubhss ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using + signed saturating arithmetic. */ +static __inline __m64 +_mm_subs_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubwss ((__v2si)__m1, (__v2si)__m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using + unsigned saturating arithmetic. */ +static __inline __m64 +_mm_subs_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubbus ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + unsigned saturating arithmetic. */ +static __inline __m64 +_mm_subs_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubhus ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using + unsigned saturating arithmetic. */ +static __inline __m64 +_mm_subs_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wsubwus ((__v2si)__m1, (__v2si)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing + four 32-bit intermediate results, which are then summed by pairs to + produce two 32-bit results. 
*/ +static __inline __m64 +_mm_madd_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmadds ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing + four 32-bit intermediate results, which are then summed by pairs to + produce two 32-bit results. */ +static __inline __m64 +_mm_madd_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmaddu ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in + M2 and produce the high 16 bits of the 32-bit results. */ +static __inline __m64 +_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulsm ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in + M2 and produce the high 16 bits of the 32-bit results. */ +static __inline __m64 +_mm_mulhi_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulum ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce + the low 16 bits of the results. */ +static __inline __m64 +_mm_mullo_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wmulul ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Shift four 16-bit values in M left by COUNT. */ +static __inline __m64 +_mm_sll_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsllh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_slli_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsllhi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M left by COUNT. */ +static __inline __m64 +_mm_sll_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsllw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_slli_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsllwi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M left by COUNT. */ +static __inline __m64 +_mm_sll_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wslld (__m, __count); +} + +static __inline __m64 +_mm_slli_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wslldi (__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ +static __inline __m64 +_mm_sra_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrah ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_srai_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrahi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ +static __inline __m64 +_mm_sra_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsraw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_srai_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrawi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M right by COUNT; shift in the sign bit. */ +static __inline __m64 +_mm_sra_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrad (__m, __count); +} + +static __inline __m64 +_mm_srai_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsradi (__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in zeros. 
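+   This is a logical shift: a lane holding 0x8000 shifted right by one
+   becomes 0x4000, whereas the _mm_sra_* forms above would yield 0xc000.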
*/ +static __inline __m64 +_mm_srl_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrlh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_srli_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrlhi ((__v4hi)__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in zeros. */ +static __inline __m64 +_mm_srl_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrlw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_srli_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrlwi ((__v2si)__m, __count); +} + +/* Shift the 64-bit value in M left by COUNT; shift in zeros. */ +static __inline __m64 +_mm_srl_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wsrld (__m, __count); +} + +static __inline __m64 +_mm_srli_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wsrldi (__m, __count); +} + +/* Rotate four 16-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_pi16 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrorh ((__v4hi)__m, __count); +} + +static __inline __m64 +_mm_rori_pi16 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrorhi ((__v4hi)__m, __count); +} + +/* Rotate two 32-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_pi32 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrorw ((__v2si)__m, __count); +} + +static __inline __m64 +_mm_rori_pi32 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrorwi ((__v2si)__m, __count); +} + +/* Rotate two 64-bit values in M right by COUNT. */ +static __inline __m64 +_mm_ror_si64 (__m64 __m, __m64 __count) +{ + return (__m64) __builtin_arm_wrord (__m, __count); +} + +static __inline __m64 +_mm_rori_si64 (__m64 __m, int __count) +{ + return (__m64) __builtin_arm_wrordi (__m, __count); +} + +/* Bit-wise AND the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_and_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wand (__m1, __m2); +} + +/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the + 64-bit value in M2. */ +static __inline __m64 +_mm_andnot_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wandn (__m2, __m1); +} + +/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_or_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wor (__m1, __m2); +} + +/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ +static __inline __m64 +_mm_xor_si64 (__m64 __m1, __m64 __m2) +{ + return __builtin_arm_wxor (__m1, __m2); +} + +/* Compare eight 8-bit values. The result of the comparison is 0xFF if the + test is true and zero if false. */ +static __inline __m64 +_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqb ((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsb ((__v8qi)__m1, (__v8qi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu8 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtub ((__v8qi)__m1, (__v8qi)__m2); +} + +/* Compare four 16-bit values. The result of the comparison is 0xFFFF if + the test is true and zero if false. 
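+   The all-ones/all-zeros result is convenient as a select mask; a sketch
+   of the usual idiom (placeholder variables, not an API defined by this
+   header):
+
+     __m64 __mask = _mm_cmpgt_pi16 (__a, __b);
+     __m64 __sel  = _mm_or_si64 (_mm_and_si64 (__mask, __a),
+                                 _mm_andnot_si64 (__mask, __b));
+
+   which keeps the signed-greater 16-bit element of each lane.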
*/ +static __inline __m64 +_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqh ((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsh ((__v4hi)__m1, (__v4hi)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu16 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtuh ((__v4hi)__m1, (__v4hi)__m2); +} + +/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if + the test is true and zero if false. */ +static __inline __m64 +_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpeqw ((__v2si)__m1, (__v2si)__m2); +} + +static __inline __m64 +_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtsw ((__v2si)__m1, (__v2si)__m2); +} + +static __inline __m64 +_mm_cmpgt_pu32 (__m64 __m1, __m64 __m2) +{ + return (__m64) __builtin_arm_wcmpgtuw ((__v2si)__m1, (__v2si)__m2); +} + +/* Element-wise multiplication of unsigned 16-bit values __B and __C, followed + by accumulate across all elements and __A. */ +static __inline __m64 +_mm_mac_pu16 (__m64 __A, __m64 __B, __m64 __C) +{ + return __builtin_arm_wmacu (__A, (__v4hi)__B, (__v4hi)__C); +} + +/* Element-wise multiplication of signed 16-bit values __B and __C, followed + by accumulate across all elements and __A. */ +static __inline __m64 +_mm_mac_pi16 (__m64 __A, __m64 __B, __m64 __C) +{ + return __builtin_arm_wmacs (__A, (__v4hi)__B, (__v4hi)__C); +} + +/* Element-wise multiplication of unsigned 16-bit values __B and __C, followed + by accumulate across all elements. */ +static __inline __m64 +_mm_macz_pu16 (__m64 __A, __m64 __B) +{ + return __builtin_arm_wmacuz ((__v4hi)__A, (__v4hi)__B); +} + +/* Element-wise multiplication of signed 16-bit values __B and __C, followed + by accumulate across all elements. */ +static __inline __m64 +_mm_macz_pi16 (__m64 __A, __m64 __B) +{ + return __builtin_arm_wmacsz ((__v4hi)__A, (__v4hi)__B); +} + +/* Accumulate across all unsigned 8-bit values in __A. */ +static __inline __m64 +_mm_acc_pu8 (__m64 __A) +{ + return __builtin_arm_waccb ((__v8qi)__A); +} + +/* Accumulate across all unsigned 16-bit values in __A. */ +static __inline __m64 +_mm_acc_pu16 (__m64 __A) +{ + return __builtin_arm_wacch ((__v4hi)__A); +} + +/* Accumulate across all unsigned 32-bit values in __A. */ +static __inline __m64 +_mm_acc_pu32 (__m64 __A) +{ + return __builtin_arm_waccw ((__v2si)__A); +} + +static __inline __m64 +_mm_mia_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmia (__A, __B, __C); +} + +static __inline __m64 +_mm_miaph_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiaph (__A, __B, __C); +} + +static __inline __m64 +_mm_miabb_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiabb (__A, __B, __C); +} + +static __inline __m64 +_mm_miabt_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiabt (__A, __B, __C); +} + +static __inline __m64 +_mm_miatb_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiatb (__A, __B, __C); +} + +static __inline __m64 +_mm_miatt_si64 (__m64 __A, int __B, int __C) +{ + return __builtin_arm_tmiatt (__A, __B, __C); +} + +/* Extract one of the elements of A and sign extend. The selector N must + be immediate. 
*/ +#define _mm_extract_pi8(A, N) __builtin_arm_textrmsb ((__v8qi)(A), (N)) +#define _mm_extract_pi16(A, N) __builtin_arm_textrmsh ((__v4hi)(A), (N)) +#define _mm_extract_pi32(A, N) __builtin_arm_textrmsw ((__v2si)(A), (N)) + +/* Extract one of the elements of A and zero extend. The selector N must + be immediate. */ +#define _mm_extract_pu8(A, N) __builtin_arm_textrmub ((__v8qi)(A), (N)) +#define _mm_extract_pu16(A, N) __builtin_arm_textrmuh ((__v4hi)(A), (N)) +#define _mm_extract_pu32(A, N) __builtin_arm_textrmuw ((__v2si)(A), (N)) + +/* Inserts word D into one of the elements of A. The selector N must be + immediate. */ +#define _mm_insert_pi8(A, D, N) \ + ((__m64) __builtin_arm_tinsrb ((__v8qi)(A), (D), (N))) +#define _mm_insert_pi16(A, D, N) \ + ((__m64) __builtin_arm_tinsrh ((__v4hi)(A), (D), (N))) +#define _mm_insert_pi32(A, D, N) \ + ((__m64) __builtin_arm_tinsrw ((__v2si)(A), (D), (N))) + +/* Compute the element-wise maximum of signed 8-bit values. */ +static __inline __m64 +_mm_max_pi8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsb ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise maximum of signed 16-bit values. */ +static __inline __m64 +_mm_max_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise maximum of signed 32-bit values. */ +static __inline __m64 +_mm_max_pi32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxsw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise maximum of unsigned 8-bit values. */ +static __inline __m64 +_mm_max_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxub ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise maximum of unsigned 16-bit values. */ +static __inline __m64 +_mm_max_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxuh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise maximum of unsigned 32-bit values. */ +static __inline __m64 +_mm_max_pu32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wmaxuw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise minimum of signed 16-bit values. */ +static __inline __m64 +_mm_min_pi8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsb ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise minimum of signed 16-bit values. */ +static __inline __m64 +_mm_min_pi16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise minimum of signed 32-bit values. */ +static __inline __m64 +_mm_min_pi32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminsw ((__v2si)__A, (__v2si)__B); +} + +/* Compute the element-wise minimum of unsigned 16-bit values. */ +static __inline __m64 +_mm_min_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminub ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the element-wise minimum of unsigned 16-bit values. */ +static __inline __m64 +_mm_min_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminuh ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the element-wise minimum of unsigned 32-bit values. */ +static __inline __m64 +_mm_min_pu32 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wminuw ((__v2si)__A, (__v2si)__B); +} + +/* Create an 8-bit mask of the signs of 8-bit values. */ +static __inline int +_mm_movemask_pi8 (__m64 __A) +{ + return __builtin_arm_tmovmskb ((__v8qi)__A); +} + +/* Create an 8-bit mask of the signs of 16-bit values. 
*/ +static __inline int +_mm_movemask_pi16 (__m64 __A) +{ + return __builtin_arm_tmovmskh ((__v4hi)__A); +} + +/* Create an 8-bit mask of the signs of 32-bit values. */ +static __inline int +_mm_movemask_pi32 (__m64 __A) +{ + return __builtin_arm_tmovmskw ((__v2si)__A); +} + +/* Return a combination of the four 16-bit values in A. The selector + must be an immediate. */ +#define _mm_shuffle_pi16(A, N) \ + ((__m64) __builtin_arm_wshufh ((__v4hi)(A), (N))) + + +/* Compute the rounded averages of the unsigned 8-bit values in A and B. */ +static __inline __m64 +_mm_avg_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2br ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the rounded averages of the unsigned 16-bit values in A and B. */ +static __inline __m64 +_mm_avg_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2hr ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the averages of the unsigned 8-bit values in A and B. */ +static __inline __m64 +_mm_avg2_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2b ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the averages of the unsigned 16-bit values in A and B. */ +static __inline __m64 +_mm_avg2_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wavg2h ((__v4hi)__A, (__v4hi)__B); +} + +/* Compute the sum of the absolute differences of the unsigned 8-bit + values in A and B. Return the value in the lower 16-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sad_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B); +} + +static __inline __m64 +_mm_sada_pu8 (__m64 __A, __m64 __B, __m64 __C) +{ + return (__m64) __builtin_arm_wsadb ((__v2si)__A, (__v8qi)__B, (__v8qi)__C); +} + +/* Compute the sum of the absolute differences of the unsigned 16-bit + values in A and B. Return the value in the lower 32-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sad_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B); +} + +static __inline __m64 +_mm_sada_pu16 (__m64 __A, __m64 __B, __m64 __C) +{ + return (__m64) __builtin_arm_wsadh ((__v2si)__A, (__v4hi)__B, (__v4hi)__C); +} + + +/* Compute the sum of the absolute differences of the unsigned 8-bit + values in A and B. Return the value in the lower 16-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sadz_pu8 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B); +} + +/* Compute the sum of the absolute differences of the unsigned 16-bit + values in A and B. Return the value in the lower 32-bit word; the + upper words are cleared. */ +static __inline __m64 +_mm_sadz_pu16 (__m64 __A, __m64 __B) +{ + return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B); +} + +#define _mm_align_si64(__A,__B, N) \ + (__m64) __builtin_arm_walign ((__v8qi) (__A),(__v8qi) (__B), (N)) + +/* Creates a 64-bit zero. */ +static __inline __m64 +_mm_setzero_si64 (void) +{ + return __builtin_arm_wzero (); +} + +/* Set and Get arbitrary iWMMXt Control registers. + Note only registers 0-3 and 8-11 are currently defined, + the rest are reserved. 
*/ + +static __inline void +_mm_setwcx (const int __value, const int __regno) +{ + switch (__regno) + { + case 0: + __asm __volatile ("tmcr wcid, %0" :: "r"(__value)); + break; + case 1: + __asm __volatile ("tmcr wcon, %0" :: "r"(__value)); + break; + case 2: + __asm __volatile ("tmcr wcssf, %0" :: "r"(__value)); + break; + case 3: + __asm __volatile ("tmcr wcasf, %0" :: "r"(__value)); + break; + case 8: + __builtin_arm_setwcgr0 (__value); + break; + case 9: + __builtin_arm_setwcgr1 (__value); + break; + case 10: + __builtin_arm_setwcgr2 (__value); + break; + case 11: + __builtin_arm_setwcgr3 (__value); + break; + default: + break; + } +} + +static __inline int +_mm_getwcx (const int __regno) +{ + int __value; + switch (__regno) + { + case 0: + __asm __volatile ("tmrc %0, wcid" : "=r"(__value)); + break; + case 1: + __asm __volatile ("tmrc %0, wcon" : "=r"(__value)); + break; + case 2: + __asm __volatile ("tmrc %0, wcssf" : "=r"(__value)); + break; + case 3: + __asm __volatile ("tmrc %0, wcasf" : "=r"(__value)); + break; + case 8: + return __builtin_arm_getwcgr0 (); + case 9: + return __builtin_arm_getwcgr1 (); + case 10: + return __builtin_arm_getwcgr2 (); + case 11: + return __builtin_arm_getwcgr3 (); + default: + break; + } + return __value; +} + +/* Creates a vector of two 32-bit values; I0 is least significant. */ +static __inline __m64 +_mm_set_pi32 (int __i1, int __i0) +{ + union + { + __m64 __q; + struct + { + unsigned int __i0; + unsigned int __i1; + } __s; + } __u; + + __u.__s.__i0 = __i0; + __u.__s.__i1 = __i1; + + return __u.__q; +} + +/* Creates a vector of four 16-bit values; W0 is least significant. */ +static __inline __m64 +_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) +{ + unsigned int __i1 = (unsigned short) __w3 << 16 | (unsigned short) __w2; + unsigned int __i0 = (unsigned short) __w1 << 16 | (unsigned short) __w0; + + return _mm_set_pi32 (__i1, __i0); +} + +/* Creates a vector of eight 8-bit values; B0 is least significant. */ +static __inline __m64 +_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, + char __b3, char __b2, char __b1, char __b0) +{ + unsigned int __i1, __i0; + + __i1 = (unsigned char)__b7; + __i1 = __i1 << 8 | (unsigned char)__b6; + __i1 = __i1 << 8 | (unsigned char)__b5; + __i1 = __i1 << 8 | (unsigned char)__b4; + + __i0 = (unsigned char)__b3; + __i0 = __i0 << 8 | (unsigned char)__b2; + __i0 = __i0 << 8 | (unsigned char)__b1; + __i0 = __i0 << 8 | (unsigned char)__b0; + + return _mm_set_pi32 (__i1, __i0); +} + +/* Similar, but with the arguments in reverse order. */ +static __inline __m64 +_mm_setr_pi32 (int __i0, int __i1) +{ + return _mm_set_pi32 (__i1, __i0); +} + +static __inline __m64 +_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3) +{ + return _mm_set_pi16 (__w3, __w2, __w1, __w0); +} + +static __inline __m64 +_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3, + char __b4, char __b5, char __b6, char __b7) +{ + return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); +} + +/* Creates a vector of two 32-bit values, both elements containing I. */ +static __inline __m64 +_mm_set1_pi32 (int __i) +{ + return _mm_set_pi32 (__i, __i); +} + +/* Creates a vector of four 16-bit values, all elements containing W. */ +static __inline __m64 +_mm_set1_pi16 (short __w) +{ + unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w; + return _mm_set1_pi32 (__i); +} + +/* Creates a vector of four 16-bit values, all elements containing B. 
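+   (Concretely, the implementation below replicates B into all eight
+   8-bit lanes: it packs two copies of B into a 16-bit word, two copies
+   of that into a 32-bit word, and broadcasts the result with
+   _mm_set1_pi32.)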
*/ +static __inline __m64 +_mm_set1_pi8 (char __b) +{ + unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; + unsigned int __i = __w << 16 | __w; + return _mm_set1_pi32 (__i); +} + +#ifdef __IWMMXT2__ +static __inline __m64 +_mm_abs_pi8 (__m64 m1) +{ + return (__m64) __builtin_arm_wabsb ((__v8qi)m1); +} + +static __inline __m64 +_mm_abs_pi16 (__m64 m1) +{ + return (__m64) __builtin_arm_wabsh ((__v4hi)m1); + +} + +static __inline __m64 +_mm_abs_pi32 (__m64 m1) +{ + return (__m64) __builtin_arm_wabsw ((__v2si)m1); + +} + +static __inline __m64 +_mm_addsubhx_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_waddsubhx ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_absdiff_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wabsdiffb ((__v8qi)a, (__v8qi)b); +} + +static __inline __m64 +_mm_absdiff_pu16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wabsdiffh ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_absdiff_pu32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wabsdiffw ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_addc_pu16 (__m64 a, __m64 b) +{ + __m64 result; + __asm__ __volatile__ ("waddhc %0, %1, %2" : "=y" (result) : "y" (a), "y" (b)); + return result; +} + +static __inline __m64 +_mm_addc_pu32 (__m64 a, __m64 b) +{ + __m64 result; + __asm__ __volatile__ ("waddwc %0, %1, %2" : "=y" (result) : "y" (a), "y" (b)); + return result; +} + +static __inline __m64 +_mm_avg4_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wavg4 ((__v8qi)a, (__v8qi)b); +} + +static __inline __m64 +_mm_avg4r_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wavg4r ((__v8qi)a, (__v8qi)b); +} + +static __inline __m64 +_mm_maddx_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmaddsx ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_maddx_pu16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmaddux ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_msub_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmaddsn ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_msub_pu16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmaddun ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_mulhi_pi32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwsm ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_mulhi_pu32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwum ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_mulhir_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulsmr ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_mulhir_pi32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwsmr ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_mulhir_pu16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulumr ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_mulhir_pu32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwumr ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_mullo_pi32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wmulwl ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_qmulm_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wqmulm ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_qmulm_pi32 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wqmulwm ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_qmulmr_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wqmulmr ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_qmulmr_pi32 (__m64 a, __m64 b) +{ + 
return (__m64) __builtin_arm_wqmulwmr ((__v2si)a, (__v2si)b); +} + +static __inline __m64 +_mm_subaddhx_pi16 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_wsubaddhx ((__v4hi)a, (__v4hi)b); +} + +static __inline __m64 +_mm_addbhusl_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_waddbhusl ((__v4hi)a, (__v8qi)b); +} + +static __inline __m64 +_mm_addbhusm_pu8 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_waddbhusm ((__v4hi)a, (__v8qi)b); +} + +#define _mm_qmiabb_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiabb ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiabbn_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiabbn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiabt_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiabt ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiabtn_pi32(acc, m1, m2) \ + ({\ + __m64 _acc=acc;\ + __m64 _m1=m1;\ + __m64 _m2=m2;\ + _acc = (__m64) __builtin_arm_wqmiabtn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiatb_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiatb ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiatbn_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiatbn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiatt_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiatt ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_qmiattn_pi32(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wqmiattn ((__v2si)_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiabb_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiabb (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiabbn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiabbn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiabt_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiabt (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiabtn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiabtn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiatb_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiatb (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiatbn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiatbn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiatt_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiatt (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiattn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 
_m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiattn (_acc, (__v4hi)_m1, (__v4hi)_m2);\ + _acc;\ + }) + +#define _mm_wmiawbb_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawbb (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawbbn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawbbn (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawbt_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawbt (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawbtn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawbtn (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawtb_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawtb (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawtbn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawtbn (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawtt_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawtt (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +#define _mm_wmiawttn_si64(acc, m1, m2) \ + ({\ + __m64 _acc = acc;\ + __m64 _m1 = m1;\ + __m64 _m2 = m2;\ + _acc = (__m64) __builtin_arm_wmiawttn (_acc, (__v2si)_m1, (__v2si)_m2);\ + _acc;\ + }) + +/* The third arguments should be an immediate. */ +#define _mm_merge_si64(a, b, n) \ + ({\ + __m64 result;\ + result = (__m64) __builtin_arm_wmerge ((__m64) (a), (__m64) (b), (n));\ + result;\ + }) +#endif /* __IWMMXT2__ */ + +static __inline __m64 +_mm_alignr0_si64 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_walignr0 ((__v8qi) a, (__v8qi) b); +} + +static __inline __m64 +_mm_alignr1_si64 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_walignr1 ((__v8qi) a, (__v8qi) b); +} + +static __inline __m64 +_mm_alignr2_si64 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_walignr2 ((__v8qi) a, (__v8qi) b); +} + +static __inline __m64 +_mm_alignr3_si64 (__m64 a, __m64 b) +{ + return (__m64) __builtin_arm_walignr3 ((__v8qi) a, (__v8qi) b); +} + +static __inline void +_mm_tandcb () +{ + __asm __volatile ("tandcb r15"); +} + +static __inline void +_mm_tandch () +{ + __asm __volatile ("tandch r15"); +} + +static __inline void +_mm_tandcw () +{ + __asm __volatile ("tandcw r15"); +} + +#define _mm_textrcb(n) \ + ({\ + __asm__ __volatile__ (\ + "textrcb r15, %0" : : "i" (n));\ + }) + +#define _mm_textrch(n) \ + ({\ + __asm__ __volatile__ (\ + "textrch r15, %0" : : "i" (n));\ + }) + +#define _mm_textrcw(n) \ + ({\ + __asm__ __volatile__ (\ + "textrcw r15, %0" : : "i" (n));\ + }) + +static __inline void +_mm_torcb () +{ + __asm __volatile ("torcb r15"); +} + +static __inline void +_mm_torch () +{ + __asm __volatile ("torch r15"); +} + +static __inline void +_mm_torcw () +{ + __asm __volatile ("torcw r15"); +} + +#ifdef __IWMMXT2__ +static __inline void +_mm_torvscb () +{ + __asm __volatile ("torvscb r15"); +} + +static __inline void +_mm_torvsch () +{ + __asm __volatile ("torvsch r15"); +} + +static __inline void +_mm_torvscw () +{ + __asm __volatile ("torvscw r15"); +} +#endif /* __IWMMXT2__ */ + +static __inline __m64 +_mm_tbcst_pi8 (int 
value) +{ + return (__m64) __builtin_arm_tbcstb ((signed char) value); +} + +static __inline __m64 +_mm_tbcst_pi16 (int value) +{ + return (__m64) __builtin_arm_tbcsth ((short) value); +} + +static __inline __m64 +_mm_tbcst_pi32 (int value) +{ + return (__m64) __builtin_arm_tbcstw (value); +} + +#define _m_empty _mm_empty +#define _m_packsswb _mm_packs_pi16 +#define _m_packssdw _mm_packs_pi32 +#define _m_packuswb _mm_packs_pu16 +#define _m_packusdw _mm_packs_pu32 +#define _m_packssqd _mm_packs_pi64 +#define _m_packusqd _mm_packs_pu64 +#define _mm_packs_si64 _mm_packs_pi64 +#define _mm_packs_su64 _mm_packs_pu64 +#define _m_punpckhbw _mm_unpackhi_pi8 +#define _m_punpckhwd _mm_unpackhi_pi16 +#define _m_punpckhdq _mm_unpackhi_pi32 +#define _m_punpcklbw _mm_unpacklo_pi8 +#define _m_punpcklwd _mm_unpacklo_pi16 +#define _m_punpckldq _mm_unpacklo_pi32 +#define _m_punpckehsbw _mm_unpackeh_pi8 +#define _m_punpckehswd _mm_unpackeh_pi16 +#define _m_punpckehsdq _mm_unpackeh_pi32 +#define _m_punpckehubw _mm_unpackeh_pu8 +#define _m_punpckehuwd _mm_unpackeh_pu16 +#define _m_punpckehudq _mm_unpackeh_pu32 +#define _m_punpckelsbw _mm_unpackel_pi8 +#define _m_punpckelswd _mm_unpackel_pi16 +#define _m_punpckelsdq _mm_unpackel_pi32 +#define _m_punpckelubw _mm_unpackel_pu8 +#define _m_punpckeluwd _mm_unpackel_pu16 +#define _m_punpckeludq _mm_unpackel_pu32 +#define _m_paddb _mm_add_pi8 +#define _m_paddw _mm_add_pi16 +#define _m_paddd _mm_add_pi32 +#define _m_paddsb _mm_adds_pi8 +#define _m_paddsw _mm_adds_pi16 +#define _m_paddsd _mm_adds_pi32 +#define _m_paddusb _mm_adds_pu8 +#define _m_paddusw _mm_adds_pu16 +#define _m_paddusd _mm_adds_pu32 +#define _m_psubb _mm_sub_pi8 +#define _m_psubw _mm_sub_pi16 +#define _m_psubd _mm_sub_pi32 +#define _m_psubsb _mm_subs_pi8 +#define _m_psubsw _mm_subs_pi16 +#define _m_psubuw _mm_subs_pi32 +#define _m_psubusb _mm_subs_pu8 +#define _m_psubusw _mm_subs_pu16 +#define _m_psubusd _mm_subs_pu32 +#define _m_pmaddwd _mm_madd_pi16 +#define _m_pmadduwd _mm_madd_pu16 +#define _m_pmulhw _mm_mulhi_pi16 +#define _m_pmulhuw _mm_mulhi_pu16 +#define _m_pmullw _mm_mullo_pi16 +#define _m_pmacsw _mm_mac_pi16 +#define _m_pmacuw _mm_mac_pu16 +#define _m_pmacszw _mm_macz_pi16 +#define _m_pmacuzw _mm_macz_pu16 +#define _m_paccb _mm_acc_pu8 +#define _m_paccw _mm_acc_pu16 +#define _m_paccd _mm_acc_pu32 +#define _m_pmia _mm_mia_si64 +#define _m_pmiaph _mm_miaph_si64 +#define _m_pmiabb _mm_miabb_si64 +#define _m_pmiabt _mm_miabt_si64 +#define _m_pmiatb _mm_miatb_si64 +#define _m_pmiatt _mm_miatt_si64 +#define _m_psllw _mm_sll_pi16 +#define _m_psllwi _mm_slli_pi16 +#define _m_pslld _mm_sll_pi32 +#define _m_pslldi _mm_slli_pi32 +#define _m_psllq _mm_sll_si64 +#define _m_psllqi _mm_slli_si64 +#define _m_psraw _mm_sra_pi16 +#define _m_psrawi _mm_srai_pi16 +#define _m_psrad _mm_sra_pi32 +#define _m_psradi _mm_srai_pi32 +#define _m_psraq _mm_sra_si64 +#define _m_psraqi _mm_srai_si64 +#define _m_psrlw _mm_srl_pi16 +#define _m_psrlwi _mm_srli_pi16 +#define _m_psrld _mm_srl_pi32 +#define _m_psrldi _mm_srli_pi32 +#define _m_psrlq _mm_srl_si64 +#define _m_psrlqi _mm_srli_si64 +#define _m_prorw _mm_ror_pi16 +#define _m_prorwi _mm_rori_pi16 +#define _m_prord _mm_ror_pi32 +#define _m_prordi _mm_rori_pi32 +#define _m_prorq _mm_ror_si64 +#define _m_prorqi _mm_rori_si64 +#define _m_pand _mm_and_si64 +#define _m_pandn _mm_andnot_si64 +#define _m_por _mm_or_si64 +#define _m_pxor _mm_xor_si64 +#define _m_pcmpeqb _mm_cmpeq_pi8 +#define _m_pcmpeqw _mm_cmpeq_pi16 +#define _m_pcmpeqd _mm_cmpeq_pi32 +#define _m_pcmpgtb 
_mm_cmpgt_pi8 +#define _m_pcmpgtub _mm_cmpgt_pu8 +#define _m_pcmpgtw _mm_cmpgt_pi16 +#define _m_pcmpgtuw _mm_cmpgt_pu16 +#define _m_pcmpgtd _mm_cmpgt_pi32 +#define _m_pcmpgtud _mm_cmpgt_pu32 +#define _m_pextrb _mm_extract_pi8 +#define _m_pextrw _mm_extract_pi16 +#define _m_pextrd _mm_extract_pi32 +#define _m_pextrub _mm_extract_pu8 +#define _m_pextruw _mm_extract_pu16 +#define _m_pextrud _mm_extract_pu32 +#define _m_pinsrb _mm_insert_pi8 +#define _m_pinsrw _mm_insert_pi16 +#define _m_pinsrd _mm_insert_pi32 +#define _m_pmaxsb _mm_max_pi8 +#define _m_pmaxsw _mm_max_pi16 +#define _m_pmaxsd _mm_max_pi32 +#define _m_pmaxub _mm_max_pu8 +#define _m_pmaxuw _mm_max_pu16 +#define _m_pmaxud _mm_max_pu32 +#define _m_pminsb _mm_min_pi8 +#define _m_pminsw _mm_min_pi16 +#define _m_pminsd _mm_min_pi32 +#define _m_pminub _mm_min_pu8 +#define _m_pminuw _mm_min_pu16 +#define _m_pminud _mm_min_pu32 +#define _m_pmovmskb _mm_movemask_pi8 +#define _m_pmovmskw _mm_movemask_pi16 +#define _m_pmovmskd _mm_movemask_pi32 +#define _m_pshufw _mm_shuffle_pi16 +#define _m_pavgb _mm_avg_pu8 +#define _m_pavgw _mm_avg_pu16 +#define _m_pavg2b _mm_avg2_pu8 +#define _m_pavg2w _mm_avg2_pu16 +#define _m_psadbw _mm_sad_pu8 +#define _m_psadwd _mm_sad_pu16 +#define _m_psadzbw _mm_sadz_pu8 +#define _m_psadzwd _mm_sadz_pu16 +#define _m_paligniq _mm_align_si64 +#define _m_cvt_si2pi _mm_cvtsi64_m64 +#define _m_cvt_pi2si _mm_cvtm64_si64 +#define _m_from_int _mm_cvtsi32_si64 +#define _m_to_int _mm_cvtsi64_si32 + +#if defined __cplusplus +}; /* End "C" */ +#endif /* __cplusplus */ + +#endif /* _MMINTRIN_H_INCLUDED */ diff --git a/gcc-4.9/gcc/config/arm/neon-docgen.ml b/gcc-4.9/gcc/config/arm/neon-docgen.ml new file mode 100644 index 000000000..5788a533e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon-docgen.ml @@ -0,0 +1,424 @@ +(* ARM NEON documentation generator. + + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-docgen neon.cmo neon-docgen.ml + + Run with: + /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi +*) + +open Neon + +(* The combined "ops" and "reinterp" table. *) +let ops_reinterp = reinterp @ ops + +(* Helper functions for extracting things from the "ops" table. 
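+   Each helper below folds over ops_reinterp and collects the rows whose
+   opcode field matches; for instance single_opcode Vadd () returns every
+   table entry describing the vadd intrinsics, and these selectors are
+   what the intrinsic_groups table further down is built from.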
*) +let single_opcode desired_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + if opcode = desired_opcode then row :: got_so_far + else got_so_far + ) [] ops_reinterp + +let multiple_opcodes desired_opcodes () = + List.fold_left (fun got_so_far -> + fun desired_opcode -> + (single_opcode desired_opcode ()) @ got_so_far) + [] desired_opcodes + +let ldx_opcode number () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vldx n | Vldx_lane n | Vldx_dup n when n = number -> + row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let stx_opcode number () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vstx n | Vstx_lane n when n = number -> + row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let tbl_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vtbl _ -> row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +let tbx_opcode () = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (opcode, _, _, _, _, _) -> + match opcode with + Vtbx _ -> row :: got_so_far + | _ -> got_so_far + ) [] ops_reinterp + +(* The groups of intrinsics. *) +let intrinsic_groups = + [ "Addition", single_opcode Vadd; + "Multiplication", single_opcode Vmul; + "Multiply-accumulate", single_opcode Vmla; + "Multiply-subtract", single_opcode Vmls; + "Fused-multiply-accumulate", single_opcode Vfma; + "Fused-multiply-subtract", single_opcode Vfms; + "Round to integral (to nearest, ties to even)", single_opcode Vrintn; + "Round to integral (to nearest, ties away from zero)", single_opcode Vrinta; + "Round to integral (towards +Inf)", single_opcode Vrintp; + "Round to integral (towards -Inf)", single_opcode Vrintm; + "Round to integral (towards 0)", single_opcode Vrintz; + "Subtraction", single_opcode Vsub; + "Comparison (equal-to)", single_opcode Vceq; + "Comparison (greater-than-or-equal-to)", single_opcode Vcge; + "Comparison (less-than-or-equal-to)", single_opcode Vcle; + "Comparison (greater-than)", single_opcode Vcgt; + "Comparison (less-than)", single_opcode Vclt; + "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage; + "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale; + "Comparison (absolute greater-than)", single_opcode Vcagt; + "Comparison (absolute less-than)", single_opcode Vcalt; + "Test bits", single_opcode Vtst; + "Absolute difference", single_opcode Vabd; + "Absolute difference and accumulate", single_opcode Vaba; + "Maximum", single_opcode Vmax; + "Minimum", single_opcode Vmin; + "Pairwise add", single_opcode Vpadd; + "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada; + "Folding maximum", single_opcode Vpmax; + "Folding minimum", single_opcode Vpmin; + "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts]; + "Vector shift left", single_opcode Vshl; + "Vector shift left by constant", single_opcode Vshl_n; + "Vector shift right by constant", single_opcode Vshr_n; + "Vector shift right by constant and accumulate", single_opcode Vsra_n; + "Vector shift right and insert", single_opcode Vsri; + "Vector shift left and insert", single_opcode Vsli; + "Absolute value", single_opcode Vabs; + "Negation", single_opcode Vneg; + "Bitwise not", single_opcode Vmvn; + "Count leading sign bits", single_opcode Vcls; + "Count leading zeros", single_opcode Vclz; + 
"Count number of set bits", single_opcode Vcnt; + "Reciprocal estimate", single_opcode Vrecpe; + "Reciprocal square-root estimate", single_opcode Vrsqrte; + "Get lanes from a vector", single_opcode Vget_lane; + "Set lanes in a vector", single_opcode Vset_lane; + "Create vector from literal bit pattern", single_opcode Vcreate; + "Set all lanes to the same value", + multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane]; + "Combining vectors", single_opcode Vcombine; + "Splitting vectors", multiple_opcodes [Vget_high; Vget_low]; + "Conversions", multiple_opcodes [Vcvt; Vcvt_n]; + "Move, single_opcode narrowing", single_opcode Vmovn; + "Move, single_opcode long", single_opcode Vmovl; + "Table lookup", tbl_opcode; + "Extended table lookup", tbx_opcode; + "Multiply, lane", single_opcode Vmul_lane; + "Long multiply, lane", single_opcode Vmull_lane; + "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane; + "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane; + "Multiply-accumulate, lane", single_opcode Vmla_lane; + "Multiply-subtract, lane", single_opcode Vmls_lane; + "Vector multiply by scalar", single_opcode Vmul_n; + "Vector long multiply by scalar", single_opcode Vmull_n; + "Vector saturating doubling long multiply by scalar", + single_opcode Vqdmull_n; + "Vector saturating doubling multiply high by scalar", + single_opcode Vqdmulh_n; + "Vector multiply-accumulate by scalar", single_opcode Vmla_n; + "Vector multiply-subtract by scalar", single_opcode Vmls_n; + "Vector extract", single_opcode Vext; + "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16]; + "Bit selection", single_opcode Vbsl; + "Transpose elements", single_opcode Vtrn; + "Zip elements", single_opcode Vzip; + "Unzip elements", single_opcode Vuzp; + "Element/structure loads, VLD1 variants", ldx_opcode 1; + "Element/structure stores, VST1 variants", stx_opcode 1; + "Element/structure loads, VLD2 variants", ldx_opcode 2; + "Element/structure stores, VST2 variants", stx_opcode 2; + "Element/structure loads, VLD3 variants", ldx_opcode 3; + "Element/structure stores, VST3 variants", stx_opcode 3; + "Element/structure loads, VLD4 variants", ldx_opcode 4; + "Element/structure stores, VST4 variants", stx_opcode 4; + "Logical operations (AND)", single_opcode Vand; + "Logical operations (OR)", single_opcode Vorr; + "Logical operations (exclusive OR)", single_opcode Veor; + "Logical operations (AND-NOT)", single_opcode Vbic; + "Logical operations (OR-NOT)", single_opcode Vorn; + "Reinterpret casts", single_opcode Vreinterp ] + +(* Given an intrinsic shape, produce a string to document the corresponding + operand shapes. 
*) +let rec analyze_shape shape = + let rec n_things n thing = + match n with + 0 -> [] + | n -> thing :: (n_things (n - 1) thing) + in + let rec analyze_shape_elt reg_no elt = + match elt with + Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}" + | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}" + | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}" + | Immed -> "#@var{0}" + | VecArray (1, elt) -> + let elt_regexp = analyze_shape_elt 0 elt in + "@{" ^ elt_regexp ^ "@}" + | VecArray (n, elt) -> + let rec f m = + match m with + 0 -> [] + | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1)) + in + let ops = List.rev (f n) in + "@{" ^ (commas (fun x -> x) ops "") ^ "@}" + | (PtrTo elt | CstPtrTo elt) -> + "[" ^ (analyze_shape_elt reg_no elt) ^ "]" + | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]" + | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]" + | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]" + | Alternatives alts -> (analyze_shape_elt reg_no (List.hd alts)) + in + match shape with + All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) "" + | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^ + ", " ^ (analyze_shape_elt 0 Dreg) + | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^ + (analyze_shape_elt 0 elt) + | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ + ", " ^ (analyze_shape_elt 0 Dreg) + | Wide_noreg elt -> analyze_shape (Long_noreg elt) + | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^ + ", " ^ (analyze_shape_elt 0 Qreg) + | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) "" + | By_scalar Dreg -> + analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) + | By_scalar Qreg -> + analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) + | By_scalar _ -> assert false + | Wide_lane -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Wide_scalar -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Pair_result elt -> + let elt_regexp = analyze_shape_elt 0 elt in + let elt_regexp' = analyze_shape_elt 1 elt in + elt_regexp ^ ", " ^ elt_regexp' + | Unary_scalar _ -> "FIXME Unary_scalar" + | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) + | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) + | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) + +(* Document a single intrinsic. *) +let describe_intrinsic first chan + (elt_ty, (_, features, shape, name, munge, _)) = + let c_arity, new_elt_ty = munge shape elt_ty in + let c_types = strings_of_arity c_arity in + Printf.fprintf chan "@itemize @bullet\n"; + let item_code = if first then "@item" else "@itemx" in + Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types) + (intrinsic_name name) (string_of_elt elt_ty); + Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) ""); + if not (List.exists (fun feature -> feature = No_op) features) then + begin + let print_one_insn name = + Printf.fprintf chan "@code{"; + let no_suffix = (new_elt_ty = NoElts) in + let name_with_suffix = + if no_suffix then name + else name ^ "." ^ (string_of_elt_dots new_elt_ty) + in + let possible_operands = analyze_all_shapes features shape + analyze_shape + in + let rec print_one_possible_operand op = + Printf.fprintf chan "%s %s}" name_with_suffix op + in + (* If the intrinsic expands to multiple instructions, we assume + they are all of the same form. 
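+         Only the head of possible_operands is therefore printed.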
*) + print_one_possible_operand (List.hd possible_operands) + in + let rec print_insns names = + match names with + [] -> () + | [name] -> print_one_insn name + | name::names -> (print_one_insn name; + Printf.fprintf chan " @emph{or} "; + print_insns names) + in + let insn_names = get_insn_names features name in + Printf.fprintf chan "@*@emph{Form of expected instruction(s):} "; + print_insns insn_names; + Printf.fprintf chan "\n" + end; + Printf.fprintf chan "@end itemize\n"; + Printf.fprintf chan "\n\n" + +(* Document a group of intrinsics. *) +let document_group chan (group_title, group_extractor) = + (* Extract the rows in question from the ops table and then turn them + into a list of intrinsics. *) + let intrinsics = + List.fold_left (fun got_so_far -> + fun row -> + match row with + (_, _, _, _, _, elt_tys) -> + List.fold_left (fun got_so_far' -> + fun elt_ty -> + (elt_ty, row) :: got_so_far') + got_so_far elt_tys + ) [] (group_extractor ()) + in + (* Emit the title for this group. *) + Printf.fprintf chan "@subsubsection %s\n\n" group_title; + (* Emit a description of each intrinsic. *) + List.iter (describe_intrinsic true chan) intrinsics; + (* Close this group. *) + Printf.fprintf chan "\n\n" + +let gnu_header chan = + List.iter (fun s -> Printf.fprintf chan "%s\n" s) [ + "@c Copyright (C) 2006-2014 Free Software Foundation, Inc."; + "@c This is part of the GCC manual."; + "@c For copying conditions, see the file gcc.texi."; + ""; + "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml"; + "@c Please do not edit manually."] + +let crypto_doc = +" +@itemize @bullet +@item poly128_t vldrq_p128(poly128_t const *) +@end itemize + +@itemize @bullet +@item void vstrq_p128(poly128_t *, poly128_t) +@end itemize + +@itemize @bullet +@item uint64x1_t vceq_p64 (poly64x1_t, poly64x1_t) +@end itemize + +@itemize @bullet +@item uint64x1_t vtst_p64 (poly64x1_t, poly64x1_t) +@end itemize + +@itemize @bullet +@item uint32_t vsha1h_u32 (uint32_t) +@*@emph{Form of expected instruction(s):} @code{sha1h.32 @var{q0}, @var{q1}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1cq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1c.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1pq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1p.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1mq_u32 (uint32x4_t, uint32_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1m.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1su0q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1su0.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha1su1q_u32 (uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha1su1.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256hq_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256h.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256h2q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256h2.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256su0q_u32 (uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256su0.32 
@var{q0}, @var{q1}} +@end itemize + +@itemize @bullet +@item uint32x4_t vsha256su1q_u32 (uint32x4_t, uint32x4_t, uint32x4_t) +@*@emph{Form of expected instruction(s):} @code{sha256su1.32 @var{q0}, @var{q1}, @var{q2}} +@end itemize + +@itemize @bullet +@item poly128_t vmull_p64 (poly64_t a, poly64_t b) +@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} +@end itemize + +@itemize @bullet +@item poly128_t vmull_high_p64 (poly64x2_t a, poly64x2_t b) +@*@emph{Form of expected instruction(s):} @code{vmull.p64 @var{q0}, @var{d1}, @var{d2}} +@end itemize +" + +(* Program entry point. *) +let _ = + if Array.length Sys.argv <> 2 then + failwith "Usage: neon-docgen " + else + let file = Sys.argv.(1) in + try + let chan = open_out file in + gnu_header chan; + List.iter (document_group chan) intrinsic_groups; + Printf.fprintf chan "%s\n" crypto_doc; + close_out chan + with Sys_error sys -> + failwith ("Could not create output file " ^ file ^ ": " ^ sys) diff --git a/gcc-4.9/gcc/config/arm/neon-gen.ml b/gcc-4.9/gcc/config/arm/neon-gen.ml new file mode 100644 index 000000000..f3dd86b0a --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon-gen.ml @@ -0,0 +1,520 @@ +(* Auto-generate ARM Neon intrinsics header file. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-gen neon.cmo neon-gen.ml + + Run with: + ./neon-gen > arm_neon.h +*) + +open Neon + +(* The format codes used in the following functions are documented at: + http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\ + #6_printflikefunctionsforprettyprinting + (one line, remove the backslash.) +*) + +(* Following functions can be used to approximate GNU indentation style. 
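+   start_function, open_braceblock, close_braceblock and end_function are
+   combined by print_function so that each emitted intrinsic comes out
+   roughly as
+
+     __extension__ static __inline <ret> __attribute__ ((__always_inline__))
+     <name> (<args>)
+     {
+       <body>
+     }
+
+   with GNU-style brace placement and indentation.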
*) +let start_function () = + Format.printf "@["; + ref 0 + +let end_function nesting = + match !nesting with + 0 -> Format.printf "@;@;@]" + | _ -> failwith ("Bad nesting (ending function at level " + ^ (string_of_int !nesting) ^ ")") + +let open_braceblock nesting = + begin match !nesting with + 0 -> Format.printf "@,@<0>{@[@," + | _ -> Format.printf "@,@[ @<0>{@[@," + end; + incr nesting + +let close_braceblock nesting = + decr nesting; + match !nesting with + 0 -> Format.printf "@]@,@<0>}" + | _ -> Format.printf "@]@,@<0>}@]" + +let print_function arity fnname body = + let ffmt = start_function () in + Format.printf "__extension__ static __inline "; + let inl = "__attribute__ ((__always_inline__))" in + begin match arity with + Arity0 ret -> + Format.printf "%s %s@,%s (void)" (string_of_vectype ret) inl fnname + | Arity1 (ret, arg0) -> + Format.printf "%s %s@,%s (%s __a)" (string_of_vectype ret) inl fnname + (string_of_vectype arg0) + | Arity2 (ret, arg0, arg1) -> + Format.printf "%s %s@,%s (%s __a, %s __b)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) + | Arity3 (ret, arg0, arg1, arg2) -> + Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) (string_of_vectype arg2) + | Arity4 (ret, arg0, arg1, arg2, arg3) -> + Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)" + (string_of_vectype ret) inl fnname (string_of_vectype arg0) + (string_of_vectype arg1) (string_of_vectype arg2) + (string_of_vectype arg3) + end; + open_braceblock ffmt; + let rec print_lines = function + [] -> () + | "" :: lines -> print_lines lines + | [line] -> Format.printf "%s" line + | line::lines -> Format.printf "%s@," line ; print_lines lines in + print_lines body; + close_braceblock ffmt; + end_function ffmt + +let union_string num elts base = + let itype = inttype_for_array num elts in + let iname = string_of_inttype itype + and sname = string_of_vectype (T_arrayof (num, elts)) in + Printf.sprintf "union { %s __i; %s __o; } %s" sname iname base + +let rec signed_ctype = function + T_uint8x8 | T_poly8x8 -> T_int8x8 + | T_uint8x16 | T_poly8x16 -> T_int8x16 + | T_uint16x4 | T_poly16x4 -> T_int16x4 + | T_uint16x8 | T_poly16x8 -> T_int16x8 + | T_uint32x2 -> T_int32x2 + | T_uint32x4 -> T_int32x4 + | T_uint64x1 -> T_int64x1 + | T_uint64x2 -> T_int64x2 + | T_poly64x2 -> T_int64x2 + (* Cast to types defined by mode in arm.c, not random types pulled in from + the header in use. This fixes incompatible pointer errors when + compiling with C++. *) + | T_uint8 | T_int8 -> T_intQI + | T_uint16 | T_int16 -> T_intHI + | T_uint32 | T_int32 -> T_intSI + | T_uint64 | T_int64 -> T_intDI + | T_float16 -> T_floatHF + | T_float32 -> T_floatSF + | T_poly8 -> T_intQI + | T_poly16 -> T_intHI + | T_poly64 -> T_intDI + | T_poly128 -> T_intTI + | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) + | T_ptrto elt -> T_ptrto (signed_ctype elt) + | T_const elt -> T_const (signed_ctype elt) + | x -> x + +let add_cast ctype cval = + let stype = signed_ctype ctype in + if ctype <> stype then + Printf.sprintf "(%s) %s" (string_of_vectype stype) cval + else + cval + +let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")" + +(* Return a tuple of a list of declarations to go at the start of the function, + and a list of statements needed to return THING. 
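+   For a T_arrayof return type the declarations introduce the
+   union { <array type> __i; <wide integer> __o; } __rv trick built by
+   union_string above, and the statements store THING in __rv.__o and
+   return __rv.__i; other non-void return types simply get the result of
+   cast_for_return prepended to THING, while T_void just executes it.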
*) +let return arity thing = + match arity with + Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) + | Arity4 (ret, _, _, _, _) -> + begin match ret with + T_arrayof (num, vec) -> + let uname = union_string num vec "__rv" in + [uname ^ ";"], ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"] + | T_void -> + [], [thing ^ ";"] + | _ -> + [], ["return " ^ (cast_for_return ret) ^ thing ^ ";"] + end + +let mask_shape_for_shuffle = function + All (num, reg) -> All (num, reg) + | Pair_result reg -> All (2, reg) + | _ -> failwith "mask_for_shuffle" + +let mask_elems shuffle shape elttype part = + let elem_size = elt_width elttype in + let num_elems = + match regmap shape 0 with + Dreg -> 64 / elem_size + | Qreg -> 128 / elem_size + | _ -> failwith "mask_elems" in + shuffle elem_size num_elems part + +(* Return a tuple of a list of declarations 0and a list of statements needed + to implement an intrinsic using __builtin_shuffle. SHUFFLE is a function + which returns a list of elements suitable for using as a mask. *) + +let shuffle_fn shuffle shape arity elttype = + let mshape = mask_shape_for_shuffle shape in + let masktype = type_for_elt mshape (unsigned_of_elt elttype) 0 in + let masktype_str = string_of_vectype masktype in + let shuffle_res = type_for_elt mshape elttype 0 in + let shuffle_res_str = string_of_vectype shuffle_res in + match arity with + Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) + | Arity4 (ret, _, _, _, _) -> + begin match ret with + T_arrayof (num, vec) -> + let elems1 = mask_elems shuffle mshape elttype `lo + and elems2 = mask_elems shuffle mshape elttype `hi in + let mask1 = (String.concat ", " (List.map string_of_int elems1)) + and mask2 = (String.concat ", " (List.map string_of_int elems2)) in + let shuf1 = Printf.sprintf + "__rv.val[0] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" + shuffle_res_str masktype_str mask1 + and shuf2 = Printf.sprintf + "__rv.val[1] = (%s) __builtin_shuffle (__a, __b, (%s) { %s });" + shuffle_res_str masktype_str mask2 in + [Printf.sprintf "%s __rv;" (string_of_vectype ret);], + [shuf1; shuf2; "return __rv;"] + | _ -> + let elems = mask_elems shuffle mshape elttype `lo in + let mask = (String.concat ", " (List.map string_of_int elems)) in + let shuf = Printf.sprintf + "return (%s) __builtin_shuffle (__a, (%s) { %s });" shuffle_res_str masktype_str mask in + [""], + [shuf] + end + +let rec element_type ctype = + match ctype with + T_arrayof (_, v) -> element_type v + | _ -> ctype + +let params ps = + let pdecls = ref [] in + let ptype t p = + match t with + T_arrayof (num, elts) -> + let uname = union_string num elts (p ^ "u") in + let decl = Printf.sprintf "%s = { %s };" uname p in + pdecls := decl :: !pdecls; + p ^ "u.__o" + | _ -> add_cast t p in + let plist = match ps with + Arity0 _ -> [] + | Arity1 (_, t1) -> [ptype t1 "__a"] + | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"] + | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"] + | Arity4 (_, t1, t2, t3, t4) -> + [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in + !pdecls, plist + +let modify_params features plist = + let is_flipped = + List.exists (function Flipped _ -> true | _ -> false) features in + if is_flipped then + match plist with + [ a; b ] -> [ b; a ] + | _ -> + failwith ("Don't know how to flip args " ^ (String.concat ", " plist)) + else + plist + +(* !!! Decide whether to add an extra information word based on the shape + form. 
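+   The extra word is the "magic" constant appended as a trailing argument
+   to the builtin call: it is always added for the shapes listed in the
+   match below, and otherwise only when the InfoWord feature is present.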
*) +let extra_word shape features paramlist bits = + let use_word = + match shape with + All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow + | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm + | Narrow_imm -> true + | _ -> List.mem InfoWord features + in + if use_word then + paramlist @ [string_of_int bits] + else + paramlist + +(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0). + Bit 1 represents floats & polynomials (1), or ordinary integers (0). + Bit 2 represents rounding (1) vs none (0). *) +let infoword_value elttype features = + let bits01 = + match elt_class elttype with + Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001 + | Poly -> 0b010 + | Float -> 0b011 + | _ -> 0b000 + and rounding_bit = if List.mem Rounding features then 0b100 else 0b000 in + bits01 lor rounding_bit + +(* "Cast" type operations will throw an exception in mode_of_elt (actually in + elt_width, called from there). Deal with that here, and generate a suffix + with multiple modes (). *) +let rec mode_suffix elttype shape = + try + let mode = mode_of_elt elttype shape in + string_of_mode mode + with MixedMode (dst, src) -> + let dstmode = mode_of_elt ~argpos:0 dst shape + and srcmode = mode_of_elt ~argpos:1 src shape in + string_of_mode dstmode ^ string_of_mode srcmode + +let get_shuffle features = + try + match List.find (function Use_shuffle _ -> true | _ -> false) features with + Use_shuffle fn -> Some fn + | _ -> None + with Not_found -> None + +let print_feature_test_start features = + try + match List.find (fun feature -> + match feature with Requires_feature _ -> true + | Requires_arch _ -> true + | Requires_FP_bit _ -> true + | _ -> false) + features with + Requires_feature feature -> + Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature + | Requires_arch arch -> + Format.printf "#if __ARM_ARCH >= %d@\n" arch + | Requires_FP_bit bit -> + Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n" + (1 lsl bit) + | _ -> assert false + with Not_found -> assert true + +let print_feature_test_end features = + let feature = + List.exists (function Requires_feature _ -> true + | Requires_arch _ -> true + | Requires_FP_bit _ -> true + | _ -> false) features in + if feature then Format.printf "#endif@\n" + + +let print_variant opcode features shape name (ctype, asmtype, elttype) = + let bits = infoword_value elttype features in + let modesuf = mode_suffix elttype shape in + let pdecls, paramlist = params ctype in + let rdecls, stmts = + match get_shuffle features with + Some shuffle -> shuffle_fn shuffle shape ctype elttype + | None -> + let paramlist' = modify_params features paramlist in + let paramlist'' = extra_word shape features paramlist' bits in + let parstr = String.concat ", " paramlist'' in + let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)" + (builtin_name features name) modesuf parstr in + return ctype builtin in + let body = pdecls @ rdecls @ stmts + and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in + begin + print_feature_test_start features; + print_function ctype fnname body; + print_feature_test_end features; + end + +(* When this function processes the element types in the ops table, it rewrites + them in a list of tuples (a,b,c): + a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8) + b : Asm type : a single, processed element type, e.g. P16. This is the + type which should be attached to the asm opcode. + c : Variant type : the unprocessed type for this variant (e.g. 
in add + instructions which don't care about the sign, b might be i16 and c + might be s16.) +*) + +let print_op (opcode, features, shape, name, munge, types) = + let sorted_types = List.sort compare types in + let munged_types = List.map + (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in + List.iter + (fun variant -> print_variant opcode features shape name variant) + munged_types + +let print_ops ops = + List.iter print_op ops + +(* Output type definitions. Table entries are: + cbase : "C" name for the type. + abase : "ARM" base name for the type (i.e. int in int8x8_t). + esize : element size. + enum : element count. + alevel: architecture level at which available. +*) + +type fpulevel = CRYPTO | ALL + +let deftypes () = + let typeinfo = [ + (* Doubleword vector types. *) + "__builtin_neon_qi", "int", 8, 8, ALL; + "__builtin_neon_hi", "int", 16, 4, ALL; + "__builtin_neon_si", "int", 32, 2, ALL; + "__builtin_neon_di", "int", 64, 1, ALL; + "__builtin_neon_hf", "float", 16, 4, ALL; + "__builtin_neon_sf", "float", 32, 2, ALL; + "__builtin_neon_poly8", "poly", 8, 8, ALL; + "__builtin_neon_poly16", "poly", 16, 4, ALL; + "__builtin_neon_poly64", "poly", 64, 1, CRYPTO; + "__builtin_neon_uqi", "uint", 8, 8, ALL; + "__builtin_neon_uhi", "uint", 16, 4, ALL; + "__builtin_neon_usi", "uint", 32, 2, ALL; + "__builtin_neon_udi", "uint", 64, 1, ALL; + + (* Quadword vector types. *) + "__builtin_neon_qi", "int", 8, 16, ALL; + "__builtin_neon_hi", "int", 16, 8, ALL; + "__builtin_neon_si", "int", 32, 4, ALL; + "__builtin_neon_di", "int", 64, 2, ALL; + "__builtin_neon_sf", "float", 32, 4, ALL; + "__builtin_neon_poly8", "poly", 8, 16, ALL; + "__builtin_neon_poly16", "poly", 16, 8, ALL; + "__builtin_neon_poly64", "poly", 64, 2, CRYPTO; + "__builtin_neon_uqi", "uint", 8, 16, ALL; + "__builtin_neon_uhi", "uint", 16, 8, ALL; + "__builtin_neon_usi", "uint", 32, 4, ALL; + "__builtin_neon_udi", "uint", 64, 2, ALL + ] in + List.iter + (fun (cbase, abase, esize, enum, fpulevel) -> + let attr = + match enum with + 1 -> "" + | _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))" + (esize * enum / 8) in + if fpulevel == CRYPTO then + Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; + Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr; + if fpulevel == CRYPTO then + Format.printf "#endif\n";) + typeinfo; + Format.print_newline (); + (* Extra types not in . *) + Format.printf "typedef float float32_t;\n"; + Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; + Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"; + Format.printf "#ifdef __ARM_FEATURE_CRYPTO\n"; + Format.printf "typedef __builtin_neon_poly64 poly64_t;\n"; + Format.printf "typedef __builtin_neon_poly128 poly128_t;\n"; + Format.printf "#endif\n" + +(* Output structs containing arrays, for load & store instructions etc. + poly128_t is deliberately not included here because it has no array types + defined for it. 
*) + +let arrtypes () = + let typeinfo = [ + "int", 8, ALL; "int", 16, ALL; + "int", 32, ALL; "int", 64, ALL; + "uint", 8, ALL; "uint", 16, ALL; + "uint", 32, ALL; "uint", 64, ALL; + "float", 32, ALL; "poly", 8, ALL; + "poly", 16, ALL; "poly", 64, CRYPTO + ] in + let writestruct elname elsize regsize arrsize fpulevel = + let elnum = regsize / elsize in + let structname = + Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in + let sfmt = start_function () in + Format.printf "%stypedef struct %s" + (if fpulevel == CRYPTO then "#ifdef __ARM_FEATURE_CRYPTO\n" else "") structname; + open_braceblock sfmt; + Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize; + close_braceblock sfmt; + Format.printf " %s;%s" structname (if fpulevel == CRYPTO then "\n#endif\n" else ""); + end_function sfmt; + in + for n = 2 to 4 do + List.iter + (fun (elname, elsize, alevel) -> + writestruct elname elsize 64 n alevel; + writestruct elname elsize 128 n alevel) + typeinfo + done + +let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) + +(* Do it. *) + +let _ = + print_lines [ +"/* ARM NEON intrinsics include file. This file is generated automatically"; +" using neon-gen.ml. Please do not edit manually."; +""; +" Copyright (C) 2006-2014 Free Software Foundation, Inc."; +" Contributed by CodeSourcery."; +""; +" This file is part of GCC."; +""; +" GCC is free software; you can redistribute it and/or modify it"; +" under the terms of the GNU General Public License as published"; +" by the Free Software Foundation; either version 3, or (at your"; +" option) any later version."; +""; +" GCC is distributed in the hope that it will be useful, but WITHOUT"; +" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; +" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; +" License for more details."; +""; +" Under Section 7 of GPL version 3, you are granted additional"; +" permissions described in the GCC Runtime Library Exception, version"; +" 3.1, as published by the Free Software Foundation."; +""; +" You should have received a copy of the GNU General Public License and"; +" a copy of the GCC Runtime Library Exception along with this program;"; +" see the files COPYING3 and COPYING.RUNTIME respectively. If not, see"; +" . */"; +""; +"#ifndef _GCC_ARM_NEON_H"; +"#define _GCC_ARM_NEON_H 1"; +""; +"#ifndef __ARM_NEON__"; +"#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h"; +"#else"; +""; +"#ifdef __cplusplus"; +"extern \"C\" {"; +"#endif"; +""; +"#include "; +""]; + deftypes (); + arrtypes (); + Format.print_newline (); + print_ops ops; + Format.print_newline (); + print_ops reinterp; + print_ops reinterpq; + Format.printf "%s" crypto_intrinsics; + print_lines [ +"#ifdef __cplusplus"; +"}"; +"#endif"; +"#endif"; +"#endif"] diff --git a/gcc-4.9/gcc/config/arm/neon-testgen.ml b/gcc-4.9/gcc/config/arm/neon-testgen.ml new file mode 100644 index 000000000..df429f59e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon-testgen.ml @@ -0,0 +1,305 @@ +(* Auto-generate ARM Neon intrinsics tests. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . + + This is an O'Caml program. The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-testgen neon.cmo neon-testgen.ml + + Run with: + cd /path/to/gcc/testsuite/gcc.target/arm/neon + /path/to/neon-testgen +*) + +open Neon + +type c_type_flags = Pointer | Const + +(* Open a test source file. *) +let open_test_file dir name = + try + open_out (dir ^ "/" ^ name ^ ".c") + with Sys_error str -> + failwith ("Could not create test source file " ^ name ^ ": " ^ str) + +(* Emit prologue code to a test source file. *) +let emit_prologue chan test_name effective_target = + Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic. */\n" test_name; + Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n"; + Printf.fprintf chan "/* { dg-do assemble } */\n"; + Printf.fprintf chan "/* { dg-require-effective-target %s_ok } */\n" + effective_target; + Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n"; + Printf.fprintf chan "/* { dg-add-options %s } */\n" effective_target; + Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n"; + Printf.fprintf chan "void test_%s (void)\n{\n" test_name + +(* Emit declarations of local variables that are going to be passed + to an intrinsic, together with one to take a returned value if needed. *) +let emit_automatics chan c_types features = + let emit () = + ignore ( + List.fold_left (fun arg_number -> fun (flags, ty) -> + let pointer_bit = + if List.mem Pointer flags then "*" else "" + in + (* Const arguments to builtins are directly + written in as constants. *) + if not (List.mem Const flags) then + Printf.fprintf chan " %s %sarg%d_%s;\n" + ty pointer_bit arg_number ty; + arg_number + 1) + 0 (List.tl c_types)) + in + match c_types with + (_, return_ty) :: tys -> + if return_ty <> "void" then begin + (* The intrinsic returns a value. We need to do explict register + allocation for vget_low tests or they fail because of copy + elimination. *) + ((if List.mem Fixed_vector_reg features then + Printf.fprintf chan " register %s out_%s asm (\"d18\");\n" + return_ty return_ty + else if List.mem Fixed_core_reg features then + Printf.fprintf chan " register %s out_%s asm (\"r0\");\n" + return_ty return_ty + else + Printf.fprintf chan " %s out_%s;\n" return_ty return_ty); + emit ()) + end else + (* The intrinsic does not return a value. *) + emit () + | _ -> assert false + +(* Emit code to call an intrinsic. *) +let emit_call chan const_valuator c_types name elt_ty = + (if snd (List.hd c_types) <> "void" then + Printf.fprintf chan " out_%s = " (snd (List.hd c_types)) + else + Printf.fprintf chan " "); + Printf.fprintf chan "%s_%s (" (intrinsic_name name) (string_of_elt elt_ty); + let print_arg chan arg_number (flags, ty) = + (* If the argument is of const type, then directly write in the + constant now. 
*) + if List.mem Const flags then + match const_valuator with + None -> + if List.mem Pointer flags then + Printf.fprintf chan "0" + else + Printf.fprintf chan "1" + | Some f -> Printf.fprintf chan "%s" (string_of_int (f arg_number)) + else + Printf.fprintf chan "arg%d_%s" arg_number ty + in + let rec print_args arg_number tys = + match tys with + [] -> () + | [ty] -> print_arg chan arg_number ty + | ty::tys -> + print_arg chan arg_number ty; + Printf.fprintf chan ", "; + print_args (arg_number + 1) tys + in + print_args 0 (List.tl c_types); + Printf.fprintf chan ");\n" + +(* Emit epilogue code to a test source file. *) +let emit_epilogue chan features regexps = + let no_op = List.exists (fun feature -> feature = No_op) features in + Printf.fprintf chan "}\n\n"; + (if not no_op then + List.iter (fun regexp -> + Printf.fprintf chan + "/* { dg-final { scan-assembler \"%s\" } } */\n" regexp) + regexps + else + () + ); + Printf.fprintf chan "/* { dg-final { cleanup-saved-temps } } */\n" + +(* Check a list of C types to determine which ones are pointers and which + ones are const. *) +let check_types tys = + let tys' = + List.map (fun ty -> + let len = String.length ty in + if len > 2 && String.get ty (len - 2) = ' ' + && String.get ty (len - 1) = '*' + then ([Pointer], String.sub ty 0 (len - 2)) + else ([], ty)) tys + in + List.map (fun (flags, ty) -> + if String.length ty > 6 && String.sub ty 0 6 = "const " + then (Const :: flags, String.sub ty 6 ((String.length ty) - 6)) + else (flags, ty)) tys' + +(* Work out what the effective target should be. *) +let effective_target features = + try + match List.find (fun feature -> + match feature with Requires_feature _ -> true + | Requires_arch _ -> true + | Requires_FP_bit 1 -> true + | _ -> false) + features with + Requires_feature "FMA" -> "arm_neonv2" + | Requires_feature "CRYPTO" -> "arm_crypto" + | Requires_arch 8 -> "arm_v8_neon" + | Requires_FP_bit 1 -> "arm_neon_fp16" + | _ -> assert false + with Not_found -> "arm_neon" + +(* Given an intrinsic shape, produce a regexp that will match + the right-hand sides of instructions generated by an intrinsic of + that shape. 
*) +let rec analyze_shape shape = + let rec n_things n thing = + match n with + 0 -> [] + | n -> thing :: (n_things (n - 1) thing) + in + let rec analyze_shape_elt elt = + match elt with + Dreg -> "\\[dD\\]\\[0-9\\]+" + | Qreg -> "\\[qQ\\]\\[0-9\\]+" + | Corereg -> "\\[rR\\]\\[0-9\\]+" + | Immed -> "#\\[0-9\\]+" + | VecArray (1, elt) -> + let elt_regexp = analyze_shape_elt elt in + "((\\\\\\{" ^ elt_regexp ^ "\\\\\\})|(" ^ elt_regexp ^ "))" + | VecArray (n, elt) -> + let elt_regexp = analyze_shape_elt elt in + let alt1 = elt_regexp ^ "-" ^ elt_regexp in + let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in + "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}" + | (PtrTo elt | CstPtrTo elt) -> + "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]" + | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" + | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" + | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")" + in + match shape with + All (n, elt) -> commas analyze_shape_elt (n_things n elt) "" + | Long -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Dreg) ^ + ", " ^ (analyze_shape_elt Dreg) + | Long_noreg elt -> (analyze_shape_elt elt) ^ ", " ^ (analyze_shape_elt elt) + | Wide -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Qreg) ^ + ", " ^ (analyze_shape_elt Dreg) + | Wide_noreg elt -> analyze_shape (Long_noreg elt) + | Narrow -> (analyze_shape_elt Dreg) ^ ", " ^ (analyze_shape_elt Qreg) ^ + ", " ^ (analyze_shape_elt Qreg) + | Use_operands elts -> commas analyze_shape_elt (Array.to_list elts) "" + | By_scalar Dreg -> + analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) + | By_scalar Qreg -> + analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) + | By_scalar _ -> assert false + | Wide_lane -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Wide_scalar -> + analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) + | Pair_result elt -> + let elt_regexp = analyze_shape_elt elt in + elt_regexp ^ ", " ^ elt_regexp + | Unary_scalar _ -> "FIXME Unary_scalar" + | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) + | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) + | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) + +(* Generate tests for one intrinsic. *) +let test_intrinsic dir opcode features shape name munge elt_ty = + (* Open the test source file. *) + let test_name = name ^ (string_of_elt elt_ty) in + let chan = open_test_file dir test_name in + (* Work out what argument and return types the intrinsic has. *) + let c_arity, new_elt_ty = munge shape elt_ty in + let c_types = check_types (strings_of_arity c_arity) in + (* Extract any constant valuator (a function specifying what constant + values are to be written into the intrinsic call) from the features + list. *) + let const_valuator = + try + match (List.find (fun feature -> match feature with + Const_valuator _ -> true + | _ -> false) features) with + Const_valuator f -> Some f + | _ -> assert false + with Not_found -> None + in + (* Work out what instruction name(s) to expect. *) + let insns = get_insn_names features name in + let no_suffix = (new_elt_ty = NoElts) in + let insns = + if no_suffix then insns + else List.map (fun insn -> + let suffix = string_of_elt_dots new_elt_ty in + insn ^ "\\." 
^ suffix) insns + in + (* Construct a regexp to match against the expected instruction name(s). *) + let insn_regexp = + match insns with + [] -> assert false + | [insn] -> insn + | _ -> + let rec calc_regexp insns cur_regexp = + match insns with + [] -> cur_regexp + | [insn] -> cur_regexp ^ "(" ^ insn ^ "))" + | insn::insns -> calc_regexp insns (cur_regexp ^ "(" ^ insn ^ ")|") + in calc_regexp insns "(" + in + (* Construct regexps to match against the instructions that this + intrinsic expands to. Watch out for any writeback character and + comments after the instruction. *) + let regexps = List.map (fun regexp -> insn_regexp ^ "\\[ \t\\]+" ^ regexp ^ + "!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n") + (analyze_all_shapes features shape analyze_shape) + in + let effective_target = effective_target features + in + (* Emit file and function prologues. *) + emit_prologue chan test_name effective_target; + (* Emit local variable declarations. *) + emit_automatics chan c_types features; + Printf.fprintf chan "\n"; + (* Emit the call to the intrinsic. *) + emit_call chan const_valuator c_types name elt_ty; + (* Emit the function epilogue and the DejaGNU scan-assembler directives. *) + emit_epilogue chan features regexps; + (* Close the test file. *) + close_out chan + +(* Generate tests for one element of the "ops" table. *) +let test_intrinsic_group dir (opcode, features, shape, name, munge, types) = + List.iter (test_intrinsic dir opcode features shape name munge) types + +(* Program entry point. *) +let _ = + let directory = if Array.length Sys.argv <> 1 then Sys.argv.(1) else "." in + List.iter (test_intrinsic_group directory) (reinterp @ reinterpq @ ops) + diff --git a/gcc-4.9/gcc/config/arm/neon.md b/gcc-4.9/gcc/config/arm/neon.md new file mode 100644 index 000000000..aad420ce7 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon.md @@ -0,0 +1,5808 @@ +;; ARM NEON coprocessor Machine Description +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; Attribute used to permit string comparisons against in +;; type attribute definitions. 
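(Roughly what the neon-testgen.ml driver above writes out for a single entry of the ops table. This is a hand-written approximation for a hypothetical vadd_s32 test, not verbatim generator output; the exact DejaGnu directives, variable names and scan-assembler regexp come from emit_prologue, emit_automatics, emit_call and emit_epilogue.)

    /* Test the `vadd_s32' ARM Neon intrinsic.  */
    /* { dg-do assemble } */
    /* { dg-require-effective-target arm_neon_ok } */
    /* { dg-options "-save-temps -O0" } */
    /* { dg-add-options arm_neon } */

    #include "arm_neon.h"

    void test_vadds32 (void)
    {
      int32x2_t out_int32x2_t;
      int32x2_t arg0_int32x2_t;
      int32x2_t arg1_int32x2_t;

      out_int32x2_t = vadd_s32 (arg0_int32x2_t, arg1_int32x2_t);
    }

    /* { dg-final { scan-assembler "vadd\.i32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
    /* { dg-final { cleanup-saved-temps } } */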
+(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd")) + +(define_insn "*neon_mov" + [(set (match_operand:VDX 0 "nonimmediate_operand" + "=w,Un,w, w, ?r,?w,?r,?r, ?Us") + (match_operand:VDX 1 "general_operand" + " w,w, Dn,Uni, w, r, r, Usi,r"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + if (which_alternative == 2) + { + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_move (operands[1], mode, + &operands[1], &width); + + gcc_assert (is_valid != 0); + + if (width == 0) + return "vmov.f32\t%P0, %1 @ "; + else + sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ ", width); + + return templ; + } + + switch (which_alternative) + { + case 0: return "vmov\t%P0, %P1 @ "; + case 1: case 3: return output_move_neon (operands); + case 2: gcc_unreachable (); + case 4: return "vmov\t%Q0, %R0, %P1 @ "; + case 5: return "vmov\t%P0, %Q1, %R1 @ "; + default: return output_move_double (operands, true, NULL); + } +} + [(set_attr "type" "neon_move,neon_store1_1reg,neon_move,\ + neon_load1_1reg, neon_to_gp,neon_from_gp,mov_reg,\ + neon_load1_2reg, neon_store1_2reg") + (set_attr "length" "4,4,4,4,4,4,8,8,8") + (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") + (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")]) + +(define_insn "*neon_mov" + [(set (match_operand:VQXMOV 0 "nonimmediate_operand" + "=w,Un,w, w, ?r,?w,?r,?r, ?Us") + (match_operand:VQXMOV 1 "general_operand" + " w,w, Dn,Uni, w, r, r, Usi, r"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + if (which_alternative == 2) + { + int width, is_valid; + static char templ[40]; + + is_valid = neon_immediate_valid_for_move (operands[1], mode, + &operands[1], &width); + + gcc_assert (is_valid != 0); + + if (width == 0) + return "vmov.f32\t%q0, %1 @ "; + else + sprintf (templ, "vmov.i%d\t%%q0, %%1 @ ", width); + + return templ; + } + + switch (which_alternative) + { + case 0: return "vmov\t%q0, %q1 @ "; + case 1: case 3: return output_move_neon (operands); + case 2: gcc_unreachable (); + case 4: return "vmov\t%Q0, %R0, %e1 @ \;vmov\t%J0, %K0, %f1"; + case 5: return "vmov\t%e0, %Q1, %R1 @ \;vmov\t%f0, %J1, %K1"; + default: return output_move_quad (operands); + } +} + [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\ + neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\ + mov_reg,neon_load1_4reg,neon_store1_4reg") + (set_attr "length" "4,8,4,8,8,8,16,8,16") + (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*") + (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")]) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "TARGET_NEON" +{ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (TImode, operands[1]); + } +}) + +(define_expand "mov" + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") + (match_operand:VSTRUCT 1 "general_operand" ""))] + "TARGET_NEON" +{ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + } +}) + +(define_insn "*neon_mov" + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") + (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] + "TARGET_NEON + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +{ + switch 
(which_alternative) + { + case 0: return "#"; + case 1: case 2: return output_move_neon (operands); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q") + (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) + +(define_split + [(set (match_operand:EI 0 "s_register_operand" "") + (match_operand:EI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (DImode, rdest + 4); + src[1] = gen_rtx_REG (DImode, rsrc + 4); + + neon_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:OI 0 "s_register_operand" "") + (match_operand:OI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[2], src[2]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + + neon_disambiguate_copy (operands, dest, src, 2); +}) + +(define_split + [(set (match_operand:CI 0 "s_register_operand" "") + (match_operand:CI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[3], src[3]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + dest[2] = gen_rtx_REG (TImode, rdest + 8); + src[2] = gen_rtx_REG (TImode, rsrc + 8); + + neon_disambiguate_copy (operands, dest, src, 3); +}) + +(define_split + [(set (match_operand:XI 0 "s_register_operand" "") + (match_operand:XI 1 "s_register_operand" ""))] + "TARGET_NEON && reload_completed" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5)) + (set (match_dup 6) (match_dup 7))] +{ + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); + rtx dest[4], src[4]; + + dest[0] = gen_rtx_REG (TImode, rdest); + src[0] = gen_rtx_REG (TImode, rsrc); + dest[1] = gen_rtx_REG (TImode, rdest + 4); + src[1] = gen_rtx_REG (TImode, rsrc + 4); + dest[2] = gen_rtx_REG (TImode, rdest + 8); + src[2] = gen_rtx_REG (TImode, rsrc + 8); + dest[3] = gen_rtx_REG (TImode, rdest + 12); + src[3] = gen_rtx_REG (TImode, rsrc + 12); + + neon_disambiguate_copy (operands, dest, src, 4); +}) + +(define_expand "movmisalign" + [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand") + (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" +{ + rtx adjust_mem; + /* This pattern is not permitted to fail during expansion: if both arguments + are non-registers (e.g. memory := constant, which can be created by the + auto-vectorizer), force operand 1 into a register. 
*/ + if (!s_register_operand (operands[0], mode) + && !s_register_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + + if (s_register_operand (operands[0], mode)) + adjust_mem = operands[1]; + else + adjust_mem = operands[0]; + + /* Legitimize address. */ + if (!neon_vector_mem_operand (adjust_mem, 2, true)) + XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0)); + +}) + +(define_insn "*movmisalign_neon_store" + [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um") + (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" + "vst1.\t{%P1}, %A0" + [(set_attr "type" "neon_store1_1reg")]) + +(define_insn "*movmisalign_neon_load" + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand" + " Um")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" + "vld1.\t{%P0}, %A1" + [(set_attr "type" "neon_load1_1reg")]) + +(define_insn "*movmisalign_neon_store" + [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um") + (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" + "vst1.\t{%q1}, %A0" + [(set_attr "type" "neon_store1_1reg")]) + +(define_insn "*movmisalign_neon_load" + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand" + " Um")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access" + "vld1.\t{%q0}, %A1" + [(set_attr "type" "neon_store1_1reg")]) + +(define_insn "vec_set_internal" + [(set (match_operand:VD 0 "s_register_operand" "=w,w") + (vec_merge:VD + (vec_duplicate:VD + (match_operand: 1 "nonimmediate_operand" "Um,r")) + (match_operand:VD 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_NEON" +{ + int elt = ffs ((int) INTVAL (operands[2])) - 1; + if (BYTES_BIG_ENDIAN) + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + + if (which_alternative == 0) + return "vld1.\t{%P0[%c2]}, %A1"; + else + return "vmov.\t%P0[%c2], %1"; +} + [(set_attr "type" "neon_load1_all_lanes,neon_from_gp")]) + +(define_insn "vec_set_internal" + [(set (match_operand:VQ 0 "s_register_operand" "=w,w") + (vec_merge:VQ + (vec_duplicate:VQ + (match_operand: 1 "nonimmediate_operand" "Um,r")) + (match_operand:VQ 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; + int half_elts = GET_MODE_NUNITS (mode) / 2; + int elt = elem % half_elts; + int hi = (elem / half_elts) * 2; + int regno = REGNO (operands[0]); + + if (BYTES_BIG_ENDIAN) + elt = half_elts - 1 - elt; + + operands[0] = gen_rtx_REG (mode, regno + hi); + operands[2] = GEN_INT (elt); + + if (which_alternative == 0) + return "vld1.\t{%P0[%c2]}, %A1"; + else + return "vmov.\t%P0[%c2], %1"; +} + [(set_attr "type" "neon_load1_all_lanes,neon_from_gp")] +) + +(define_insn "vec_setv2di_internal" + [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") + (vec_merge:V2DI + (vec_duplicate:V2DI + (match_operand:DI 1 "nonimmediate_operand" "Um,r")) + (match_operand:V2DI 3 "s_register_operand" "0,0") + (match_operand:SI 2 "immediate_operand" "i,i")))] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1; + int regno = REGNO (operands[0]) 
+ 2 * elem; + + operands[0] = gen_rtx_REG (DImode, regno); + + if (which_alternative == 0) + return "vld1.64\t%P0, %A1"; + else + return "vmov\t%P0, %Q1, %R1"; +} + [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")] +) + +(define_expand "vec_set" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); + emit_insn (gen_vec_set_internal (operands[0], operands[1], + GEN_INT (elem), operands[0])); + DONE; +}) + +(define_insn "vec_extract" + [(set (match_operand: 0 "nonimmediate_operand" "=Um,r") + (vec_select: + (match_operand:VD 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + + if (which_alternative == 0) + return "vst1.\t{%P1[%c2]}, %A0"; + else + return "vmov.\t%0, %P1[%c2]"; +} + [(set_attr "type" "neon_store1_one_lane,neon_to_gp")] +) + +(define_insn "vec_extract" + [(set (match_operand: 0 "nonimmediate_operand" "=Um,r") + (vec_select: + (match_operand:VQ 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] + "TARGET_NEON" +{ + int half_elts = GET_MODE_NUNITS (mode) / 2; + int elt = INTVAL (operands[2]) % half_elts; + int hi = (INTVAL (operands[2]) / half_elts) * 2; + int regno = REGNO (operands[1]); + + if (BYTES_BIG_ENDIAN) + elt = half_elts - 1 - elt; + + operands[1] = gen_rtx_REG (mode, regno + hi); + operands[2] = GEN_INT (elt); + + if (which_alternative == 0) + return "vst1.\t{%P1[%c2]}, %A0"; + else + return "vmov.\t%0, %P1[%c2]"; +} + [(set_attr "type" "neon_store1_one_lane,neon_to_gp")] +) + +(define_insn "vec_extractv2di" + [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r") + (vec_select:DI + (match_operand:V2DI 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); + + operands[1] = gen_rtx_REG (DImode, regno); + + if (which_alternative == 0) + return "vst1.64\t{%P1}, %A0 @ v2di"; + else + return "vmov\t%Q0, %R0, %P1 @ v2di"; +} + [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")] +) + +(define_expand "vec_init" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand 1 "" "")] + "TARGET_NEON" +{ + neon_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +;; Doubleword and quadword arithmetic. + +;; NOTE: some other instructions also support 64-bit integer +;; element size, which we could potentially use for "long long" operations. + +(define_insn "*add3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "vadd.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_add")))] +) + +(define_insn "adddi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r") + (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: /* fall through */ + case 3: return "vadd.i64\t%P0, %P1, %P2"; + case 1: return "#"; + case 2: return "#"; + case 4: return "#"; + case 5: return "#"; + case 6: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_add,multiple,multiple,neon_add,\ + multiple,multiple,multiple") + (set_attr "conds" "*,clob,clob,*,clob,clob,clob") + (set_attr "length" "*,8,8,*,8,8,8") + (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")] +) + +(define_insn "*sub3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vsub.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_sub")))] +) + +(define_insn "subdi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w") + (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w") + (match_operand:DI 2 "s_register_operand" "w,r,0,0,w"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: /* fall through */ + case 4: return "vsub.i64\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 2: /* fall through */ + case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub") + (set_attr "conds" "*,clob,clob,clob,*") + (set_attr "length" "*,8,8,8,*") + (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")] +) + +(define_insn "*mul3_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vmul.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mul_s") + (const_string "neon_mul_")))] +) + +(define_insn "mul3add_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w")) + (match_operand:VDQW 1 "s_register_operand" "0")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vmla.\t%0, %2, %3" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s") + (const_string "neon_mla_")))] +) + +(define_insn "mul3negadd_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") + (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w"))))] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "vmls.\t%0, %2, %3" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s") + (const_string "neon_mla_")))] +) + +;; Fused multiply-accumulate +;; We define each insn twice here: +;; 1: with flag_unsafe_math_optimizations for the widening multiply phase +;; to be able to use when converting to FMA. +;; 2: without flag_unsafe_math_optimizations for the intrinsics to use. +(define_insn "fma4" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "vfma%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "fma4_intrinsic" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA" + "vfma%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "*fmsub4" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "vfms%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "fmsub4_intrinsic" + [(set (match_operand:VCVTF 0 "register_operand" "=w") + (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) + (match_operand:VCVTF 2 "register_operand" "w") + (match_operand:VCVTF 3 "register_operand" "0")))] + "TARGET_NEON && TARGET_FMA" + "vfms%?.\\t%0, %1, %2" + [(set_attr "type" "neon_fp_mla_s")] +) + +(define_insn "neon_vrint" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 + "s_register_operand" "w")] + NEON_VRINT))] + "TARGET_NEON && TARGET_FPU_ARMV8" + "vrint%?.f32\\t%0, %1" + [(set_attr "type" "neon_fp_round_")] +) + +(define_insn "ior3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") + (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") + (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vorr\t%0, %1, %2"; + case 1: return neon_output_logic_immediate ("vorr", &operands[2], + mode, 0, VALID_NEON_QREG_MODE (mode)); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_logic")] +) + +;; The concrete forms of the Neon immediate-logic instructions are vbic and +;; vorr. We support the pseudo-instruction vand instead, because that +;; corresponds to the canonical form the middle-end expects to use for +;; immediate bitwise-ANDs. 
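(For reference, one source-level way to reach the immediate-logic alternative described above; whether the vectorizer actually produces vand or the complemented vbic form depends on the constant, the -mfpu setting and optimization flags, so this is only an illustrative sketch.)

    #include <stdint.h>

    /* Built with something like -O2 -mfpu=neon -ftree-vectorize, the masking
       below is presented to the backend as a vector AND with a per-lane
       constant; neon_output_logic_immediate then prints that either as vand
       or as vbic with the complemented immediate.  */
    void
    clear_high_byte (uint32_t *a, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] &= 0x00ffffff;
    }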
+ +(define_insn "and3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") + (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") + (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vand\t%0, %1, %2"; + case 1: return neon_output_logic_immediate ("vand", &operands[2], + mode, 1, VALID_NEON_QREG_MODE (mode)); + default: gcc_unreachable (); + } +} + [(set_attr "type" "neon_logic")] +) + +(define_insn "orn3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) + (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vorn\t%0, %1, %2" + [(set_attr "type" "neon_logic")] +) + +;; TODO: investigate whether we should disable +;; this and bicdi3_neon for the A8 in line with the other +;; changes above. +(define_insn_and_split "orndi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") + (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r")) + (match_operand:DI 1 "s_register_operand" "w,r,r,0")))] + "TARGET_NEON" + "@ + vorn\t%P0, %P1, %P2 + # + # + #" + "reload_completed && + (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))" + [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] + " + { + if (TARGET_THUMB2) + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[5] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + else + { + emit_insn (gen_one_cmpldi2 (operands[0], operands[2])); + emit_insn (gen_iordi3 (operands[0], operands[1], operands[0])); + DONE; + } + }" + [(set_attr "type" "neon_logic,multiple,multiple,multiple") + (set_attr "length" "*,16,8,8") + (set_attr "arch" "any,a,t2,t2")] +) + +(define_insn "bic3_neon" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w")) + (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vbic\t%0, %1, %2" + [(set_attr "type" "neon_logic")] +) + +;; Compare to *anddi_notdi_di. 
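(Both the vector pattern above and the DImode pattern that follows cover the same and-with-complement idiom. A minimal sketch, assuming a NEON-enabled compile, of the two source forms involved; which alternative of bicdi3_neon is chosen is a register-allocation decision, so the instructions named in the comments are only the typical outcome.)

    #include <stdint.h>
    #include <arm_neon.h>

    uint32x4_t
    mask_off_q (uint32x4_t a, uint32x4_t b)
    {
      return vbicq_u32 (a, b);      /* a & ~b: a single vbic on a q-register */
    }

    uint64_t
    mask_off_di (uint64_t a, uint64_t b)
    {
      return a & ~b;                /* bicdi3_neon: vbic on a d-register, or a
                                       two-instruction core-register split */
    }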
+(define_insn "bicdi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r") + (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0")) + (match_operand:DI 1 "s_register_operand" "w,0,r")))] + "TARGET_NEON" + "@ + vbic\t%P0, %P1, %P2 + # + #" + [(set_attr "type" "neon_logic,multiple,multiple") + (set_attr "length" "*,8,8")] +) + +(define_insn "xor3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")))] + "TARGET_NEON" + "veor\t%0, %1, %2" + [(set_attr "type" "neon_logic")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vmvn\t%0, %1" + [(set_attr "type" "neon_move")] +) + +(define_insn "abs2" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vabs.\t%0, %1" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_abs_s") + (const_string "neon_abs")))] +) + +(define_insn "neg2" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vneg.\t%0, %1" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_neg_s") + (const_string "neon_neg")))] +) + +(define_insn "negdi2_neon" + [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r") + (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r"))) + (clobber (match_scratch:DI 2 "= X,&w,X, X")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" + "#" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +; Split negdi2_neon for vfp registers +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (neg:DI (match_operand:DI 1 "s_register_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (const_int 0)) + (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + { + if (!REG_P (operands[2])) + operands[2] = operands[0]; + } +) + +; Split negdi2_neon for core registers +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (neg:DI (match_operand:DI 1 "s_register_operand" ""))) + (clobber (match_scratch:DI 2 "")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_32BIT && reload_completed + && arm_general_register_operand (operands[0], DImode)" + [(parallel [(set (match_dup 0) (neg:DI (match_dup 1))) + (clobber (reg:CC CC_REGNUM))])] + "" +) + +(define_insn "*umin3_neon" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmin.\t%0, %1, %2" + [(set_attr "type" "neon_minmax")] +) + +(define_insn "*umax3_neon" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmax.\t%0, %1, %2" + [(set_attr "type" "neon_minmax")] +) + +(define_insn "*smin3_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmin.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test 
"") + (const_string "neon_fp_minmax_s") + (const_string "neon_minmax")))] +) + +(define_insn "*smax3_neon" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vmax.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_minmax_s") + (const_string "neon_minmax")))] +) + +; TODO: V2DI shifts are current disabled because there are bugs in the +; generic vectorizer code. It ends up creating a V2DI constructor with +; SImode elements. + +(define_insn "vashl3" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w") + (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w") + (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))] + "TARGET_NEON" + { + switch (which_alternative) + { + case 0: return "vshl.\t%0, %1, %2"; + case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2], + mode, + VALID_NEON_QREG_MODE (mode), + true); + default: gcc_unreachable (); + } + } + [(set_attr "type" "neon_shift_reg, neon_shift_imm")] +) + +(define_insn "vashr3_imm" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] + "TARGET_NEON" + { + return neon_output_shift_immediate ("vshr", 's', &operands[2], + mode, VALID_NEON_QREG_MODE (mode), + false); + } + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "vlshr3_imm" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))] + "TARGET_NEON" + { + return neon_output_shift_immediate ("vshr", 'u', &operands[2], + mode, VALID_NEON_QREG_MODE (mode), + false); + } + [(set_attr "type" "neon_shift_imm")] +) + +; Used for implementing logical shift-right, which is a left-shift by a negative +; amount, with signed operands. This is essentially the same as ashl3 +; above, but using an unspec in case GCC tries anything tricky with negative +; shift amounts. + +(define_insn "ashl3_signed" + [(set (match_operand:VDQI 0 "s_register_operand" "=w") + (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") + (match_operand:VDQI 2 "s_register_operand" "w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_NEON" + "vshl.\t%0, %1, %2" + [(set_attr "type" "neon_shift_reg")] +) + +; Used for implementing logical shift-right, which is a left-shift by a negative +; amount, with unsigned operands. 
+ +(define_insn "ashl3_unsigned" + [(set (match_operand:VDQI 0 "s_register_operand" "=w") + (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w") + (match_operand:VDQI 2 "s_register_operand" "w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_NEON" + "vshl.\t%0, %1, %2" + [(set_attr "type" "neon_shift_reg")] +) + +(define_expand "vashr3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] + "TARGET_NEON" +{ + if (s_register_operand (operands[2], mode)) + { + rtx neg = gen_reg_rtx (mode); + emit_insn (gen_neg2 (neg, operands[2])); + emit_insn (gen_ashl3_signed (operands[0], operands[1], neg)); + } + else + emit_insn (gen_vashr3_imm (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "vlshr3" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))] + "TARGET_NEON" +{ + if (s_register_operand (operands[2], mode)) + { + rtx neg = gen_reg_rtx (mode); + emit_insn (gen_neg2 (neg, operands[2])); + emit_insn (gen_ashl3_unsigned (operands[0], operands[1], neg)); + } + else + emit_insn (gen_vlshr3_imm (operands[0], operands[1], operands[2])); + DONE; +}) + +;; 64-bit shifts + +;; This pattern loads a 32-bit shift count into a 64-bit NEON register, +;; leaving the upper half uninitalized. This is OK since the shift +;; instruction only looks at the low 8 bits anyway. To avoid confusing +;; data flow analysis however, we pretend the full register is set +;; using an unspec. +(define_insn "neon_load_count" + [(set (match_operand:DI 0 "s_register_operand" "=w,w") + (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")] + UNSPEC_LOAD_COUNT))] + "TARGET_NEON" + "@ + vld1.32\t{%P0[0]}, %A1 + vmov.32\t%P0[0], %1" + [(set_attr "type" "neon_load1_1reg,neon_from_gp")] +) + +(define_insn "ashldi3_neon_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w") + (match_operand:DI 2 "reg_or_int_operand" " i,w")))] + "TARGET_NEON && reload_completed + && (!CONST_INT_P (operands[2]) + || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))" + "@ + vshl.u64\t%P0, %P1, %2 + vshl.u64\t%P0, %P1, %P2" + [(set_attr "type" "neon_shift_imm, neon_shift_reg")] +) + +(define_insn_and_split "ashldi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w") + (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w") + (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i"))) + (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X")) + (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X")) + (clobber (reg:CC_C CC_REGNUM))] + "TARGET_NEON" + "#" + "TARGET_NEON && reload_completed" + [(const_int 0)] + " + { + if (IS_VFP_REGNUM (REGNO (operands[0]))) + { + if (CONST_INT_P (operands[2])) + { + if (INTVAL (operands[2]) < 1) + { + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (INTVAL (operands[2]) > 63) + operands[2] = gen_rtx_CONST_INT (VOIDmode, 63); + } + else + { + emit_insn (gen_neon_load_count (operands[5], operands[2])); + operands[2] = operands[5]; + } + + /* Ditch the unnecessary clobbers. 
*/ + emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1], + operands[2])); + } + else + { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) + /* This clobbers CC. */ + emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); + else + arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], + operands[2], operands[3], operands[4]); + } + DONE; + }" + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,*,*") + (set_attr "type" "multiple")] +) + +; The shift amount needs to be negated for right-shifts +(define_insn "signed_shift_di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "s_register_operand" " w")] + UNSPEC_ASHIFT_SIGNED))] + "TARGET_NEON && reload_completed" + "vshl.s64\t%P0, %P1, %P2" + [(set_attr "type" "neon_shift_reg")] +) + +; The shift amount needs to be negated for right-shifts +(define_insn "unsigned_shift_di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "s_register_operand" " w")] + UNSPEC_ASHIFT_UNSIGNED))] + "TARGET_NEON && reload_completed" + "vshl.u64\t%P0, %P1, %P2" + [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "ashrdi3_neon_imm_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "const_int_operand" " i")))] + "TARGET_NEON && reload_completed + && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" + "vshr.s64\t%P0, %P1, %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "lshrdi3_neon_imm_noclobber" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w") + (match_operand:DI 2 "const_int_operand" " i")))] + "TARGET_NEON && reload_completed + && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" + "vshr.u64\t%P0, %P1, %2" + [(set_attr "type" "neon_shift_imm")] +) + +;; ashrdi3_neon +;; lshrdi3_neon +(define_insn_and_split "di3_neon" + [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w") + (rshifts:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w") + (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i"))) + (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X")) + (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X")) + (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" + "#" + "TARGET_NEON && reload_completed" + [(const_int 0)] + " + { + if (IS_VFP_REGNUM (REGNO (operands[0]))) + { + if (CONST_INT_P (operands[2])) + { + if (INTVAL (operands[2]) < 1) + { + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + else if (INTVAL (operands[2]) > 64) + operands[2] = gen_rtx_CONST_INT (VOIDmode, 64); + + /* Ditch the unnecessary clobbers. */ + emit_insn (gen_di3_neon_imm_noclobber (operands[0], + operands[1], + operands[2])); + } + else + { + /* We must use a negative left-shift. */ + emit_insn (gen_negsi2 (operands[3], operands[2])); + emit_insn (gen_neon_load_count (operands[5], operands[3])); + emit_insn (gen__shift_di3_neon (operands[0], operands[1], + operands[5])); + } + } + else + { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) + /* This clobbers CC. 
*/ + emit_insn (gen_arm_di3_1bit (operands[0], operands[1])); + else + /* This clobbers CC (ASHIFTRT by register only). */ + arm_emit_coreregs_64bit_shift (, operands[0], operands[1], + operands[2], operands[3], operands[4]); + } + + DONE; + }" + [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") + (set_attr "opt" "*,*,speed,speed,*,*") + (set_attr "type" "multiple")] +) + +;; Widening operations + +(define_insn "widen_ssum3" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (sign_extend: + (match_operand:VW 1 "s_register_operand" "%w")) + (match_operand: 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vaddw.\t%q0, %q2, %P1" + [(set_attr "type" "neon_add_widen")] +) + +(define_insn "widen_usum3" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (zero_extend: + (match_operand:VW 1 "s_register_operand" "%w")) + (match_operand: 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vaddw.\t%q0, %q2, %P1" + [(set_attr "type" "neon_add_widen")] +) + +;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit +;; shift-count granularity. That's good enough for the middle-end's current +;; needs. + +;; Note that it's not safe to perform such an operation in big-endian mode, +;; due to element-ordering issues. + +(define_expand "vec_shr_" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand:VDQ 1 "s_register_operand" "") + (match_operand:SI 2 "const_multiple_of_8_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx zero_reg; + HOST_WIDE_INT num_bits = INTVAL (operands[2]); + const int width = GET_MODE_BITSIZE (mode); + const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; + rtx (*gen_ext) (rtx, rtx, rtx, rtx) = + (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; + + if (num_bits == width) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); + operands[0] = gen_lowpart (bvecmode, operands[0]); + operands[1] = gen_lowpart (bvecmode, operands[1]); + + emit_insn (gen_ext (operands[0], operands[1], zero_reg, + GEN_INT (num_bits / BITS_PER_UNIT))); + DONE; +}) + +(define_expand "vec_shl_" + [(match_operand:VDQ 0 "s_register_operand" "") + (match_operand:VDQ 1 "s_register_operand" "") + (match_operand:SI 2 "const_multiple_of_8_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx zero_reg; + HOST_WIDE_INT num_bits = INTVAL (operands[2]); + const int width = GET_MODE_BITSIZE (mode); + const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode; + rtx (*gen_ext) (rtx, rtx, rtx, rtx) = + (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi; + + if (num_bits == 0) + { + emit_move_insn (operands[0], CONST0_RTX (mode)); + DONE; + } + + num_bits = width - num_bits; + + zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode)); + operands[0] = gen_lowpart (bvecmode, operands[0]); + operands[1] = gen_lowpart (bvecmode, operands[1]); + + emit_insn (gen_ext (operands[0], zero_reg, operands[1], + GEN_INT (num_bits / BITS_PER_UNIT))); + DONE; +}) + +;; Helpers for quad-word reduction operations + +; Add (or smin, smax...) the low N/2 elements of the N-element vector +; operand[1] to the high N/2 elements of same. Put the result in operand[0], an +; N/2-element vector. 
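(The quadword reductions that follow use a fold-in-half strategy: one quad_halves step narrows the q-register operand to a d-register, and neon_pairwise_reduce finishes the job with vpadd, vpmin or vpmax. An intrinsics-level sketch of the same idea for a V4SI sum; the .md patterns below do this directly on hard registers rather than through these intrinsics:)

    #include <arm_neon.h>

    int32_t
    sum_v4si (int32x4_t x)
    {
      /* The quad_halves step: add the high doubleword half onto the low.  */
      int32x2_t folded = vadd_s32 (vget_low_s32 (x), vget_high_s32 (x));
      /* The doubleword reduction: one pairwise vpadd finishes the sum.  */
      int32x2_t summed = vpadd_s32 (folded, folded);
      return vget_lane_s32 (summed, 0);
    }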
+ +(define_insn "quad_halves_v4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") + (vqh_ops:V2SI + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SI (match_dup 1) + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_NEON" + ".32\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set_attr "type" "neon_reduc__q")] +) + +(define_insn "quad_halves_v4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (vqhs_ops:V2SF + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])) + (vec_select:V2SF (match_dup 1) + (parallel [(const_int 2) (const_int 3)]))))] + "TARGET_NEON && flag_unsafe_math_optimizations" + ".f32\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set_attr "type" "neon_fp_reduc__s_q")] +) + +(define_insn "quad_halves_v8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "+w") + (vqh_ops:V4HI + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])) + (vec_select:V4HI (match_dup 1) + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] + "TARGET_NEON" + ".16\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set_attr "type" "neon_reduc__q")] +) + +(define_insn "quad_halves_v16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "+w") + (vqh_ops:V8QI + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])) + (vec_select:V8QI (match_dup 1) + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)]))))] + "TARGET_NEON" + ".8\t%P0, %e1, %f1" + [(set_attr "vqh_mnem" "") + (set_attr "type" "neon_reduc__q")] +) + +(define_expand "move_hi_quad_" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "")] + "TARGET_NEON" +{ + emit_move_insn (simplify_gen_subreg (mode, operands[0], mode, + GET_MODE_SIZE (mode)), + operands[1]); + DONE; +}) + +(define_expand "move_lo_quad_" + [(match_operand:ANY128 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "")] + "TARGET_NEON" +{ + emit_move_insn (simplify_gen_subreg (mode, operands[0], + mode, 0), + operands[1]); + DONE; +}) + +;; Reduction operations + +(define_expand "reduc_splus_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpadd_internal); + DONE; +}) + +(define_expand "reduc_splus_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations) + && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_plus (step1, operands[1])); + emit_insn (gen_reduc_splus_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_insn "reduc_splus_v2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w") + (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")] + UNSPEC_VPADD))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vadd.i64\t%e0, %e1, %f1" + [(set_attr "type" "neon_add_q")] +) + +;; NEON does not distinguish between signed and unsigned addition except on +;; widening operations. +(define_expand "reduc_uplus_" + [(match_operand:VDQI 0 "s_register_operand" "") + (match_operand:VDQI 1 "s_register_operand" "")] + "TARGET_NEON && ( || !BYTES_BIG_ENDIAN)" +{ + emit_insn (gen_reduc_splus_ (operands[0], operands[1])); + DONE; +}) + +(define_expand "reduc_smin_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpsmin); + DONE; +}) + +(define_expand "reduc_smin_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations) + && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_smin (step1, operands[1])); + emit_insn (gen_reduc_smin_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_smax_" + [(match_operand:VD 0 "s_register_operand" "") + (match_operand:VD 1 "s_register_operand" "")] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpsmax); + DONE; +}) + +(define_expand "reduc_smax_" + [(match_operand:VQ 0 "s_register_operand" "") + (match_operand:VQ 1 "s_register_operand" "")] + "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations) + && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_smax (step1, operands[1])); + emit_insn (gen_reduc_smax_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_umin_" + [(match_operand:VDI 0 "s_register_operand" "") + (match_operand:VDI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpumin); + DONE; +}) + +(define_expand "reduc_umin_" + [(match_operand:VQI 0 "s_register_operand" "") + (match_operand:VQI 1 "s_register_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_umin (step1, operands[1])); + emit_insn (gen_reduc_umin_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_expand "reduc_umax_" + [(match_operand:VDI 0 "s_register_operand" "") + (match_operand:VDI 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_pairwise_reduce (operands[0], operands[1], mode, + &gen_neon_vpumax); + DONE; +}) + +(define_expand "reduc_umax_" + [(match_operand:VQI 0 "s_register_operand" "") + (match_operand:VQI 1 "s_register_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx step1 = gen_reg_rtx (mode); + rtx res_d = gen_reg_rtx (mode); + + emit_insn (gen_quad_halves_umax (step1, operands[1])); + emit_insn (gen_reduc_umax_ (res_d, step1)); + emit_insn (gen_move_lo_quad_ (operands[0], res_d)); + + DONE; +}) + +(define_insn "neon_vpadd_internal" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPADD))] + "TARGET_NEON" + "vpadd.\t%P0, %P1, %P2" + ;; Assume this schedules like vadd. 
+ [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_add_s") + (const_string "neon_reduc_add")))] +) + +(define_insn "neon_vpsmin" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPSMIN))] + "TARGET_NEON" + "vpmin.\t%P0, %P1, %P2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_minmax_s") + (const_string "neon_reduc_minmax")))] +) + +(define_insn "neon_vpsmax" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")] + UNSPEC_VPSMAX))] + "TARGET_NEON" + "vpmax.\t%P0, %P1, %P2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_minmax_s") + (const_string "neon_reduc_minmax")))] +) + +(define_insn "neon_vpumin" + [(set (match_operand:VDI 0 "s_register_operand" "=w") + (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w")] + UNSPEC_VPUMIN))] + "TARGET_NEON" + "vpmin.\t%P0, %P1, %P2" + [(set_attr "type" "neon_reduc_minmax")] +) + +(define_insn "neon_vpumax" + [(set (match_operand:VDI 0 "s_register_operand" "=w") + (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w")] + UNSPEC_VPUMAX))] + "TARGET_NEON" + "vpmax.\t%P0, %P1, %P2" + [(set_attr "type" "neon_reduc_minmax")] +) + +;; Saturating arithmetic + +; NOTE: Neon supports many more saturating variants of instructions than the +; following, but these are all GCC currently understands. +; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself +; yet either, although these patterns may be used by intrinsics when they're +; added. + +(define_insn "*ss_add_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqadd.\t%P0, %P1, %P2" + [(set_attr "type" "neon_qadd")] +) + +(define_insn "*us_add_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (us_plus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqadd.\t%P0, %P1, %P2" + [(set_attr "type" "neon_qadd")] +) + +(define_insn "*ss_sub_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqsub.\t%P0, %P1, %P2" + [(set_attr "type" "neon_qsub")] +) + +(define_insn "*us_sub_neon" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (us_minus:VD (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w")))] + "TARGET_NEON" + "vqsub.\t%P0, %P1, %P2" + [(set_attr "type" "neon_qsub")] +) + +;; Conditional instructions. These are comparisons with conditional moves for +;; vectors. They perform the assignment: +;; +;; Vop0 = (Vop4 Vop5) ? Vop1 : Vop2; +;; +;; where op3 is <, <=, ==, !=, >= or >. Operations are performed +;; element-wise. 
+ +(define_expand "vcond" + [(set (match_operand:VDQW 0 "s_register_operand" "") + (if_then_else:VDQW + (match_operator 3 "comparison_operator" + [(match_operand:VDQW 4 "s_register_operand" "") + (match_operand:VDQW 5 "nonmemory_operand" "")]) + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + HOST_WIDE_INT magic_word = (mode == V2SFmode || mode == V4SFmode) + ? 3 : 1; + rtx magic_rtx = GEN_INT (magic_word); + int inverse = 0; + int use_zero_form = 0; + int swap_bsl_operands = 0; + rtx mask = gen_reg_rtx (mode); + rtx tmp = gen_reg_rtx (mode); + + rtx (*base_comparison) (rtx, rtx, rtx, rtx); + rtx (*complimentary_comparison) (rtx, rtx, rtx, rtx); + + switch (GET_CODE (operands[3])) + { + case GE: + case GT: + case LE: + case LT: + case EQ: + if (operands[5] == CONST0_RTX (mode)) + { + use_zero_form = 1; + break; + } + /* Fall through. */ + default: + if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case UNLT: + inverse = 1; + /* Fall through. */ + case GE: + case UNGE: + case ORDERED: + case UNORDERED: + base_comparison = gen_neon_vcge; + complimentary_comparison = gen_neon_vcgt; + break; + case LE: + case UNLE: + inverse = 1; + /* Fall through. */ + case GT: + case UNGT: + base_comparison = gen_neon_vcgt; + complimentary_comparison = gen_neon_vcge; + break; + case EQ: + case NE: + case UNEQ: + base_comparison = gen_neon_vceq; + complimentary_comparison = gen_neon_vceq; + break; + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[3])) + { + case LT: + case LE: + case GT: + case GE: + case EQ: + /* The easy case. Here we emit one of vcge, vcgt or vceq. + As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: + a GE b -> a GE b + a GT b -> a GT b + a LE b -> b GE a + a LT b -> b GT a + a EQ b -> a EQ b + Note that there also exist direct comparison against 0 forms, + so catch those as a special case. */ + if (use_zero_form) + { + inverse = 0; + switch (GET_CODE (operands[3])) + { + case LT: + base_comparison = gen_neon_vclt; + break; + case LE: + base_comparison = gen_neon_vcle; + break; + default: + /* Do nothing, other zero form cases already have the correct + base_comparison. */ + break; + } + } + + if (!inverse) + emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx)); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx)); + break; + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case NE: + /* Vector compare returns false for lanes which are unordered, so if we use + the inverse of the comparison we actually want to emit, then + swap the operands to BSL, we will end up with the correct result. + Note that a NE NaN and NaN NE b are true for all a, b. + + Our transformations are: + a GE b -> !(b GT a) + a GT b -> !(b GE a) + a LE b -> !(a GT b) + a LT b -> !(a GE b) + a NE b -> !(a EQ b) */ + + if (inverse) + emit_insn (base_comparison (mask, operands[4], operands[5], magic_rtx)); + else + emit_insn (complimentary_comparison (mask, operands[5], operands[4], magic_rtx)); + + swap_bsl_operands = 1; + break; + case UNEQ: + /* We check (a > b || b > a). combining these comparisons give us + true iff !(a != b && a ORDERED b), swapping the operands to BSL + will then give us (a == b || a UNORDERED b) as intended. 
*/ + + emit_insn (gen_neon_vcgt (mask, operands[4], operands[5], magic_rtx)); + emit_insn (gen_neon_vcgt (tmp, operands[5], operands[4], magic_rtx)); + emit_insn (gen_ior3 (mask, mask, tmp)); + swap_bsl_operands = 1; + break; + case UNORDERED: + /* Operands are ORDERED iff (a > b || b >= a). + Swapping the operands to BSL will give the UNORDERED case. */ + swap_bsl_operands = 1; + /* Fall through. */ + case ORDERED: + emit_insn (gen_neon_vcgt (tmp, operands[4], operands[5], magic_rtx)); + emit_insn (gen_neon_vcge (mask, operands[5], operands[4], magic_rtx)); + emit_insn (gen_ior3 (mask, mask, tmp)); + break; + default: + gcc_unreachable (); + } + + if (swap_bsl_operands) + emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], + operands[2])); + DONE; +}) + +(define_expand "vcondu" + [(set (match_operand:VDQIW 0 "s_register_operand" "") + (if_then_else:VDQIW + (match_operator 3 "arm_comparison_operator" + [(match_operand:VDQIW 4 "s_register_operand" "") + (match_operand:VDQIW 5 "s_register_operand" "")]) + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:VDQIW 2 "s_register_operand" "")))] + "TARGET_NEON" +{ + rtx mask; + int inverse = 0, immediate_zero = 0; + + mask = gen_reg_rtx (mode); + + if (operands[5] == CONST0_RTX (mode)) + immediate_zero = 1; + else if (!REG_P (operands[5])) + operands[5] = force_reg (mode, operands[5]); + + switch (GET_CODE (operands[3])) + { + case GEU: + emit_insn (gen_neon_vcge (mask, operands[4], operands[5], + const0_rtx)); + break; + + case GTU: + emit_insn (gen_neon_vcgt (mask, operands[4], operands[5], + const0_rtx)); + break; + + case EQ: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + const0_rtx)); + break; + + case LEU: + if (immediate_zero) + emit_insn (gen_neon_vcle (mask, operands[4], operands[5], + const0_rtx)); + else + emit_insn (gen_neon_vcge (mask, operands[5], operands[4], + const0_rtx)); + break; + + case LTU: + if (immediate_zero) + emit_insn (gen_neon_vclt (mask, operands[4], operands[5], + const0_rtx)); + else + emit_insn (gen_neon_vcgt (mask, operands[5], operands[4], + const0_rtx)); + break; + + case NE: + emit_insn (gen_neon_vceq (mask, operands[4], operands[5], + const0_rtx)); + inverse = 1; + break; + + default: + gcc_unreachable (); + } + + if (inverse) + emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], + operands[1])); + else + emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], + operands[2])); + + DONE; +}) + +;; Patterns for builtins. + +; good for plain vadd, vaddq. + +(define_expand "neon_vadd" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_add3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vadd_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Note that NEON operations don't support the full IEEE 754 standard: in +; particular, denormal values are flushed to zero. This means that GCC cannot +; use those instructions for autovectorization, etc. unless +; -funsafe-math-optimizations is in effect (in which case flush-to-zero +; behaviour is permissible). 
Intrinsic operations (provided by the arm_neon.h +; header) must work in either case: if -funsafe-math-optimizations is given, +; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics +; expand to unspecs (which may potentially limit the extent to which they might +; be optimized by generic code). + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vadd_unspec" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w")] + UNSPEC_VADD))] + "TARGET_NEON" + "vadd.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_add")))] +) + +; operand 3 represents in bits: +; bit 0: signed (vs unsigned). +; bit 1: rounding (vs none). + +(define_insn "neon_vaddl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDL))] + "TARGET_NEON" + "vaddl.%T3%#\t%q0, %P1, %P2" + [(set_attr "type" "neon_add_long")] +) + +(define_insn "neon_vaddw" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDW))] + "TARGET_NEON" + "vaddw.%T3%#\t%q0, %q1, %P2" + [(set_attr "type" "neon_add_widen")] +) + +; vhadd and vrhadd. + +(define_insn "neon_vhadd" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VHADD))] + "TARGET_NEON" + "v%O3hadd.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_add_halve_q")] +) + +(define_insn "neon_vqadd" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQADD))] + "TARGET_NEON" + "vqadd.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_qadd")] +) + +(define_insn "neon_vaddhn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:VN 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VADDHN))] + "TARGET_NEON" + "v%O3addhn.\t%P0, %q1, %q2" + [(set_attr "type" "neon_add_halve_narrow_q")] +) + +;; We cannot replace this unspec with mul3 because of the odd +;; polynomial multiplication case that can specified by operand 3. +(define_insn "neon_vmul" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMUL))] + "TARGET_NEON" + "vmul.%F3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mul_s") + (const_string "neon_mul_")))] +) + +(define_expand "neon_vmla" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! 
|| flag_unsafe_math_optimizations) + emit_insn (gen_mul3add_neon (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_neon_vmla_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +(define_expand "neon_vfma" + [(match_operand:VCVTF 0 "s_register_operand") + (match_operand:VCVTF 1 "s_register_operand") + (match_operand:VCVTF 2 "s_register_operand") + (match_operand:VCVTF 3 "s_register_operand") + (match_operand:SI 4 "immediate_operand")] + "TARGET_NEON && TARGET_FMA" +{ + emit_insn (gen_fma4_intrinsic (operands[0], operands[2], operands[3], + operands[1])); + DONE; +}) + +(define_expand "neon_vfms" + [(match_operand:VCVTF 0 "s_register_operand") + (match_operand:VCVTF 1 "s_register_operand") + (match_operand:VCVTF 2 "s_register_operand") + (match_operand:VCVTF 3 "s_register_operand") + (match_operand:SI 4 "immediate_operand")] + "TARGET_NEON && TARGET_FMA" +{ + emit_insn (gen_fmsub4_intrinsic (operands[0], operands[2], operands[3], + operands[1])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vmla_unspec" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w")] + UNSPEC_VMLA))] + "TARGET_NEON" + "vmla.\t%0, %2, %3" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s") + (const_string "neon_mla_")))] +) + +(define_insn "neon_vmlal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMLAL))] + "TARGET_NEON" + "vmlal.%T4%#\t%q0, %P2, %P3" + [(set_attr "type" "neon_mla__long")] +) + +(define_expand "neon_vmls" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_mul3negadd_neon (operands[0], + operands[1], operands[2], operands[3])); + else + emit_insn (gen_neon_vmls_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. 
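As an illustrative aside (not part of the patch): the multiply-accumulate expanders above back intrinsics of the vmla/vfma family; whether they expand to canonical mul-plus-add RTL or to the unspec form is an internal decision driven by -funsafe-math-optimizations, invisible at the source level. A minimal usage sketch (the fused variant additionally needs an FMA-capable FPU, e.g. -mfpu=neon-vfpv4):

#include <arm_neon.h>

/* Illustrative only: acc + a * b across all four float lanes.
   vmlaq_f32 is the kind of operation the vmla patterns back;
   vfmaq_f32 is the fused counterpart handled by the vfma pattern.  */
float32x4_t madd (float32x4_t acc, float32x4_t a, float32x4_t b)
{
  return vmlaq_f32 (acc, a, b);
}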
+ +(define_insn "neon_vmls_unspec" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w")] + UNSPEC_VMLS))] + "TARGET_NEON" + "vmls.\t%0, %2, %3" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s") + (const_string "neon_mla_")))] +) + +(define_insn "neon_vmlsl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMLSL))] + "TARGET_NEON" + "vmlsl.%T4%#\t%q0, %P2, %P3" + [(set_attr "type" "neon_mla__long")] +) + +(define_insn "neon_vqdmulh" + [(set (match_operand:VMDQI 0 "s_register_operand" "=w") + (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w") + (match_operand:VMDQI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQDMULH))] + "TARGET_NEON" + "vq%O3dmulh.\t%0, %1, %2" + [(set_attr "type" "neon_sat_mul_")] +) + +(define_insn "neon_vqdmlal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMLAL))] + "TARGET_NEON" + "vqdmlal.\t%q0, %P2, %P3" + [(set_attr "type" "neon_sat_mla__long")] +) + +(define_insn "neon_vqdmlsl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMLSL))] + "TARGET_NEON" + "vqdmlsl.\t%q0, %P2, %P3" + [(set_attr "type" "neon_sat_mla__long")] +) + +(define_insn "neon_vmull" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMULL))] + "TARGET_NEON" + "vmull.%T3%#\t%q0, %P1, %P2" + [(set_attr "type" "neon_mul__long")] +) + +(define_insn "neon_vqdmull" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQDMULL))] + "TARGET_NEON" + "vqdmull.\t%q0, %P1, %P2" + [(set_attr "type" "neon_sat_mul__long")] +) + +(define_expand "neon_vsub" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (! || flag_unsafe_math_optimizations) + emit_insn (gen_sub3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vsub_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. 
+ +(define_insn "neon_vsub_unspec" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w")] + UNSPEC_VSUB))] + "TARGET_NEON" + "vsub.\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_addsub_s") + (const_string "neon_sub")))] +) + +(define_insn "neon_vsubl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDI 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSUBL))] + "TARGET_NEON" + "vsubl.%T3%#\t%q0, %P1, %P2" + [(set_attr "type" "neon_sub_long")] +) + +(define_insn "neon_vsubw" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "w") + (match_operand:VDI 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSUBW))] + "TARGET_NEON" + "vsubw.%T3%#\t%q0, %q1, %P2" + [(set_attr "type" "neon_sub_widen")] +) + +(define_insn "neon_vqsub" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSUB))] + "TARGET_NEON" + "vqsub.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_qsub")] +) + +(define_insn "neon_vhsub" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VHSUB))] + "TARGET_NEON" + "vhsub.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_sub_halve")] +) + +(define_insn "neon_vsubhn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:VN 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSUBHN))] + "TARGET_NEON" + "v%O3subhn.\t%P0, %q1, %q2" + [(set_attr "type" "neon_sub_halve_narrow_q")] +) + +(define_insn "neon_vceq" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCEQ))] + "TARGET_NEON" + "@ + vceq.\t%0, %1, %2 + vceq.\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vcge" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCGE))] + "TARGET_NEON" + "@ + vcge.%T3%#\t%0, %1, %2 + vcge.%T3%#\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vcgeu" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCGEU))] + "TARGET_NEON" + "vcge.%T3%#\t%0, %1, %2" + [(set_attr 
"type" "neon_compare")] +) + +(define_insn "neon_vcgt" + [(set (match_operand: 0 "s_register_operand" "=w,w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w,w") + (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz") + (match_operand:SI 3 "immediate_operand" "i,i")] + UNSPEC_VCGT))] + "TARGET_NEON" + "@ + vcgt.%T3%#\t%0, %1, %2 + vcgt.%T3%#\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vcgtu" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCGTU))] + "TARGET_NEON" + "vcgt.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_compare")] +) + +;; VCLE and VCLT only support comparisons with immediate zero (register +;; variants are VCGE and VCGT with operands reversed). + +(define_insn "neon_vcle" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "zero_operand" "Dz") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCLE))] + "TARGET_NEON" + "vcle.%T3%#\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vclt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: + [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "zero_operand" "Dz") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCLT))] + "TARGET_NEON" + "vclt.%T3%#\t%0, %1, #0" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_compare_s") + (if_then_else (match_operand 2 "zero_operand") + (const_string "neon_compare_zero") + (const_string "neon_compare"))))] +) + +(define_insn "neon_vcage" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCAGE))] + "TARGET_NEON" + "vacge.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] +) + +(define_insn "neon_vcagt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCAGT))] + "TARGET_NEON" + "vacgt.\t%0, %1, %2" + [(set_attr "type" "neon_fp_compare_s")] +) + +(define_insn "neon_vtst" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VTST))] + "TARGET_NEON" + "vtst.\t%0, %1, %2" + [(set_attr "type" "neon_tst")] +) + +(define_insn "neon_vabd" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VABD))] + "TARGET_NEON" + "vabd.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_abd_s") + (const_string "neon_abd")))] +) + 
+(define_insn "neon_vabdl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:VW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VABDL))] + "TARGET_NEON" + "vabdl.%T3%#\t%q0, %P1, %P2" + [(set_attr "type" "neon_abd_long")] +) + +(define_insn "neon_vaba" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:VDQIW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABD) + (match_operand:VDQIW 1 "s_register_operand" "0")))] + "TARGET_NEON" + "vaba.%T4%#\t%0, %2, %3" + [(set_attr "type" "neon_arith_acc")] +) + +(define_insn "neon_vabal" + [(set (match_operand: 0 "s_register_operand" "=w") + (plus: (unspec: [(match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABDL) + (match_operand: 1 "s_register_operand" "0")))] + "TARGET_NEON" + "vabal.%T4%#\t%q0, %P2, %P3" + [(set_attr "type" "neon_arith_acc")] +) + +(define_insn "neon_vmax" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMAX))] + "TARGET_NEON" + "vmax.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_minmax_s") + (const_string "neon_minmax")))] +) + +(define_insn "neon_vmin" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VMIN))] + "TARGET_NEON" + "vmin.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_minmax_s") + (const_string "neon_minmax")))] +) + +(define_expand "neon_vpadd" + [(match_operand:VD 0 "s_register_operand" "=w") + (match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + emit_insn (gen_neon_vpadd_internal (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_insn "neon_vpaddl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VPADDL))] + "TARGET_NEON" + "vpaddl.%T2%#\t%0, %1" + [(set_attr "type" "neon_reduc_add_long")] +) + +(define_insn "neon_vpadal" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPADAL))] + "TARGET_NEON" + "vpadal.%T3%#\t%0, %2" + [(set_attr "type" "neon_reduc_add_acc")] +) + +(define_insn "neon_vpmax" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPMAX))] + "TARGET_NEON" + "vpmax.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_minmax_s") + (const_string "neon_reduc_minmax")))] +) + +(define_insn "neon_vpmin" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (unspec:VD 
[(match_operand:VD 1 "s_register_operand" "w") + (match_operand:VD 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VPMIN))] + "TARGET_NEON" + "vpmin.%T3%#\t%0, %1, %2" + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_reduc_minmax_s") + (const_string "neon_reduc_minmax")))] +) + +(define_insn "neon_vrecps" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VRECPS))] + "TARGET_NEON" + "vrecps.\t%0, %1, %2" + [(set_attr "type" "neon_fp_recps_s")] +) + +(define_insn "neon_vrsqrts" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VRSQRTS))] + "TARGET_NEON" + "vrsqrts.\t%0, %1, %2" + [(set_attr "type" "neon_fp_rsqrts_s")] +) + +(define_expand "neon_vabs" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_abs2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vqabs" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQABS))] + "TARGET_NEON" + "vqabs.\t%0, %1" + [(set_attr "type" "neon_qabs")] +) + +(define_expand "neon_vneg" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_neg2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vqneg" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQNEG))] + "TARGET_NEON" + "vqneg.\t%0, %1" + [(set_attr "type" "neon_qneg")] +) + +(define_insn "neon_vcls" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCLS))] + "TARGET_NEON" + "vcls.\t%0, %1" + [(set_attr "type" "neon_cls")] +) + +(define_insn "clz2" + [(set (match_operand:VDQIW 0 "s_register_operand" "=w") + (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vclz.\t%0, %1" + [(set_attr "type" "neon_cnt")] +) + +(define_expand "neon_vclz" + [(match_operand:VDQIW 0 "s_register_operand" "") + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_clz2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "popcount2" + [(set (match_operand:VE 0 "s_register_operand" "=w") + (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcnt.\t%0, %1" + [(set_attr "type" "neon_cnt")] +) + +(define_expand "neon_vcnt" + [(match_operand:VE 0 "s_register_operand" "=w") + (match_operand:VE 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + emit_insn (gen_popcount2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vrecpe" + [(set (match_operand:V32 0 "s_register_operand" "=w") + (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w") + 
(match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VRECPE))] + "TARGET_NEON" + "vrecpe.\t%0, %1" + [(set_attr "type" "neon_fp_recpe_s")] +) + +(define_insn "neon_vrsqrte" + [(set (match_operand:V32 0 "s_register_operand" "=w") + (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VRSQRTE))] + "TARGET_NEON" + "vrsqrte.\t%0, %1" + [(set_attr "type" "neon_fp_rsqrte_s")] +) + +(define_expand "neon_vmvn" + [(match_operand:VDQIW 0 "s_register_operand" "") + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_one_cmpl2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "neon_vget_lane_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select: + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov.s\t%0, %P1[%c2]"; +} + [(set_attr "type" "neon_to_gp")] +) + +(define_insn "neon_vget_lane_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select: + (match_operand:VD 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + return "vmov.u\t%0, %P1[%c2]"; +} + [(set_attr "type" "neon_to_gp")] +) + +(define_insn "neon_vget_lane_sext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (sign_extend:SI + (vec_select: + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (mode) / 2; + unsigned int elt = INTVAL (operands[2]); + unsigned int elt_adj = elt % halfelts; + + if (BYTES_BIG_ENDIAN) + elt_adj = halfelts - 1 - elt_adj; + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt_adj); + output_asm_insn ("vmov.s\t%0, %P1[%c2]", ops); + + return ""; +} + [(set_attr "type" "neon_to_gp_q")] +) + +(define_insn "neon_vget_lane_zext_internal" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (vec_select: + (match_operand:VQ 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + rtx ops[3]; + int regno = REGNO (operands[1]); + unsigned int halfelts = GET_MODE_NUNITS (mode) / 2; + unsigned int elt = INTVAL (operands[2]); + unsigned int elt_adj = elt % halfelts; + + if (BYTES_BIG_ENDIAN) + elt_adj = halfelts - 1 - elt_adj; + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (mode, regno + 2 * (elt / halfelts)); + ops[2] = GEN_INT (elt_adj); + output_asm_insn ("vmov.u\t%0, %P1[%c2]", ops); + + return ""; +} + [(set_attr "type" "neon_to_gp_q")] +) + +(define_expand "neon_vget_lane" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + HOST_WIDE_INT magic = INTVAL (operands[3]); + rtx insn; + + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); + + if (BYTES_BIG_ENDIAN) + { + /* The 
intrinsics are defined in terms of a model where the + element ordering in memory is vldm order, whereas the generic + RTL is defined in terms of a model where the element ordering + in memory is array order. Convert the lane number to conform + to this model. */ + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + + if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (mode)) == 32) + insn = gen_vec_extract (operands[0], operands[1], operands[2]); + else + { + if ((magic & 1) != 0) + insn = gen_neon_vget_lane_sext_internal (operands[0], operands[1], + operands[2]); + else + insn = gen_neon_vget_lane_zext_internal (operands[0], operands[1], + operands[2]); + } + emit_insn (insn); + DONE; +}) + +; Operand 3 (info word) is ignored because it does nothing useful with 64-bit +; elements. + +(define_expand "neon_vget_lanedi" + [(match_operand:DI 0 "s_register_operand" "=r") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vget_lanev2di" + [(match_operand:DI 0 "s_register_operand" "") + (match_operand:V2DI 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + switch (INTVAL (operands[2])) + { + case 0: + emit_move_insn (operands[0], gen_lowpart (DImode, operands[1])); + break; + case 1: + emit_move_insn (operands[0], gen_highpart (DImode, operands[1])); + break; + default: + neon_lane_bounds (operands[2], 0, 1); + FAIL; + } + DONE; +}) + +(define_expand "neon_vset_lane" + [(match_operand:VDQ 0 "s_register_operand" "=w") + (match_operand: 1 "s_register_operand" "r") + (match_operand:VDQ 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + unsigned int elt = INTVAL (operands[3]); + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + + if (BYTES_BIG_ENDIAN) + { + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + } + + emit_insn (gen_vec_set_internal (operands[0], operands[1], + GEN_INT (1 << elt), operands[2])); + DONE; +}) + +; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored. 
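As an illustrative aside (not part of the patch): the vget_lane/vset_lane expanders above perform the big-endian lane renumbering themselves, so source code using the intrinsics does not need to adjust lane indices for endianness. A hedged sketch of their use; lane numbers must be compile-time constants:

#include <arm_neon.h>

/* Illustrative only: read lane 2 of a quad register and write lane 0
   of another.  */
int16_t read_lane2 (int16x8_t v)
{
  return vgetq_lane_s16 (v, 2);
}

float32x4_t set_lane0 (float32x4_t v, float x)
{
  return vsetq_lane_f32 (x, v, 0);
}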
+ +(define_expand "neon_vset_lanedi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "r") + (match_operand:DI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vcreate" + [(match_operand:VDX 0 "s_register_operand" "") + (match_operand:DI 1 "general_operand" "")] + "TARGET_NEON" +{ + rtx src = gen_lowpart (mode, operands[1]); + emit_move_insn (operands[0], src); + DONE; +}) + +(define_insn "neon_vdup_n" + [(set (match_operand:VX 0 "s_register_operand" "=w") + (vec_duplicate:VX (match_operand: 1 "s_register_operand" "r")))] + "TARGET_NEON" + "vdup.\t%0, %1" + [(set_attr "type" "neon_from_gp")] +) + +(define_insn "neon_vdup_n" + [(set (match_operand:V32 0 "s_register_operand" "=w,w") + (vec_duplicate:V32 (match_operand: 1 "s_register_operand" "r,t")))] + "TARGET_NEON" + "@ + vdup.\t%0, %1 + vdup.\t%0, %y1" + [(set_attr "type" "neon_from_gp,neon_dup")] +) + +(define_expand "neon_vdup_ndi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "r")] + "TARGET_NEON" +{ + emit_move_insn (operands[0], operands[1]); + DONE; +} +) + +(define_insn "neon_vdup_nv2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") + (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))] + "TARGET_NEON" + "@ + vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1 + vmov\t%e0, %P1\;vmov\t%f0, %P1" + [(set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn "neon_vdup_lane_internal" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (vec_duplicate:VDQW + (vec_select: + (match_operand: 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_NEON" +{ + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + if () + return "vdup.\t%P0, %P1[%c2]"; + else + return "vdup.\t%q0, %P1[%c2]"; +} + [(set_attr "type" "neon_dup")] +) + +(define_expand "neon_vdup_lane" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand: 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (mode)); + if (BYTES_BIG_ENDIAN) + { + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + emit_insn (gen_neon_vdup_lane_internal (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Scalar index is ignored, since only zero is valid here. +(define_expand "neon_vdup_lanedi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +; Likewise for v2di, as the DImode second operand has only a single element. 
+(define_expand "neon_vdup_lanev2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + neon_lane_bounds (operands[2], 0, 1); + emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); + DONE; +}) + +; Disabled before reload because we don't want combine doing something silly, +; but used by the post-reload expansion of neon_vcombine. +(define_insn "*neon_vswp" + [(set (match_operand:VDQX 0 "s_register_operand" "+w") + (match_operand:VDQX 1 "s_register_operand" "+w")) + (set (match_dup 1) (match_dup 0))] + "TARGET_NEON && reload_completed" + "vswp\t%0, %1" + [(set_attr "type" "neon_permute")] +) + +;; In this insn, operand 1 should be low, and operand 2 the high part of the +;; dest vector. +;; FIXME: A different implementation of this builtin could make it much +;; more likely that we wouldn't actually need to output anything (we could make +;; it so that the reg allocator puts things in the right places magically +;; instead). Lack of subregs for vectors makes that tricky though, I think. + +(define_insn_and_split "neon_vcombine" + [(set (match_operand: 0 "s_register_operand" "=w") + (vec_concat: + (match_operand:VDX 1 "s_register_operand" "w") + (match_operand:VDX 2 "s_register_operand" "w")))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + neon_split_vcombine (operands); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_expand "neon_vget_high" + [(match_operand: 0 "s_register_operand") + (match_operand:VQX 1 "s_register_operand")] + "TARGET_NEON" +{ + emit_move_insn (operands[0], + simplify_gen_subreg (mode, operands[1], mode, + GET_MODE_SIZE (mode))); + DONE; +}) + +(define_expand "neon_vget_low" + [(match_operand: 0 "s_register_operand") + (match_operand:VQX 1 "s_register_operand")] + "TARGET_NEON" +{ + emit_move_insn (operands[0], + simplify_gen_subreg (mode, operands[1], + mode, 0)); + DONE; +}) + +(define_insn "float2" + [(set (match_operand: 0 "s_register_operand" "=w") + (float: (match_operand:VCVTI 1 "s_register_operand" "w")))] + "TARGET_NEON && !flag_rounding_math" + "vcvt.f32.s32\t%0, %1" + [(set_attr "type" "neon_int_to_fp_")] +) + +(define_insn "floatuns2" + [(set (match_operand: 0 "s_register_operand" "=w") + (unsigned_float: (match_operand:VCVTI 1 "s_register_operand" "w")))] + "TARGET_NEON && !flag_rounding_math" + "vcvt.f32.u32\t%0, %1" + [(set_attr "type" "neon_int_to_fp_")] +) + +(define_insn "fix_trunc2" + [(set (match_operand: 0 "s_register_operand" "=w") + (fix: (match_operand:VCVTF 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.s32.f32\t%0, %1" + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "fixuns_trunc2" + [(set (match_operand: 0 "s_register_operand" "=w") + (unsigned_fix: (match_operand:VCVTF 1 "s_register_operand" "w")))] + "TARGET_NEON" + "vcvt.u32.f32\t%0, %1" + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "neon_vcvt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCVT))] + "TARGET_NEON" + "vcvt.%T2%#32.f32\t%0, %1" + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "neon_vcvt" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VCVT))] + "TARGET_NEON" + "vcvt.f32.%T2%#32\t%0, %1" + [(set_attr "type" "neon_int_to_fp_")] +) + 
+(define_insn "neon_vcvtv4sfv4hf" + [(set (match_operand:V4SF 0 "s_register_operand" "=w") + (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")] + UNSPEC_VCVT))] + "TARGET_NEON && TARGET_FP16" + "vcvt.f32.f16\t%q0, %P1" + [(set_attr "type" "neon_fp_cvt_widen_h")] +) + +(define_insn "neon_vcvtv4hfv4sf" + [(set (match_operand:V4HF 0 "s_register_operand" "=w") + (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")] + UNSPEC_VCVT))] + "TARGET_NEON && TARGET_FP16" + "vcvt.f16.f32\t%P0, %q1" + [(set_attr "type" "neon_fp_cvt_narrow_s_q")] +) + +(define_insn "neon_vcvt_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCVT_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, 33); + return "vcvt.%T3%#32.f32\t%0, %1, %2"; +} + [(set_attr "type" "neon_fp_to_int_")] +) + +(define_insn "neon_vcvt_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VCVTI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCVT_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, 33); + return "vcvt.f32.%T3%#32\t%0, %1, %2"; +} + [(set_attr "type" "neon_int_to_fp_")] +) + +(define_insn "neon_vmovn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VMOVN))] + "TARGET_NEON" + "vmovn.\t%P0, %q1" + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "neon_vqmovn" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQMOVN))] + "TARGET_NEON" + "vqmovn.%T2%#\t%P0, %q1" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "neon_vqmovun" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VQMOVUN))] + "TARGET_NEON" + "vqmovun.\t%P0, %q1" + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "neon_vmovl" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VMOVL))] + "TARGET_NEON" + "vmovl.%T2%#\t%q0, %P1" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "neon_vmul_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w") + (match_operand:VMD 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMUL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmul.\t%P0, %P1, %P2[%c3]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mul_s_scalar") + (const_string "neon_mul__scalar")))] +) + +(define_insn "neon_vmul_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w") + (match_operand: 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMUL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return 
"vmul.\t%q0, %q1, %P2[%c3]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mul_s_scalar") + (const_string "neon_mul__scalar")))] +) + +(define_insn "neon_vmull_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VMULL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vmull.%T4%#\t%q0, %P1, %P2[%c3]"; +} + [(set_attr "type" "neon_mul__scalar_long")] +) + +(define_insn "neon_vqdmull_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vqdmull.\t%q0, %P1, %P2[%c3]"; +} + [(set_attr "type" "neon_sat_mul__scalar_long")] +) + +(define_insn "neon_vqdmulh_lane" + [(set (match_operand:VMQI 0 "s_register_operand" "=w") + (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w") + (match_operand: 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULH_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vq%O4dmulh.%T4%#\t%q0, %q1, %P2[%c3]"; +} + [(set_attr "type" "neon_sat_mul__scalar_q")] +) + +(define_insn "neon_vqdmulh_lane" + [(set (match_operand:VMDI 0 "s_register_operand" "=w") + (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w") + (match_operand:VMDI 2 "s_register_operand" + "") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VQDMULH_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vq%O4dmulh.%T4%#\t%P0, %P1, %P2[%c3]"; +} + [(set_attr "type" "neon_sat_mul__scalar_q")] +) + +(define_insn "neon_vmla_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") + (match_operand:VMD 2 "s_register_operand" "w") + (match_operand:VMD 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLA_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%P0, %P2, %P3[%c4]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s_scalar") + (const_string "neon_mla__scalar")))] +) + +(define_insn "neon_vmla_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") + (match_operand:VMQ 2 "s_register_operand" "w") + (match_operand: 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLA_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmla.\t%q0, %q2, %P3[%c4]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s_scalar") + (const_string "neon_mla__scalar")))] +) + +(define_insn "neon_vmlal_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 
"s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLAL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlal.%T5%#\t%q0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_insn "neon_vqdmlal_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VQDMLAL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlal.\t%q0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "neon_vmls_lane" + [(set (match_operand:VMD 0 "s_register_operand" "=w") + (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0") + (match_operand:VMD 2 "s_register_operand" "w") + (match_operand:VMD 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLS_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%P0, %P2, %P3[%c4]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s_scalar") + (const_string "neon_mla__scalar")))] +) + +(define_insn "neon_vmls_lane" + [(set (match_operand:VMQ 0 "s_register_operand" "=w") + (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0") + (match_operand:VMQ 2 "s_register_operand" "w") + (match_operand: 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLS_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmls.\t%q0, %q2, %P3[%c4]"; +} + [(set (attr "type") + (if_then_else (match_test "") + (const_string "neon_fp_mla_s_scalar") + (const_string "neon_mla__scalar")))] +) + +(define_insn "neon_vmlsl_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VMLSL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vmlsl.%T5%#\t%q0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_mla__scalar_long")] +) + +(define_insn "neon_vqdmlsl_lane" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand: 1 "s_register_operand" "0") + (match_operand:VMDI 2 "s_register_operand" "w") + (match_operand:VMDI 3 "s_register_operand" + "") + (match_operand:SI 4 "immediate_operand" "i") + (match_operand:SI 5 "immediate_operand" "i")] + UNSPEC_VQDMLSL_LANE))] + "TARGET_NEON" +{ + neon_lane_bounds (operands[4], 0, GET_MODE_NUNITS (mode)); + return "vqdmlsl.\t%q0, %P2, %P3[%c4]"; +} + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a +; core register into a temp register, then use a scalar taken from that. This +; isn't an optimal solution if e.g. 
the scalar has just been read from memory +; or extracted from another vector. The latter case it's currently better to +; use the "_lane" variant, and the former case can probably be implemented +; using vld1_lane, but that hasn't been done yet. + +(define_expand "neon_vmul_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmul_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vmul_n" + [(match_operand:VMQ 0 "s_register_operand" "") + (match_operand:VMQ 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmul_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vmull_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vmull_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vqdmull_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmull_lane (operands[0], operands[1], tmp, + const0_rtx, const0_rtx)); + DONE; +}) + +(define_expand "neon_vqdmulh_n" + [(match_operand:VMDI 0 "s_register_operand" "") + (match_operand:VMDI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmulh_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vqdmulh_n" + [(match_operand:VMQI 0 "s_register_operand" "") + (match_operand:VMQI 1 "s_register_operand" "") + (match_operand: 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[2], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmulh_lane (operands[0], operands[1], tmp, + const0_rtx, operands[3])); + DONE; +}) + +(define_expand "neon_vmla_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand:VMD 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmla_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmla_n" + [(match_operand:VMQ 0 "s_register_operand" "") + 
(match_operand:VMQ 1 "s_register_operand" "") + (match_operand:VMQ 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmla_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmlal_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmlal_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vqdmlal_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmlal_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmls_n" + [(match_operand:VMD 0 "s_register_operand" "") + (match_operand:VMD 1 "s_register_operand" "") + (match_operand:VMD 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmls_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmls_n" + [(match_operand:VMQ 0 "s_register_operand" "") + (match_operand:VMQ 1 "s_register_operand" "") + (match_operand:VMQ 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmls_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vmlsl_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vmlsl_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_expand "neon_vqdmlsl_n" + [(match_operand: 0 "s_register_operand" "") + (match_operand: 1 "s_register_operand" "") + (match_operand:VMDI 2 "s_register_operand" "") + (match_operand: 3 "s_register_operand" "") + (match_operand:SI 4 "immediate_operand" "")] + "TARGET_NEON" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_neon_vset_lane (tmp, operands[3], tmp, const0_rtx)); + emit_insn (gen_neon_vqdmlsl_lane (operands[0], operands[1], operands[2], + tmp, const0_rtx, operands[4])); + DONE; +}) + +(define_insn "neon_vext" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") 
+ (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VEXT))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (mode)); + return "vext.\t%0, %1, %2, %3"; +} + [(set_attr "type" "neon_ext")] +) + +(define_insn "neon_vrev64" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV64))] + "TARGET_NEON" + "vrev64.\t%0, %1" + [(set_attr "type" "neon_rev")] +) + +(define_insn "neon_vrev32" + [(set (match_operand:VX 0 "s_register_operand" "=w") + (unspec:VX [(match_operand:VX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV32))] + "TARGET_NEON" + "vrev32.\t%0, %1" + [(set_attr "type" "neon_rev")] +) + +(define_insn "neon_vrev16" + [(set (match_operand:VE 0 "s_register_operand" "=w") + (unspec:VE [(match_operand:VE 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VREV16))] + "TARGET_NEON" + "vrev16.\t%0, %1" + [(set_attr "type" "neon_rev")] +) + +; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register +; allocation. For an intrinsic of form: +; rD = vbsl_* (rS, rN, rM) +; We can use any of: +; vbsl rS, rN, rM (if D = S) +; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM) +; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN) + +(define_insn "neon_vbsl_internal" + [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w") + (match_operand:VDQX 2 "s_register_operand" " w,w,0") + (match_operand:VDQX 3 "s_register_operand" " w,0,w")] + UNSPEC_VBSL))] + "TARGET_NEON" + "@ + vbsl\t%0, %2, %3 + vbit\t%0, %2, %1 + vbif\t%0, %3, %1" + [(set_attr "type" "neon_bsl")] +) + +(define_expand "neon_vbsl" + [(set (match_operand:VDQX 0 "s_register_operand" "") + (unspec:VDQX [(match_operand: 1 "s_register_operand" "") + (match_operand:VDQX 2 "s_register_operand" "") + (match_operand:VDQX 3 "s_register_operand" "")] + UNSPEC_VBSL))] + "TARGET_NEON" +{ + /* We can't alias operands together if they have different modes. 
*/ + operands[1] = gen_lowpart (mode, operands[1]); +}) + +(define_insn "neon_vshl" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHL))] + "TARGET_NEON" + "v%O3shl.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "neon_vqshl" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHL))] + "TARGET_NEON" + "vq%O3shl.%T3%#\t%0, %1, %2" + [(set_attr "type" "neon_sat_shift_imm")] +) + +(define_insn "neon_vshr_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHR_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) + 1); + return "v%O3shr.%T3%#\t%0, %1, %2"; +} + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "neon_vshrn_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHRN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "v%O3shrn.\t%P0, %q1, %2"; +} + [(set_attr "type" "neon_shift_imm_narrow_q")] +) + +(define_insn "neon_vqshrn_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHRN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "vq%O3shrn.%T3%#\t%P0, %q1, %2"; +} + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "neon_vqshrun_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VN 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHRUN_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 1, neon_element_bits (mode) / 2 + 1); + return "vq%O3shrun.%T3%#\t%P0, %q1, %2"; +} + [(set_attr "type" "neon_sat_shift_imm_narrow_q")] +) + +(define_insn "neon_vshl_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHL_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return "vshl.\t%0, %1, %2"; +} + [(set_attr "type" "neon_shift_imm")] +) + +(define_insn "neon_vqshl_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHL_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return "vqshl.%T3%#\t%0, %1, %2"; +} + [(set_attr "type" "neon_sat_shift_imm")] +) + +(define_insn "neon_vqshlu_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 
"s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VQSHLU_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[2], 0, neon_element_bits (mode)); + return "vqshlu.%T3%#\t%0, %1, %2"; +} + [(set_attr "type" "neon_sat_shift_imm")] +) + +(define_insn "neon_vshll_n" + [(set (match_operand: 0 "s_register_operand" "=w") + (unspec: [(match_operand:VW 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSHLL_N))] + "TARGET_NEON" +{ + /* The boundaries are: 0 < imm <= size. */ + neon_const_bounds (operands[2], 0, neon_element_bits (mode) + 1); + return "vshll.%T3%#\t%q0, %P1, %2"; +} + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "neon_vsra_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VSRA_N))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 1, neon_element_bits (mode) + 1); + return "v%O4sra.%T4%#\t%0, %2, %3"; +} + [(set_attr "type" "neon_shift_acc")] +) + +(define_insn "neon_vsri_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSRI))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 1, neon_element_bits (mode) + 1); + return "vsri.\t%0, %2, %3"; +} + [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "neon_vsli_n" + [(set (match_operand:VDQIX 0 "s_register_operand" "=w") + (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0") + (match_operand:VDQIX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VSLI))] + "TARGET_NEON" +{ + neon_const_bounds (operands[3], 0, neon_element_bits (mode)); + return "vsli.\t%0, %2, %3"; +} + [(set_attr "type" "neon_shift_reg")] +) + +(define_insn "neon_vtbl1v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "vtbl.8\t%P0, {%P1}, %P2" + [(set_attr "type" "neon_tbl1")] +) + +(define_insn "neon_vtbl2v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[4]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops); + + return ""; +} + [(set_attr "type" "neon_tbl2")] +) + +(define_insn "neon_vtbl3v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[5]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops); + + return ""; +} + 
[(set_attr "type" "neon_tbl3")] +) + +(define_insn "neon_vtbl4v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:V8QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" +{ + rtx ops[6]; + int tabbase = REGNO (operands[1]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); + ops[5] = operands[2]; + output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); + + return ""; +} + [(set_attr "type" "neon_tbl4")] +) + +;; These three are used by the vec_perm infrastructure for V16QImode. +(define_insn_and_split "neon_vtbl1v16qi" + [(set (match_operand:V16QI 0 "s_register_operand" "=&w") + (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0, op1, op2, part0, part2; + unsigned ofs; + + op0 = operands[0]; + op1 = gen_lowpart (TImode, operands[1]); + op2 = operands[2]; + + ofs = subreg_lowpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + + ofs = subreg_highpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + DONE; +} + [(set_attr "type" "multiple")] +) + +(define_insn_and_split "neon_vtbl2v16qi" + [(set (match_operand:V16QI 0 "s_register_operand" "=&w") + (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VTBL))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0, op1, op2, part0, part2; + unsigned ofs; + + op0 = operands[0]; + op1 = operands[1]; + op2 = operands[2]; + + ofs = subreg_lowpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + + ofs = subreg_highpart_offset (V8QImode, V16QImode); + part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs); + part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs); + emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2)); + DONE; +} + [(set_attr "type" "multiple")] +) + +;; ??? Logically we should extend the regular neon_vcombine pattern to +;; handle quad-word input modes, producing octa-word output modes. But +;; that requires us to add support for octa-word vector modes in moves. +;; That seems overkill for this one use in vec_perm. 
+(define_insn_and_split "neon_vcombinev16qi" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w") + (match_operand:V16QI 2 "s_register_operand" "w")] + UNSPEC_VCONCAT))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] +{ + neon_split_vcombine (operands); + DONE; +} +[(set_attr "type" "multiple")] +) + +(define_insn "neon_vtbx1v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:V8QI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" + "vtbx.8\t%P0, {%P2}, %P3" + [(set_attr "type" "neon_tbl1")] +) + +(define_insn "neon_vtbx2v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:TI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[4]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops); + + return ""; +} + [(set_attr "type" "neon_tbl2")] +) + +(define_insn "neon_vtbx3v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:EI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[5]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops); + + return ""; +} + [(set_attr "type" "neon_tbl3")] +) + +(define_insn "neon_vtbx4v8qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") + (match_operand:OI 2 "s_register_operand" "w") + (match_operand:V8QI 3 "s_register_operand" "w")] + UNSPEC_VTBX))] + "TARGET_NEON" +{ + rtx ops[6]; + int tabbase = REGNO (operands[2]); + + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (V8QImode, tabbase); + ops[2] = gen_rtx_REG (V8QImode, tabbase + 2); + ops[3] = gen_rtx_REG (V8QImode, tabbase + 4); + ops[4] = gen_rtx_REG (V8QImode, tabbase + 6); + ops[5] = operands[3]; + output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops); + + return ""; +} + [(set_attr "type" "neon_tbl4")] +) + +(define_expand "neon_vtrn_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. 
+(define_insn "*neon_vtrn_insn" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 3 "s_register_operand" "2")] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] + UNSPEC_VTRN2))] + "TARGET_NEON" + "vtrn.\t%0, %2" + [(set_attr "type" "neon_permute")] +) + +(define_expand "neon_vtrn" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vtrn_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "neon_vzip_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. +(define_insn "*neon_vzip_insn" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 3 "s_register_operand" "2")] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] + UNSPEC_VZIP2))] + "TARGET_NEON" + "vzip.\t%0, %2" + [(set_attr "type" "neon_zip")] +) + +(define_expand "neon_vzip" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vzip_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "neon_vuzp_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") + (match_operand:VDQW 2 "s_register_operand" "")] + UNSPEC_VUZP1)) + (set (match_operand:VDQW 3 "s_register_operand" "") + (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] + "TARGET_NEON" + "" +) + +;; Note: Different operand numbering to handle tied registers correctly. 
+(define_insn "*neon_vuzp_insn" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 3 "s_register_operand" "2")] + UNSPEC_VUZP1)) + (set (match_operand:VDQW 2 "s_register_operand" "=w") + (unspec:VDQW [(match_dup 1) (match_dup 3)] + UNSPEC_VUZP2))] + "TARGET_NEON" + "vuzp.\t%0, %2" + [(set_attr "type" "neon_zip")] +) + +(define_expand "neon_vuzp" + [(match_operand:SI 0 "s_register_operand" "r") + (match_operand:VDQW 1 "s_register_operand" "w") + (match_operand:VDQW 2 "s_register_operand" "w")] + "TARGET_NEON" +{ + neon_emit_pair_result_insn (mode, gen_neon_vuzp_internal, + operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "neon_vreinterpretv8qi" + [(match_operand:V8QI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4hi" + [(match_operand:V4HI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2si" + [(match_operand:V2SI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2sf" + [(match_operand:V2SF 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretdi" + [(match_operand:DI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretti" + [(match_operand:TI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + + +(define_expand "neon_vreinterpretv16qi" + [(match_operand:V16QI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv8hi" + [(match_operand:V8HI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4si" + [(match_operand:V4SI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv4sf" + [(match_operand:V4SF 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "neon_vreinterpretv2di" + [(match_operand:V2DI 0 "s_register_operand" "") + (match_operand:VQXMOV 1 "s_register_operand" "")] + "TARGET_NEON" +{ + neon_reinterpret (operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_load_lanes" + [(set (match_operand:VDQX 0 "s_register_operand") + (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")] + UNSPEC_VLD1))] + "TARGET_NEON") + +(define_insn "neon_vld1" + [(set (match_operand:VDQX 0 "s_register_operand" "=w") + (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")] + UNSPEC_VLD1))] + "TARGET_NEON" + "vld1.\t%h0, %A1" + [(set_attr "type" 
"neon_load1_1reg")] +) + +(define_insn "neon_vld1_lane" + [(set (match_operand:VDX 0 "s_register_operand" "=w") + (unspec:VDX [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:VDX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) + return "vld1.\t%P0, %A1"; + else + return "vld1.\t{%P0[%c3]}, %A1"; +} + [(set_attr "type" "neon_load1_one_lane")] +) + +(define_insn "neon_vld1_lane" + [(set (match_operand:VQX 0 "s_register_operand" "=w") + (unspec:VQX [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:VQX 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VLD1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + operands[3] = GEN_INT (lane); + } + operands[0] = gen_rtx_REG (mode, regno); + if (max == 2) + return "vld1.\t%P0, %A1"; + else + return "vld1.\t{%P0[%c3]}, %A1"; +} + [(set_attr "type" "neon_load1_one_lane")] +) + +(define_insn "neon_vld1_dup" + [(set (match_operand:VD 0 "s_register_operand" "=w") + (vec_duplicate:VD (match_operand: 1 "neon_struct_operand" "Um")))] + "TARGET_NEON" + "vld1.\t{%P0[]}, %A1" + [(set_attr "type" "neon_load1_all_lanes")] +) + +;; Special case for DImode. Treat it exactly like a simple load. +(define_expand "neon_vld1_dupdi" + [(set (match_operand:DI 0 "s_register_operand" "") + (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")] + UNSPEC_VLD1))] + "TARGET_NEON" + "" +) + +(define_insn "neon_vld1_dup" + [(set (match_operand:VQ 0 "s_register_operand" "=w") + (vec_duplicate:VQ (match_operand: 1 "neon_struct_operand" "Um")))] + "TARGET_NEON" +{ + return "vld1.\t{%e0[], %f0[]}, %A1"; +} + [(set_attr "type" "neon_load1_all_lanes")] +) + +(define_insn_and_split "neon_vld1_dupv2di" + [(set (match_operand:V2DI 0 "s_register_operand" "=w") + (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))] + "TARGET_NEON" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx tmprtx = gen_lowpart (DImode, operands[0]); + emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1])); + emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx ); + DONE; + } + [(set_attr "length" "8") + (set_attr "type" "neon_load1_all_lanes_q")] +) + +(define_expand "vec_store_lanes" + [(set (match_operand:VDQX 0 "neon_struct_operand") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")] + UNSPEC_VST1))] + "TARGET_NEON") + +(define_insn "neon_vst1" + [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") + (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] + UNSPEC_VST1))] + "TARGET_NEON" + "vst1.\t%h1, %A0" + [(set_attr "type" "neon_store1_1reg")]) + +(define_insn "neon_vst1_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:VDX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VST1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + if (lane < 0 || lane >= max) + error ("lane out of range"); + if (max == 1) + return "vst1.\t{%P1}, %A0"; + else + return "vst1.\t{%P1[%c2]}, %A0"; +} + [(set_attr 
"type" "neon_store1_one_lane")] +) + +(define_insn "neon_vst1_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:VQX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VST1_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + operands[2] = GEN_INT (lane); + } + operands[1] = gen_rtx_REG (mode, regno); + if (max == 2) + return "vst1.\t{%P1}, %A0"; + else + return "vst1.\t{%P1[%c2]}, %A0"; +} + [(set_attr "type" "neon_store1_one_lane")] +) + +(define_expand "vec_load_lanesti" + [(set (match_operand:TI 0 "s_register_operand") + (unspec:TI [(match_operand:TI 1 "neon_struct_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON") + +(define_insn "neon_vld2" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, %A1"; + else + return "vld2.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_load1_2reg") + (const_string "neon_load2_2reg")))] +) + +(define_expand "vec_load_lanesoi" + [(set (match_operand:OI 0 "s_register_operand") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON") + +(define_insn "neon_vld2" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2))] + "TARGET_NEON" + "vld2.\t%h0, %A1" + [(set_attr "type" "neon_load2_2reg_q")]) + +(define_insn "neon_vld2_lane" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:TI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = operands[1]; + ops[3] = operands[3]; + output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, %A2", ops); + return ""; +} + [(set_attr "type" "neon_load2_one_lane")] +) + +(define_insn "neon_vld2_lane" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = operands[1]; + ops[3] = GEN_INT (lane); + output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, 
%A2", ops); + return ""; +} + [(set_attr "type" "neon_load2_one_lane")] +) + +(define_insn "neon_vld2_dup" + [(set (match_operand:TI 0 "s_register_operand" "=w") + (unspec:TI [(match_operand: 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD2_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + return "vld2.\t{%e0[], %f0[]}, %A1"; + else + return "vld1.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_load2_all_lanes") + (const_string "neon_load1_1reg")))] +) + +(define_expand "vec_store_lanesti" + [(set (match_operand:TI 0 "neon_struct_operand") + (unspec:TI [(match_operand:TI 1 "s_register_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON") + +(define_insn "neon_vst2" + [(set (match_operand:TI 0 "neon_struct_operand" "=Um") + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, %A0"; + else + return "vst2.\t%h1, %A0"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_store1_2reg") + (const_string "neon_store2_one_lane")))] +) + +(define_expand "vec_store_lanesoi" + [(set (match_operand:OI 0 "neon_struct_operand") + (unspec:OI [(match_operand:OI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON") + +(define_insn "neon_vst2" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2))] + "TARGET_NEON" + "vst2.\t%h1, %A0" + [(set_attr "type" "neon_store2_4reg")] +) + +(define_insn "neon_vst2_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:TI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = operands[2]; + output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store2_one_lane")] +) + +(define_insn "neon_vst2_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST2_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[4]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = GEN_INT (lane); + output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store2_one_lane")] +) + +(define_expand "vec_load_lanesei" + [(set (match_operand:EI 0 "s_register_operand") + (unspec:EI [(match_operand:EI 1 "neon_struct_operand") + (unspec:VDX [(const_int 
0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3))] + "TARGET_NEON") + +(define_insn "neon_vld3" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, %A1"; + else + return "vld3.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_load1_3reg") + (const_string "neon_load3_3reg")))] +) + +(define_expand "vec_load_lanesci" + [(match_operand:CI 0 "s_register_operand") + (match_operand:CI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vld3 (operands[0], operands[1])); + DONE; +}) + +(define_expand "neon_vld3" + [(match_operand:CI 0 "s_register_operand") + (match_operand:CI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + rtx mem; + + mem = adjust_address (operands[1], EImode, 0); + emit_insn (gen_neon_vld3qa (operands[0], mem)); + mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); + emit_insn (gen_neon_vld3qb (operands[0], mem, operands[0])); + DONE; +}) + +(define_insn "neon_vld3qa" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = operands[1]; + output_asm_insn ("vld3.\t{%P0, %P1, %P2}, %A3", ops); + return ""; +} + [(set_attr "type" "neon_load3_3reg")] +) + +(define_insn "neon_vld3qb" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (match_operand:CI 2 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3B))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno + 2); + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = operands[1]; + output_asm_insn ("vld3.\t{%P0, %P1, %P2}, %A3", ops); + return ""; +} + [(set_attr "type" "neon_load3_3reg")] +) + +(define_insn "neon_vld3_lane" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:EI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + ops[4] = operands[3]; + output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", + ops); + return ""; +} + [(set_attr "type" "neon_load3_one_lane")] +) + +(define_insn "neon_vld3_lane" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:CI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + 
UNSPEC_VLD3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = operands[1]; + ops[4] = GEN_INT (lane); + output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", + ops); + return ""; +} + [(set_attr "type" "neon_load3_one_lane")] +) + +(define_insn "neon_vld3_dup" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand: 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + { + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + output_asm_insn ("vld3.\t{%P0[], %P1[], %P2[]}, %3", ops); + return ""; + } + else + return "vld1.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_load3_all_lanes") + (const_string "neon_load1_1reg")))]) + +(define_expand "vec_store_lanesei" + [(set (match_operand:EI 0 "neon_struct_operand") + (unspec:EI [(match_operand:EI 1 "s_register_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3))] + "TARGET_NEON") + +(define_insn "neon_vst3" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, %A0"; + else + return "vst3.\t%h1, %A0"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_store1_3reg") + (const_string "neon_store3_one_lane")))]) + +(define_expand "vec_store_lanesci" + [(match_operand:CI 0 "neon_struct_operand") + (match_operand:CI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vst3 (operands[0], operands[1])); + DONE; +}) + +(define_expand "neon_vst3" + [(match_operand:CI 0 "neon_struct_operand") + (match_operand:CI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + rtx mem; + + mem = adjust_address (operands[0], EImode, 0); + emit_insn (gen_neon_vst3qa (mem, operands[1])); + mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode)); + emit_insn (gen_neon_vst3qb (mem, operands[1])); + DONE; +}) + +(define_insn "neon_vst3qa" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + output_asm_insn ("vst3.\t{%P1, %P2, %P3}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "neon_vst3qb" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3B))] + 
"TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); + output_asm_insn ("vst3.\t{%P1, %P2, %P3}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store3_3reg")] +) + +(define_insn "neon_vst3_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:EI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = operands[2]; + output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", + ops); + return ""; +} + [(set_attr "type" "neon_store3_one_lane")] +) + +(define_insn "neon_vst3_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:CI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST3_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[5]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = GEN_INT (lane); + output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", + ops); + return ""; +} + [(set_attr "type" "neon_store3_one_lane")] +) + +(define_expand "vec_load_lanesoi" + [(set (match_operand:OI 0 "s_register_operand") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4))] + "TARGET_NEON") + +(define_insn "neon_vld4" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4))] + "TARGET_NEON" +{ + if ( == 64) + return "vld1.64\t%h0, %A1"; + else + return "vld4.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_load1_4reg") + (const_string "neon_load4_4reg")))] +) + +(define_expand "vec_load_lanesxi" + [(match_operand:XI 0 "s_register_operand") + (match_operand:XI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vld4 (operands[0], operands[1])); + DONE; +}) + +(define_expand "neon_vld4" + [(match_operand:XI 0 "s_register_operand") + (match_operand:XI 1 "neon_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + rtx mem; + + mem = adjust_address (operands[1], OImode, 0); + emit_insn (gen_neon_vld4qa (operands[0], mem)); + mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); + emit_insn (gen_neon_vld4qb (operands[0], mem, operands[0])); + DONE; +}) + +(define_insn "neon_vld4qa" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(match_operand:OI 1 
"neon_struct_operand" "Um") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = gen_rtx_REG (DImode, regno + 12); + ops[4] = operands[1]; + output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, %A4", ops); + return ""; +} + [(set_attr "type" "neon_load4_4reg")] +) + +(define_insn "neon_vld4qb" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") + (match_operand:XI 2 "s_register_operand" "0") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4B))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno + 2); + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = gen_rtx_REG (DImode, regno + 14); + ops[4] = operands[1]; + output_asm_insn ("vld4.\t{%P0, %P1, %P2, %P3}, %A4", ops); + return ""; +} + [(set_attr "type" "neon_load4_4reg")] +) + +(define_insn "neon_vld4_lane" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:OI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + ops[5] = operands[3]; + output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", + ops); + return ""; +} + [(set_attr "type" "neon_load4_one_lane")] +) + +(define_insn "neon_vld4_lane" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(match_operand: 1 "neon_struct_operand" "Um") + (match_operand:XI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[0]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 4); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = gen_rtx_REG (DImode, regno + 12); + ops[4] = operands[1]; + ops[5] = GEN_INT (lane); + output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", + ops); + return ""; +} + [(set_attr "type" "neon_load4_one_lane")] +) + +(define_insn "neon_vld4_dup" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4_DUP))] + "TARGET_NEON" +{ + if (GET_MODE_NUNITS (mode) > 1) + { + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + 
output_asm_insn ("vld4.\t{%P0[], %P1[], %P2[], %P3[]}, %A4", + ops); + return ""; + } + else + return "vld1.\t%h0, %A1"; +} + [(set (attr "type") + (if_then_else (gt (const_string "") (const_string "1")) + (const_string "neon_load4_all_lanes") + (const_string "neon_load1_1reg")))] +) + +(define_expand "vec_store_lanesoi" + [(set (match_operand:OI 0 "neon_struct_operand") + (unspec:OI [(match_operand:OI 1 "s_register_operand") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4))] + "TARGET_NEON") + +(define_insn "neon_vst4" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4))] + "TARGET_NEON" +{ + if ( == 64) + return "vst1.64\t%h1, %A0"; + else + return "vst4.\t%h1, %A0"; +} + [(set (attr "type") + (if_then_else (eq (const_string "") (const_string "64")) + (const_string "neon_store1_4reg") + (const_string "neon_store4_4reg")))] +) + +(define_expand "vec_store_lanesxi" + [(match_operand:XI 0 "neon_struct_operand") + (match_operand:XI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + emit_insn (gen_neon_vst4 (operands[0], operands[1])); + DONE; +}) + +(define_expand "neon_vst4" + [(match_operand:XI 0 "neon_struct_operand") + (match_operand:XI 1 "s_register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_NEON" +{ + rtx mem; + + mem = adjust_address (operands[0], OImode, 0); + emit_insn (gen_neon_vst4qa (mem, operands[1])); + mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode)); + emit_insn (gen_neon_vst4qb (mem, operands[1])); + DONE; +}) + +(define_insn "neon_vst4qa" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); + output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store4_4reg")] +) + +(define_insn "neon_vst4qb" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4B))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 6); + ops[3] = gen_rtx_REG (DImode, regno + 10); + ops[4] = gen_rtx_REG (DImode, regno + 14); + output_asm_insn ("vst4.\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store4_4reg")] +) + +(define_insn "neon_vst4_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:OI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = gen_rtx_REG 
(DImode, regno + 6); + ops[5] = operands[2]; + output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", + ops); + return ""; +} + [(set_attr "type" "neon_store4_one_lane")] +) + +(define_insn "neon_vst4_lane" + [(set (match_operand: 0 "neon_struct_operand" "=Um") + (unspec: + [(match_operand:XI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST4_LANE))] + "TARGET_NEON" +{ + HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + int regno = REGNO (operands[1]); + rtx ops[6]; + if (lane < 0 || lane >= max) + error ("lane out of range"); + else if (lane >= max / 2) + { + lane -= max / 2; + regno += 2; + } + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 8); + ops[4] = gen_rtx_REG (DImode, regno + 12); + ops[5] = GEN_INT (lane); + output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", + ops); + return ""; +} + [(set_attr "type" "neon_store4_4reg")] +) + +(define_expand "neon_vand" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_inv_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_and3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vorr" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_ior3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_veor" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "s_register_operand" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_xor3 (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vbic" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_bic3_neon (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "neon_vorn" + [(match_operand:VDQX 0 "s_register_operand" "") + (match_operand:VDQX 1 "s_register_operand" "") + (match_operand:VDQX 2 "neon_inv_logic_op2" "") + (match_operand:SI 3 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_orn3_neon (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "neon_vec_unpack_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmovl. %q0, %e1" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "neon_vec_unpack_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmovl. 
%q0, %f1" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand"))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2); i++) + RTVEC_ELT (v, i) = GEN_INT ((/2) + i); + + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_vec_unpack_hi_ (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" ""))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); + t1 = gen_rtx_PARALLEL (mode, v); + emit_insn (gen_neon_vec_unpack_lo_ (operands[0], + operands[1], + t1)); + DONE; + } +) + +(define_insn "neon_vec_mult_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_low" ""))) + (SE: (vec_select: + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmull. %q0, %e1, %e3" + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (SE: (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (i); + t1 = gen_rtx_PARALLEL (mode, v); + + emit_insn (gen_neon_vec_mult_lo_ (operands[0], + operands[1], + t1, + operands[2])); + DONE; + } +) + +(define_insn "neon_vec_mult_hi_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: (vec_select: + (match_operand:VU 1 "register_operand" "w") + (match_operand:VU 2 "vect_par_constant_high" ""))) + (SE: (vec_select: + (match_operand:VU 3 "register_operand" "w") + (match_dup 2)))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmull. %q0, %f1, %f3" + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (SE: (match_operand:VU 2 "register_operand" ""))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + rtvec v = rtvec_alloc (/2) ; + rtx t1; + int i; + for (i = 0; i < (/2) ; i++) + RTVEC_ELT (v, i) = GEN_INT (/2 + i); + t1 = gen_rtx_PARALLEL (mode, v); + + emit_insn (gen_neon_vec_mult_hi_ (operands[0], + operands[1], + t1, + operands[2])); + DONE; + + } +) + +(define_insn "neon_vec_shiftl_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (ashift:VW (match_operand:VW 1 "register_operand" "w") + (match_operand: 2 "const_neon_scalar_shift_amount_operand" ""))))] + "TARGET_NEON" +{ + return "vshll. 
%q0, %P1, %2"; +} + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_expand "vec_widen_shiftl_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + emit_insn (gen_neon_vec_shiftl_ (operands[0], + simplify_gen_subreg (mode, operands[1], mode, 0), + operands[2])); + DONE; + } +) + +(define_expand "vec_widen_shiftl_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VU 1 "register_operand" "")) + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + { + emit_insn (gen_neon_vec_shiftl_ (operands[0], + simplify_gen_subreg (mode, operands[1], mode, + GET_MODE_SIZE (mode)), + operands[2])); + DONE; + } +) + +;; Vectorize for non-neon-quad case +(define_insn "neon_unpack_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (match_operand:VDI 1 "register_operand" "w")))] + "TARGET_NEON" + "vmovl. %q0, %P1" + [(set_attr "type" "neon_move")] +) + +(define_expand "vec_unpack_lo_" + [(match_operand: 0 "register_operand" "") + (SE:(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_unpack_ (tmpreg, operands[1])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; +} +) + +(define_expand "vec_unpack_hi_" + [(match_operand: 0 "register_operand" "") + (SE:(match_operand:VDI 1 "register_operand"))] + "TARGET_NEON" +{ + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_unpack_ (tmpreg, operands[1])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; +} +) + +(define_insn "neon_vec_mult_" + [(set (match_operand: 0 "register_operand" "=w") + (mult: (SE: + (match_operand:VDI 1 "register_operand" "w")) + (SE: + (match_operand:VDI 2 "register_operand" "w"))))] + "TARGET_NEON" + "vmull. 
%q0, %P1, %P2" + [(set_attr "type" "neon_mul__long")] +) + +(define_expand "vec_widen_mult_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (SE: (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; + + } +) + +(define_expand "vec_widen_mult_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (SE: (match_operand:VDI 2 "register_operand" ""))] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_mult_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; + + } +) + +(define_expand "vec_widen_shiftl_hi_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_shiftl_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_high (operands[0], tmpreg)); + + DONE; + } +) + +(define_expand "vec_widen_shiftl_lo_" + [(match_operand: 0 "register_operand" "") + (SE: (match_operand:VDI 1 "register_operand" "")) + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" + { + rtx tmpreg = gen_reg_rtx (mode); + emit_insn (gen_neon_vec_shiftl_ (tmpreg, operands[1], operands[2])); + emit_insn (gen_neon_vget_low (operands[0], tmpreg)); + + DONE; + } +) + +; FIXME: These instruction patterns can't be used safely in big-endian mode +; because the ordering of vector elements in Q registers is different from what +; the semantics of the instructions require. + +(define_insn "vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=&w") + (vec_concat: + (truncate: + (match_operand:VN 1 "register_operand" "w")) + (truncate: + (match_operand:VN 2 "register_operand" "w"))))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmovn.i\t%e0, %q1\;vmovn.i\t%f0, %q2" + [(set_attr "type" "multiple") + (set_attr "length" "8")] +) + +;; For the non-quad case. +(define_insn "neon_vec_pack_trunc_" + [(set (match_operand: 0 "register_operand" "=w") + (truncate: (match_operand:VN 1 "register_operand" "w")))] + "TARGET_NEON && !BYTES_BIG_ENDIAN" + "vmovn.i\t%P0, %q1" + [(set_attr "type" "neon_move_narrow_q")] +) + +(define_expand "vec_pack_trunc_" + [(match_operand: 0 "register_operand" "") + (match_operand:VSHFT 1 "register_operand" "") + (match_operand:VSHFT 2 "register_operand")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + rtx tempreg = gen_reg_rtx (mode); + + emit_insn (gen_move_lo_quad_ (tempreg, operands[1])); + emit_insn (gen_move_hi_quad_ (tempreg, operands[2])); + emit_insn (gen_neon_vec_pack_trunc_ (operands[0], tempreg)); + DONE; +}) + +(define_insn "neon_vabd_2" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w"))))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vabd. 
%0, %1, %2" + [(set (attr "type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_abd_s") + (const_string "neon_abd")))] +) + +(define_insn "neon_vabd_3" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w") + (match_operand:VDQ 2 "s_register_operand" "w")] + UNSPEC_VSUB)))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "vabd. %0, %1, %2" + [(set (attr "type") + (if_then_else (ne (symbol_ref "") (const_int 0)) + (const_string "neon_fp_abd_s") + (const_string "neon_abd")))] +) + +;; Copy from core-to-neon regs, then extend, not vice-versa + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))] + { + operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))] + { + operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))] + { + operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))] + { + operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))] + { + operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); + }) + +(define_split + [(set (match_operand:DI 0 "s_register_operand" "") + (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))] + "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" + [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))] + { + operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); + }) diff --git a/gcc-4.9/gcc/config/arm/neon.ml b/gcc-4.9/gcc/config/arm/neon.ml new file mode 100644 index 000000000..4289b8ce0 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/neon.ml @@ -0,0 +1,2355 @@ +(* Common code for ARM NEON header file, documentation and test case + generators. + + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . *) + +(* Shorthand types for vector elements. *) +type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16 + | P64 | P128 | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts + | Cast of elts * elts | NoElts + +type eltclass = Signed | Unsigned | Float | Poly | Int | Bits + | ConvClass of eltclass * eltclass | NoType + +(* These vector types correspond directly to C types. *) +type vectype = T_int8x8 | T_int8x16 + | T_int16x4 | T_int16x8 + | T_int32x2 | T_int32x4 + | T_int64x1 | T_int64x2 + | T_uint8x8 | T_uint8x16 + | T_uint16x4 | T_uint16x8 + | T_uint32x2 | T_uint32x4 + | T_uint64x1 | T_uint64x2 + | T_float16x4 + | T_float32x2 | T_float32x4 + | T_poly8x8 | T_poly8x16 + | T_poly16x4 | T_poly16x8 + | T_immediate of int * int + | T_int8 | T_int16 + | T_int32 | T_int64 + | T_uint8 | T_uint16 + | T_uint32 | T_uint64 + | T_poly8 | T_poly16 + | T_poly64 | T_poly64x1 + | T_poly64x2 | T_poly128 + | T_float16 | T_float32 + | T_arrayof of int * vectype + | T_ptrto of vectype | T_const of vectype + | T_void | T_intQI + | T_intHI | T_intSI + | T_intDI | T_intTI + | T_floatHF | T_floatSF + +(* The meanings of the following are: + TImode : "Tetra", two registers (four words). + EImode : "hExa", three registers (six words). + OImode : "Octa", four registers (eight words). + CImode : "dodeCa", six registers (twelve words). + XImode : "heXadeca", eight registers (sixteen words). +*) + +type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode + +type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt + | PtrTo of shape_elt | CstPtrTo of shape_elt + (* These next ones are used only in the test generator. *) + | Element_of_dreg (* Used for "lane" variants. *) + | Element_of_qreg (* Likewise. *) + | All_elements_of_dreg (* Used for "dup" variants. *) + | Alternatives of shape_elt list (* Used for multiple valid operands *) + +type shape_form = All of int * shape_elt + | Long + | Long_noreg of shape_elt + | Wide + | Wide_noreg of shape_elt + | Narrow + | Long_imm + | Narrow_imm + | Binary_imm of shape_elt + | Use_operands of shape_elt array + | By_scalar of shape_elt + | Unary_scalar of shape_elt + | Wide_lane + | Wide_scalar + | Pair_result of shape_elt + +type arity = Arity0 of vectype + | Arity1 of vectype * vectype + | Arity2 of vectype * vectype * vectype + | Arity3 of vectype * vectype * vectype * vectype + | Arity4 of vectype * vectype * vectype * vectype * vectype + +type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI + | V16QI | V8HI | V4SI | V4SF | V2DI | TI + | QI | HI | SI | SF + +type opcode = + (* Binary ops. *) + Vadd + | Vmul + | Vmla + | Vmls + | Vfma + | Vfms + | Vsub + | Vceq + | Vcge + | Vcgt + | Vcle + | Vclt + | Vcage + | Vcagt + | Vcale + | Vcalt + | Vtst + | Vabd + | Vaba + | Vmax + | Vmin + | Vpadd + | Vpada + | Vpmax + | Vpmin + | Vrecps + | Vrsqrts + | Vshl + | Vshr_n + | Vshl_n + | Vsra_n + | Vsri + | Vsli + (* Logic binops. 
*) + | Vand + | Vorr + | Veor + | Vbic + | Vorn + | Vbsl + (* Ops with scalar. *) + | Vmul_lane + | Vmla_lane + | Vmls_lane + | Vmul_n + | Vmla_n + | Vmls_n + | Vmull_n + | Vmull_lane + | Vqdmull_n + | Vqdmull_lane + | Vqdmulh_n + | Vqdmulh_lane + (* Unary ops. *) + | Vrintn + | Vrinta + | Vrintp + | Vrintm + | Vrintz + | Vabs + | Vneg + | Vcls + | Vclz + | Vcnt + | Vrecpe + | Vrsqrte + | Vmvn + (* Vector extract. *) + | Vext + (* Reverse elements. *) + | Vrev64 + | Vrev32 + | Vrev16 + (* Transposition ops. *) + | Vtrn + | Vzip + | Vuzp + (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *) + | Vldx of int + | Vstx of int + | Vldx_lane of int + | Vldx_dup of int + | Vstx_lane of int + (* Set/extract lanes from a vector. *) + | Vget_lane + | Vset_lane + (* Initialize vector from bit pattern. *) + | Vcreate + (* Set all lanes to same value. *) + | Vdup_n + | Vmov_n (* Is this the same? *) + (* Duplicate scalar to all lanes of vector. *) + | Vdup_lane + (* Combine vectors. *) + | Vcombine + (* Get quadword high/low parts. *) + | Vget_high + | Vget_low + (* Convert vectors. *) + | Vcvt + | Vcvt_n + (* Narrow/lengthen vectors. *) + | Vmovn + | Vmovl + (* Table lookup. *) + | Vtbl of int + | Vtbx of int + (* Reinterpret casts. *) + | Vreinterp + +let rev_elems revsize elsize nelts _ = + let mask = (revsize / elsize) - 1 in + let arr = Array.init nelts + (fun i -> i lxor mask) in + Array.to_list arr + +let permute_range i stride nelts increment = + let rec build i = function + 0 -> [] + | nelts -> i :: (i + stride) :: build (i + increment) (pred nelts) in + build i nelts + +(* Generate a list of integers suitable for vzip. *) +let zip_range i stride nelts = permute_range i stride nelts 1 + +(* Generate a list of integers suitable for vunzip. *) +let uzip_range i stride nelts = permute_range i stride nelts 4 + +(* Generate a list of integers suitable for trn. *) +let trn_range i stride nelts = permute_range i stride nelts 2 + +let zip_elems _ nelts part = + match part with + `lo -> zip_range 0 nelts (nelts / 2) + | `hi -> zip_range (nelts / 2) nelts (nelts / 2) + +let uzip_elems _ nelts part = + match part with + `lo -> uzip_range 0 2 (nelts / 2) + | `hi -> uzip_range 1 2 (nelts / 2) + +let trn_elems _ nelts part = + match part with + `lo -> trn_range 0 nelts (nelts / 2) + | `hi -> trn_range 1 nelts (nelts / 2) + +(* Features used for documentation, to distinguish between some instruction + variants, and to signal special requirements (e.g. swapping arguments). *) + +type features = + Halving + | Rounding + | Saturating + | Dst_unsign + | High_half + | Doubling + | Flipped of string (* Builtin name to use with flipped arguments. *) + | InfoWord (* Pass an extra word for signage/rounding etc. (always passed + for All _, Long, Wide, Narrow shape_forms. *) + (* Implement builtin as shuffle. The parameter is a function which returns + masks suitable for __builtin_shuffle: arguments are (element size, + number of elements, high/low part selector). *) + | Use_shuffle of (int -> int -> [`lo|`hi] -> int list) + (* A specification as to the shape of instruction expected upon + disassembly, used if it differs from the shape used to build the + intrinsic prototype. Multiple entries in the constructor's argument + indicate that the intrinsic expands to more than one assembly + instruction, each with a corresponding shape specified here. *) + | Disassembles_as of shape_form list + | Builtin_name of string (* Override the name of the builtin. *) + (* Override the name of the instruction. 
If more than one name + is specified, it means that the instruction can have any of those + names. *) + | Instruction_name of string list + (* Mark that the intrinsic yields no instructions, or expands to yield + behavior that the test generator cannot test. *) + | No_op + (* Mark that the intrinsic has constant arguments that cannot be set + to the defaults (zero for pointers and one otherwise) in the test + cases. The function supplied must return the integer to be written + into the testcase for the argument number (0-based) supplied to it. *) + | Const_valuator of (int -> int) + | Fixed_vector_reg + | Fixed_core_reg + (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *) + | Requires_feature of string + (* Mark that the intrinsic requires a particular architecture version. *) + | Requires_arch of int + (* Mark that the intrinsic requires a particular bit in __ARM_FP to + be set. *) + | Requires_FP_bit of int + +exception MixedMode of elts * elts + +let rec elt_width = function + S8 | U8 | P8 | I8 | B8 -> 8 + | S16 | U16 | P16 | I16 | B16 | F16 -> 16 + | S32 | F32 | U32 | I32 | B32 -> 32 + | S64 | U64 | P64 | I64 | B64 -> 64 + | P128 -> 128 + | Conv (a, b) -> + let wa = elt_width a and wb = elt_width b in + if wa = wb then wa else raise (MixedMode (a, b)) + | Cast (a, b) -> raise (MixedMode (a, b)) + | NoElts -> failwith "No elts" + +let rec elt_class = function + S8 | S16 | S32 | S64 -> Signed + | U8 | U16 | U32 | U64 -> Unsigned + | P8 | P16 | P64 | P128 -> Poly + | F16 | F32 -> Float + | I8 | I16 | I32 | I64 -> Int + | B8 | B16 | B32 | B64 -> Bits + | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b) + | NoElts -> NoType + +let elt_of_class_width c w = + match c, w with + Signed, 8 -> S8 + | Signed, 16 -> S16 + | Signed, 32 -> S32 + | Signed, 64 -> S64 + | Float, 16 -> F16 + | Float, 32 -> F32 + | Unsigned, 8 -> U8 + | Unsigned, 16 -> U16 + | Unsigned, 32 -> U32 + | Unsigned, 64 -> U64 + | Poly, 8 -> P8 + | Poly, 16 -> P16 + | Poly, 64 -> P64 + | Poly, 128 -> P128 + | Int, 8 -> I8 + | Int, 16 -> I16 + | Int, 32 -> I32 + | Int, 64 -> I64 + | Bits, 8 -> B8 + | Bits, 16 -> B16 + | Bits, 32 -> B32 + | Bits, 64 -> B64 + | _ -> failwith "Bad element type" + +(* Return unsigned integer element the same width as argument. *) +let unsigned_of_elt elt = + elt_of_class_width Unsigned (elt_width elt) + +let signed_of_elt elt = + elt_of_class_width Signed (elt_width elt) + +(* Return untyped bits element the same width as argument. *) +let bits_of_elt elt = + elt_of_class_width Bits (elt_width elt) + +let non_signed_variant = function + S8 -> I8 + | S16 -> I16 + | S32 -> I32 + | S64 -> I64 + | U8 -> I8 + | U16 -> I16 + | U32 -> I32 + | U64 -> I64 + | x -> x + +let poly_unsigned_variant v = + let elclass = match elt_class v with + Poly -> Unsigned + | x -> x in + elt_of_class_width elclass (elt_width v) + +let widen_elt elt = + let w = elt_width elt + and c = elt_class elt in + elt_of_class_width c (w * 2) + +let narrow_elt elt = + let w = elt_width elt + and c = elt_class elt in + elt_of_class_width c (w / 2) + +(* If we're trying to find a mode from a "Use_operands" instruction, use the + last vector operand as the dominant mode used to invoke the correct builtin. + We must stick to this rule in neon.md. 
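+(* Editor's note: the short usage sketch below is not part of the original
+   GCC source; it is added purely as illustration and only exercises the
+   element-type helpers already defined above. *)
+let () =
+  assert (widen_elt S8 = S16);           (* vaddl widens each element *)
+  assert (narrow_elt U64 = U32);         (* vmovn narrows each element *)
+  assert (unsigned_of_elt S32 = U32);    (* comparisons return unsigned masks *)
+  assert (bits_of_elt P16 = B16);        (* "bits only" ops drop the class *)
+  assert (non_signed_variant U16 = I16)  (* sign-invariant ops share a builtin *)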
*) +let find_key_operand operands = + let rec scan opno = + match operands.(opno) with + Qreg -> Qreg + | Dreg -> Dreg + | VecArray (_, Qreg) -> Qreg + | VecArray (_, Dreg) -> Dreg + | _ -> scan (opno-1) + in + scan ((Array.length operands) - 1) + +(* Find a vecmode from a shape_elt ELT for an instruction with shape_form + SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode + for the given argument position, else determine which argument to return a + mode for automatically. *) + +let rec mode_of_elt ?argpos elt shape = + let flt = match elt_class elt with + Float | ConvClass(_, Float) -> true | _ -> false in + let idx = + match elt_width elt with + 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3 | 128 -> 4 + | _ -> failwith "Bad element width" + in match shape with + All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg + | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg -> + if flt then + [| V8QI; V4HF; V2SF; DI |].(idx) + else + [| V8QI; V4HI; V2SI; DI |].(idx) + | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg + | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg -> + [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI; TI|].(idx) + | All (_, (Corereg | PtrTo _ | CstPtrTo _)) -> + [| QI; HI; if flt then SF else SI; DI |].(idx) + | Long | Wide | Wide_lane | Wide_scalar + | Long_imm -> + [| V8QI; V4HI; V2SI; DI |].(idx) + | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx) + | Use_operands ops -> + begin match argpos with + None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops))) + | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos))) + end + | _ -> failwith "invalid shape" + +(* Modify an element type dependent on the shape of the instruction and the + operand number. *) + +let shapemap shape no = + let ident = fun x -> x in + match shape with + All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _ + | Binary_imm _ -> ident + | Long | Long_noreg _ | Wide_scalar | Long_imm -> + [| widen_elt; ident; ident |].(no) + | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no) + | Wide_lane -> [| widen_elt; ident; ident; ident |].(no) + | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no) + +(* Register type (D/Q) of an operand, based on shape and operand number. 
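+(* Editor's note: an illustrative sketch, not in the original source, of what
+   mode_of_elt and shapemap (defined above) compute for two common cases. *)
+let () =
+  (* vadd.i16 on D registers works in V4HI mode; vaddq.f32 in V4SF mode. *)
+  assert (mode_of_elt S16 (All (3, Dreg)) = V4HI);
+  assert (mode_of_elt F32 (All (3, Qreg)) = V4SF);
+  (* For a Long shape (e.g. vaddl) the result element is widened ... *)
+  assert ((shapemap Long 0) S8 = S16);
+  (* ... while the source operands keep their element type. *)
+  assert ((shapemap Long 1) S8 = S8)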
*) + +let regmap shape no = + match shape with + All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg + | Long -> [| Qreg; Dreg; Dreg |].(no) + | Wide -> [| Qreg; Qreg; Dreg |].(no) + | Narrow -> [| Dreg; Qreg; Qreg |].(no) + | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no) + | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no) + | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no) + | Unary_scalar reg -> [| reg; Dreg; Immed |].(no) + | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no) + | Binary_imm reg -> [| reg; reg; Immed |].(no) + | Long_imm -> [| Qreg; Dreg; Immed |].(no) + | Narrow_imm -> [| Dreg; Qreg; Immed |].(no) + | Use_operands these -> these.(no) + +let type_for_elt shape elt no = + let elt = (shapemap shape no) elt in + let reg = regmap shape no in + let rec type_for_reg_elt reg elt = + match reg with + Dreg -> + begin match elt with + S8 -> T_int8x8 + | S16 -> T_int16x4 + | S32 -> T_int32x2 + | S64 -> T_int64x1 + | U8 -> T_uint8x8 + | U16 -> T_uint16x4 + | U32 -> T_uint32x2 + | U64 -> T_uint64x1 + | P64 -> T_poly64x1 + | P128 -> T_poly128 + | F16 -> T_float16x4 + | F32 -> T_float32x2 + | P8 -> T_poly8x8 + | P16 -> T_poly16x4 + | _ -> failwith "Bad elt type for Dreg" + end + | Qreg -> + begin match elt with + S8 -> T_int8x16 + | S16 -> T_int16x8 + | S32 -> T_int32x4 + | S64 -> T_int64x2 + | U8 -> T_uint8x16 + | U16 -> T_uint16x8 + | U32 -> T_uint32x4 + | U64 -> T_uint64x2 + | F32 -> T_float32x4 + | P8 -> T_poly8x16 + | P16 -> T_poly16x8 + | P64 -> T_poly64x2 + | P128 -> T_poly128 + | _ -> failwith "Bad elt type for Qreg" + end + | Corereg -> + begin match elt with + S8 -> T_int8 + | S16 -> T_int16 + | S32 -> T_int32 + | S64 -> T_int64 + | U8 -> T_uint8 + | U16 -> T_uint16 + | U32 -> T_uint32 + | U64 -> T_uint64 + | P8 -> T_poly8 + | P16 -> T_poly16 + | P64 -> T_poly64 + | P128 -> T_poly128 + | F32 -> T_float32 + | _ -> failwith "Bad elt type for Corereg" + end + | Immed -> + T_immediate (0, 0) + | VecArray (num, sub) -> + T_arrayof (num, type_for_reg_elt sub elt) + | PtrTo x -> + T_ptrto (type_for_reg_elt x elt) + | CstPtrTo x -> + T_ptrto (T_const (type_for_reg_elt x elt)) + (* Anything else is solely for the use of the test generator. *) + | _ -> assert false + in + type_for_reg_elt reg elt + +(* Return size of a vector type, in bits. *) +let vectype_size = function + T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1 + | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1 + | T_float32x2 | T_poly8x8 | T_poly64x1 | T_poly16x4 | T_float16x4 -> 64 + | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2 + | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2 + | T_float32x4 | T_poly8x16 | T_poly64x2 | T_poly16x8 -> 128 + | _ -> raise Not_found + +let inttype_for_array num elttype = + let eltsize = vectype_size elttype in + let numwords = (num * eltsize) / 32 in + match numwords with + 4 -> B_TImode + | 6 -> B_EImode + | 8 -> B_OImode + | 12 -> B_CImode + | 16 -> B_XImode + | _ -> failwith ("no int type for size " ^ string_of_int numwords) + +(* These functions return pairs of (internal, external) types, where "internal" + types are those seen by GCC, and "external" are those seen by the assembler. + These types aren't necessarily the same, since the intrinsics can munge more + than one C type into each assembler opcode. *) + +let make_sign_invariant func shape elt = + let arity, elt' = func shape elt in + arity, non_signed_variant elt' + +(* Don't restrict any types. 
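+(* Editor's note: a usage sketch added for illustration only (not in the
+   original source), showing how type_for_elt and inttype_for_array, defined
+   above, resolve C-level and big-integer types. *)
+let () =
+  (* Operand 0 of a D-register vadd on unsigned bytes is uint8x8_t. *)
+  assert (type_for_elt (All (3, Dreg)) U8 0 = T_uint8x8);
+  (* A Pair_result shape (vtrn/vzip/vuzp) yields an array of two vectors;
+     two 64-bit vectors occupy four words, hence TImode. *)
+  assert (type_for_elt (Pair_result Dreg) S32 0 = T_arrayof (2, T_int32x2));
+  assert (inttype_for_array 2 T_int32x2 = B_TImode)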
*) + +let elts_same make_arity shape elt = + let vtype = type_for_elt shape elt in + make_arity vtype, elt + +(* As sign_invar_*, but when sign matters. *) +let elts_same_io_lane = + elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3)) + +let elts_same_io = + elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2)) + +let elts_same_2_lane = + elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3)) + +let elts_same_3 = elts_same_2_lane + +let elts_same_2 = + elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2)) + +let elts_same_1 = + elts_same (fun vtype -> Arity1 (vtype 0, vtype 1)) + +(* Use for signed/unsigned invariant operations (i.e. where the operation + doesn't depend on the sign of the data. *) + +let sign_invar_io_lane = make_sign_invariant elts_same_io_lane +let sign_invar_io = make_sign_invariant elts_same_io +let sign_invar_2_lane = make_sign_invariant elts_same_2_lane +let sign_invar_2 = make_sign_invariant elts_same_2 +let sign_invar_1 = make_sign_invariant elts_same_1 + +(* Sign-sensitive comparison. *) + +let cmp_sign_matters shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in + Arity2 (rtype, vtype 1, vtype 2), elt + +(* Signed/unsigned invariant comparison. *) + +let cmp_sign_invar shape elt = + let shape', elt' = cmp_sign_matters shape elt in + let elt'' = + match non_signed_variant elt' with + P8 -> I8 + | x -> x + in + shape', elt'' + +(* Comparison (VTST) where only the element width matters. *) + +let cmp_bits shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 + and bits_only = bits_of_elt elt in + Arity2 (rtype, vtype 1, vtype 2), bits_only + +let reg_shift shape elt = + let vtype = type_for_elt shape elt + and op2type = type_for_elt shape (signed_of_elt elt) 2 in + Arity2 (vtype 0, vtype 1, op2type), elt + +(* Genericised constant-shift type-generating function. *) + +let const_shift mkimm ?arity ?result shape elt = + let op2type = (shapemap shape 2) elt in + let op2width = elt_width op2type in + let op2 = mkimm op2width + and op1 = type_for_elt shape elt 1 + and r_elt = + match result with + None -> elt + | Some restriction -> restriction elt in + let rtype = type_for_elt shape r_elt 0 in + match arity with + None -> Arity2 (rtype, op1, op2), elt + | Some mkarity -> mkarity rtype op1 op2, elt + +(* Use for immediate right-shifts. *) + +let shift_right shape elt = + const_shift (fun imm -> T_immediate (1, imm)) shape elt + +let shift_right_acc shape elt = + const_shift (fun imm -> T_immediate (1, imm)) + ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt + +(* Use for immediate right-shifts when the operation doesn't care about + signedness. *) + +let shift_right_sign_invar = + make_sign_invariant shift_right + +(* Immediate right-shift; result is unsigned even when operand is signed. *) + +let shift_right_to_uns shape elt = + const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt + shape elt + +(* Immediate left-shift. *) + +let shift_left shape elt = + const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt + +(* Immediate left-shift, unsigned result. *) + +let shift_left_to_uns shape elt = + const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt + shape elt + +(* Immediate left-shift, don't care about signs. *) + +let shift_left_sign_invar = + make_sign_invariant shift_left + +(* Shift left/right and insert: only element size matters. 
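+(* Editor's note: a sketch added for illustration, not part of the original
+   source, showing the immediate ranges encoded by the constant-shift
+   generators defined above. *)
+let () =
+  (* vshr_n on 16-bit D-register elements accepts shift amounts 1..16:
+     int16x4_t -> immediate -> int16x4_t. *)
+  let arity, elt = shift_right (Binary_imm Dreg) S16 in
+  assert (elt = S16);
+  assert (arity = Arity2 (T_int16x4, T_int16x4, T_immediate (1, 16)));
+  (* vshl_n accepts 0..15 for the same element size. *)
+  let arity', _ = shift_left (Binary_imm Dreg) S16 in
+  assert (arity' = Arity2 (T_int16x4, T_int16x4, T_immediate (0, 15)))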
*) + +let shift_insert shape elt = + let arity, elt = + const_shift (fun imm -> T_immediate (1, imm)) + ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in + arity, bits_of_elt elt + +(* Get/set lane. *) + +let get_lane shape elt = + let vtype = type_for_elt shape elt in + Arity2 (vtype 0, vtype 1, vtype 2), + (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x) + +let set_lane shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt + +let set_lane_notype shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts + +let create_vector shape elt = + let vtype = type_for_elt shape U64 1 + and rtype = type_for_elt shape elt 0 in + Arity1 (rtype, vtype), elt + +let conv make_arity shape elt = + let edest, esrc = match elt with + Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc + | _ -> failwith "Non-conversion element in conversion" in + let vtype = type_for_elt shape esrc + and rtype = type_for_elt shape edest 0 in + make_arity rtype vtype, elt + +let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1)) +let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2)) + +(* Operation has an unsigned result even if operands are signed. *) + +let dst_unsign make_arity shape elt = + let vtype = type_for_elt shape elt + and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in + make_arity rtype vtype, elt + +let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1)) + +let make_bits_only func shape elt = + let arity, elt' = func shape elt in + arity, bits_of_elt elt' + +(* Extend operation. *) + +let extend shape elt = + let vtype = type_for_elt shape elt in + Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt + +(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned + integer ops respectively, or unsigned for polynomial ops. *) + +let table mkarity shape elt = + let vtype = type_for_elt shape elt in + let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in + mkarity vtype op2, bits_of_elt elt + +let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2)) +let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2)) + +(* Operations where only bits matter. *) + +let bits_1 = make_bits_only elts_same_1 +let bits_2 = make_bits_only elts_same_2 +let bits_3 = make_bits_only elts_same_3 + +(* Store insns. *) +let store_1 shape elt = + let vtype = type_for_elt shape elt in + Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt + +let store_3 shape elt = + let vtype = type_for_elt shape elt in + Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt + +let make_notype func shape elt = + let arity, _ = func shape elt in + arity, NoElts + +let notype_1 = make_notype elts_same_1 +let notype_2 = make_notype elts_same_2 +let notype_3 = make_notype elts_same_3 + +(* Bit-select operations (first operand is unsigned int). *) + +let bit_select shape elt = + let vtype = type_for_elt shape elt + and itype = type_for_elt shape (unsigned_of_elt elt) in + Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts + +(* Common lists of supported element types. 
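+(* Editor's note: an added illustration, not in the original source, of how
+   the conversion generator conv_1 above resolves a vcvt signature. *)
+let () =
+  (* vcvt.s32.f32 d0, d1 : float32x2_t -> int32x2_t.  The element is a Conv
+     pair whose first member names the destination type. *)
+  let arity, elt = conv_1 (All (2, Dreg)) (Conv (S32, F32)) in
+  assert (elt = Conv (S32, F32));
+  assert (arity = Arity1 (T_int32x2, T_float32x2))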
*) + +let s_8_32 = [S8; S16; S32] +let u_8_32 = [U8; U16; U32] +let su_8_32 = [S8; S16; S32; U8; U16; U32] +let su_8_64 = S64 :: U64 :: su_8_32 +let su_16_64 = [S16; S32; S64; U16; U32; U64] +let pf_su_8_16 = [P8; P16; S8; S16; U8; U16] +let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32 +let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64 +let suf_32 = [S32; U32; F32] + +let ops = + [ + (* Addition. *) + Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32; + Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64]; + Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64; + Vadd, [], Long, "vaddl", elts_same_2, su_8_32; + Vadd, [], Wide, "vaddw", elts_same_2, su_8_32; + Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32; + Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32; + Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], + All (3, Dreg), "vRhadd", elts_same_2, su_8_32; + Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], + All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32; + Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64; + Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64; + Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64; + Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half], + Narrow, "vRaddhn", sign_invar_2, su_16_64; + + (* Multiplication. *) + Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32; + Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32; + Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh", + elts_same_2, [S16; S32]; + Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ", + elts_same_2, [S16; S32]; + Vmul, + [Saturating; Rounding; Doubling; High_half; + Instruction_name ["vqrdmulh"]], + All (3, Dreg), "vqRdmulh", + elts_same_2, [S16; S32]; + Vmul, + [Saturating; Rounding; Doubling; High_half; + Instruction_name ["vqrdmulh"]], + All (3, Qreg), "vqRdmulhQ", + elts_same_2, [S16; S32]; + Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32; + Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32]; + + (* Multiply-accumulate. *) + Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32; + Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32; + Vmla, [], Long, "vmlal", elts_same_io, su_8_32; + Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32]; + + (* Multiply-subtract. *) + Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32; + Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32; + Vmls, [], Long, "vmlsl", elts_same_io, su_8_32; + Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; + + (* Fused-multiply-accumulate. *) + Vfma, [Requires_feature "FMA"], All (3, Dreg), "vfma", elts_same_io, [F32]; + Vfma, [Requires_feature "FMA"], All (3, Qreg), "vfmaQ", elts_same_io, [F32]; + Vfms, [Requires_feature "FMA"], All (3, Dreg), "vfms", elts_same_io, [F32]; + Vfms, [Requires_feature "FMA"], All (3, Qreg), "vfmsQ", elts_same_io, [F32]; + + (* Round to integral. 
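+(* Editor's note: a sketch added for illustration only (not in the original
+   source) of what a single row of the ops table above denotes, using the
+   first Vadd entry and the generators defined earlier. *)
+let () =
+  (* The "vadd" row over F32 :: su_8_32 yields one intrinsic per element
+     type; vadd_f32 is float32x2_t -> float32x2_t -> float32x2_t. *)
+  let arity, elt = sign_invar_2 (All (3, Dreg)) F32 in
+  assert (arity = Arity2 (T_float32x2, T_float32x2, T_float32x2));
+  assert (elt = F32);
+  (* Sign-invariant generators strip the sign, so vadd_s8 and vadd_u8 both
+     map onto the same untyped 8-bit builtin. *)
+  let _, elt_s8 = sign_invar_2 (All (3, Dreg)) S8 in
+  assert (elt_s8 = I8)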
*) + Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrndn", elts_same_1, [F32]; + Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndqn", elts_same_1, [F32]; + Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrnda", elts_same_1, [F32]; + Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndqa", elts_same_1, [F32]; + Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrndp", elts_same_1, [F32]; + Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndqp", elts_same_1, [F32]; + Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrndm", elts_same_1, [F32]; + Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndqm", elts_same_1, [F32]; + Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Dreg; Dreg |], + "vrnd", elts_same_1, [F32]; + Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Qreg; Qreg |], + "vrndq", elts_same_1, [F32]; + (* Subtraction. *) + Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32; + Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64]; + Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64; + Vsub, [], Long, "vsubl", elts_same_2, su_8_32; + Vsub, [], Wide, "vsubw", elts_same_2, su_8_32; + Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32; + Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32; + Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64; + Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64; + Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64; + Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half], + Narrow, "vRsubhn", sign_invar_2, su_16_64; + + (* Comparison, equal. *) + Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32; + Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32; + + (* Comparison, greater-than or equal. *) + Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32; + Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"], + All (3, Dreg), "vcge", cmp_sign_matters, + u_8_32; + Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32; + Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"], + All (3, Qreg), "vcgeQ", cmp_sign_matters, + u_8_32; + + (* Comparison, less-than or equal. *) + Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters, + F32 :: s_8_32; + Vcle, [Instruction_name ["vcge"]; Flipped "vcgeu"], + All (3, Dreg), "vcle", cmp_sign_matters, + u_8_32; + Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"], + All (3, Qreg), "vcleQ", cmp_sign_matters, + F32 :: s_8_32; + Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"], + All (3, Qreg), "vcleQ", cmp_sign_matters, + u_8_32; + + (* Comparison, greater-than. *) + Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32; + Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"], + All (3, Dreg), "vcgt", cmp_sign_matters, + u_8_32; + Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32; + Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"], + All (3, Qreg), "vcgtQ", cmp_sign_matters, + u_8_32; + + (* Comparison, less-than. 
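+(* Editor's note: an added illustration, not part of the original source, of
+   the comparison typing used in the rows above. *)
+let () =
+  (* vcge.s32 returns an all-ones/all-zeros mask typed as the unsigned
+     vector of the same width: int32x2_t x int32x2_t -> uint32x2_t. *)
+  let arity, _ = cmp_sign_matters (All (3, Dreg)) S32 in
+  assert (arity = Arity2 (T_uint32x2, T_int32x2, T_int32x2))
+(* The [Flipped "vcge"] feature on the vcle rows records that no separate
+   instruction exists: the named builtin is used with its arguments swapped. *)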
*) + Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters, + F32 :: s_8_32; + Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtu"], + All (3, Dreg), "vclt", cmp_sign_matters, + u_8_32; + Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"], + All (3, Qreg), "vcltQ", cmp_sign_matters, + F32 :: s_8_32; + Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"], + All (3, Qreg), "vcltQ", cmp_sign_matters, + u_8_32; + + (* Compare absolute greater-than or equal. *) + Vcage, [Instruction_name ["vacge"]], + All (3, Dreg), "vcage", cmp_sign_matters, [F32]; + Vcage, [Instruction_name ["vacge"]], + All (3, Qreg), "vcageQ", cmp_sign_matters, [F32]; + + (* Compare absolute less-than or equal. *) + Vcale, [Instruction_name ["vacge"]; Flipped "vcage"], + All (3, Dreg), "vcale", cmp_sign_matters, [F32]; + Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"], + All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32]; + + (* Compare absolute greater-than or equal. *) + Vcagt, [Instruction_name ["vacgt"]], + All (3, Dreg), "vcagt", cmp_sign_matters, [F32]; + Vcagt, [Instruction_name ["vacgt"]], + All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32]; + + (* Compare absolute less-than or equal. *) + Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"], + All (3, Dreg), "vcalt", cmp_sign_matters, [F32]; + Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"], + All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32]; + + (* Test bits. *) + Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32; + Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32; + + (* Absolute difference. *) + Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32; + Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32; + Vabd, [], Long, "vabdl", elts_same_2, su_8_32; + + (* Absolute difference and accumulate. *) + Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32; + Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32; + Vaba, [], Long, "vabal", elts_same_io, su_8_32; + + (* Max. *) + Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32; + Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32; + + (* Min. *) + Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32; + Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32; + + (* Pairwise add. *) + Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32; + Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32; + Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32; + + (* Pairwise add, widen and accumulate. *) + Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32; + Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32; + + (* Folding maximum, minimum. *) + Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32; + Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32; + + (* Reciprocal step. *) + Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32]; + Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32]; + Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32]; + Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32]; + + (* Vector shift left. 
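+(* Editor's note: a minimal sketch, added for illustration and not in the
+   original source, of the register-shift typing used by the vshl rows
+   below. *)
+let () =
+  (* vshl.u64: the data is unsigned but the per-lane shift count is always
+     a signed vector: uint64x1_t -> int64x1_t -> uint64x1_t. *)
+  let arity, elt = reg_shift (All (3, Dreg)) U64 in
+  assert (elt = U64);
+  assert (arity = Arity2 (T_uint64x1, T_uint64x1, T_int64x1))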
*) + Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64; + Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64; + Vshl, [Instruction_name ["vrshl"]; Rounding], + All (3, Dreg), "vRshl", reg_shift, su_8_64; + Vshl, [Instruction_name ["vrshl"]; Rounding], + All (3, Qreg), "vRshlQ", reg_shift, su_8_64; + Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64; + Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64; + Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], + All (3, Dreg), "vqRshl", reg_shift, su_8_64; + Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], + All (3, Qreg), "vqRshlQ", reg_shift, su_8_64; + + (* Vector shift right by constant. *) + Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64; + Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64; + Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg, + "vRshr_n", shift_right, su_8_64; + Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg, + "vRshrQ_n", shift_right, su_8_64; + Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64; + Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n", + shift_right_sign_invar, su_16_64; + Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64; + Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm, + "vqRshrn_n", shift_right, su_16_64; + Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n", + shift_right_to_uns, [S16; S32; S64]; + Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding], + Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64]; + + (* Vector shift left by constant. *) + Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64; + Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64; + Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64; + Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64; + Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n", + shift_left_to_uns, [S8; S16; S32; S64]; + Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n", + shift_left_to_uns, [S8; S16; S32; S64]; + Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32; + + (* Vector shift right by constant and accumulate. *) + Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64; + Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64; + Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg, + "vRsra_n", shift_right_acc, su_8_64; + Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg, + "vRsraQ_n", shift_right_acc, su_8_64; + + (* Vector shift right and insert. *) + Vsri, [Requires_feature "CRYPTO"], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, + [P64]; + Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, + P8 :: P16 :: su_8_64; + Vsri, [Requires_feature "CRYPTO"], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, + [P64]; + Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, + P8 :: P16 :: su_8_64; + + (* Vector shift left and insert. 
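+(* Editor's note: an added sketch, not part of the original source, of the
+   accumulating-shift typing used by the vsra_n rows above. *)
+let () =
+  (* vsra_n adds the shifted value into its first operand, so the
+     destination type also appears as the first argument, and the shift
+     amount ranges over 1..32 for 32-bit elements. *)
+  let arity, _ = shift_right_acc (Binary_imm Dreg) U32 in
+  assert (arity
+          = Arity3 (T_uint32x2, T_uint32x2, T_uint32x2, T_immediate (1, 32)))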
*) + Vsli, [Requires_feature "CRYPTO"], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, + [P64]; + Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, + P8 :: P16 :: su_8_64; + Vsli, [Requires_feature "CRYPTO"], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, + [P64]; + Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, + P8 :: P16 :: su_8_64; + + (* Absolute value. *) + Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32]; + Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32]; + Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32]; + Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32]; + + (* Negate. *) + Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32]; + Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32]; + Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32]; + Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32]; + + (* Bitwise not. *) + Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32; + Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32; + + (* Count leading sign bits. *) + Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32]; + Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32]; + + (* Count leading zeros. *) + Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32; + Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32; + + (* Count number of set bits. *) + Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8]; + Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8]; + + (* Reciprocal estimate. *) + Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32]; + Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32]; + + (* Reciprocal square-root estimate. *) + Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32]; + Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32]; + + (* Get lanes from a vector. *) + Vget_lane, + [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; + Instruction_name ["vmov"]], + Use_operands [| Corereg; Dreg; Immed |], + "vget_lane", get_lane, pf_su_8_32; + Vget_lane, + [No_op; + InfoWord; + Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Corereg; Dreg; Immed |], + "vget_lane", notype_2, [S64; U64]; + Vget_lane, + [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; + Instruction_name ["vmov"]], + Use_operands [| Corereg; Qreg; Immed |], + "vgetQ_lane", get_lane, pf_su_8_32; + Vget_lane, + [InfoWord; + Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; + Instruction_name ["vmov"; "fmrrd"]; Const_valuator (fun _ -> 0); + Fixed_core_reg], + Use_operands [| Corereg; Qreg; Immed |], + "vgetQ_lane", notype_2, [S64; U64]; + + (* Set lanes in a vector. 
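+(* Editor's note: a usage sketch added for illustration (not in the original
+   source) of the get_lane generator behind the rows above. *)
+let () =
+  (* vget_lane on poly8x8_t returns a scalar poly8_t in a core register,
+     but the recorded element is the unsigned variant ... *)
+  let arity, elt = get_lane (Use_operands [| Corereg; Dreg; Immed |]) P8 in
+  assert (arity = Arity2 (T_poly8, T_poly8x8, T_immediate (0, 0)));
+  assert (elt = U8);
+  (* ... while 32-bit lanes (signed, unsigned or float) all share the
+     untyped 32-bit extraction. *)
+  let _, elt32 = get_lane (Use_operands [| Corereg; Dreg; Immed |]) F32 in
+  assert (elt32 = B32)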
*) + Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; + Instruction_name ["vmov"]], + Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", + set_lane, pf_su_8_32; + Vset_lane, [No_op; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", + set_lane_notype, [S64; U64]; + Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; + Instruction_name ["vmov"]], + Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", + set_lane, pf_su_8_32; + Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; + Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", + set_lane_notype, [S64; U64]; + + (* Create vector from literal bit pattern. *) + Vcreate, + [Requires_feature "CRYPTO"; No_op], (* Not really, but it can yield various things that are too + hard for the test generator at this time. *) + Use_operands [| Dreg; Corereg |], "vcreate", create_vector, + [P64]; + Vcreate, + [No_op], (* Not really, but it can yield various things that are too + hard for the test generator at this time. *) + Use_operands [| Dreg; Corereg |], "vcreate", create_vector, + pf_su_8_64; + + (* Set all lanes to the same value. *) + Vdup_n, + [Disassembles_as [Use_operands [| Dreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, + pf_su_8_32; + Vdup_n, + [No_op; Requires_feature "CRYPTO"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, + [P64]; + Vdup_n, + [No_op; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, + [S64; U64]; + Vdup_n, + [No_op; Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, + [P64]; + Vdup_n, + [Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, + pf_su_8_32; + Vdup_n, + [No_op; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; + Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1, + [S64; U64]; + + (* These are just aliases for the above. *) + Vmov_n, + [Builtin_name "vdup_n"; + Disassembles_as [Use_operands [| Dreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Dreg; Corereg |], + "vmov_n", bits_1, pf_su_8_32; + Vmov_n, + [No_op; + Builtin_name "vdup_n"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Dreg; Corereg |], + "vmov_n", notype_1, [S64; U64]; + Vmov_n, + [Builtin_name "vdupQ_n"; + Disassembles_as [Use_operands [| Qreg; + Alternatives [ Corereg; + Element_of_dreg ] |]]], + Use_operands [| Qreg; Corereg |], + "vmovQ_n", bits_1, pf_su_8_32; + Vmov_n, + [No_op; + Builtin_name "vdupQ_n"; + Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; + Use_operands [| Dreg; Corereg; Corereg |]]], + Use_operands [| Qreg; Corereg |], + "vmovQ_n", notype_1, [S64; U64]; + + (* Duplicate, lane version. 
We can't use Use_operands here because the + rightmost register (always Dreg) would be picked up by find_key_operand, + when we want the leftmost register to be used in this case (otherwise + the modes are indistinguishable in neon.md, etc. *) + Vdup_lane, + [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]], + Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32; + Vdup_lane, + [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Unary_scalar Dreg, "vdup_lane", bits_2, [P64]; + Vdup_lane, + [No_op; Const_valuator (fun _ -> 0)], + Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64]; + Vdup_lane, + [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]], + Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32; + Vdup_lane, + [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Unary_scalar Qreg, "vdupQ_lane", bits_2, [P64]; + Vdup_lane, + [No_op; Const_valuator (fun _ -> 0)], + Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64]; + + (* Combining vectors. *) + Vcombine, [Requires_feature "CRYPTO"; No_op], + Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, + [P64]; + Vcombine, [No_op], + Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, + pf_su_8_64; + + (* Splitting vectors. *) + Vget_high, [Requires_feature "CRYPTO"; No_op], + Use_operands [| Dreg; Qreg |], "vget_high", + notype_1, [P64]; + Vget_high, [No_op], + Use_operands [| Dreg; Qreg |], "vget_high", + notype_1, pf_su_8_64; + Vget_low, [Instruction_name ["vmov"]; + Disassembles_as [Use_operands [| Dreg; Dreg |]]; + Fixed_vector_reg], + Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, pf_su_8_32; + Vget_low, [Requires_feature "CRYPTO"; No_op], + Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, [P64]; + Vget_low, [No_op], + Use_operands [| Dreg; Qreg |], "vget_low", + notype_1, [S64; U64]; + + (* Conversions. *) + Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], + Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)]; + Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], + Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)]; + Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, + [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; + + (* Move, narrowing. *) + Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]], + Narrow, "vmovn", sign_invar_1, su_16_64; + Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating], + Narrow, "vqmovn", elts_same_1, su_16_64; + Vmovn, + [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign], + Narrow, "vqmovun", dst_unsign_1, + [S16; S32; S64]; + + (* Move, long. *) + Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]], + Long, "vmovl", elts_same_1, su_8_32; + + (* Table lookup. 
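+(* Editor's note: a short sketch, added for illustration and not part of the
+   original source, of the table look-up typing used by the vtbl rows
+   below. *)
+let () =
+  (* vtbl2 on poly8 elements: the table is an array of two D registers
+     (poly8x8x2_t), the index vector is unsigned, and only the element
+     width is recorded. *)
+  let shape = Use_operands [| Dreg; VecArray (2, Dreg); Dreg |] in
+  let arity, elt = table_2 shape P8 in
+  assert (elt = B8);
+  assert (arity = Arity2 (T_poly8x8, T_arrayof (2, T_poly8x8), T_uint8x8))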
*) + Vtbl 1, + [Instruction_name ["vtbl"]; + Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8]; + Vtbl 2, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2, + [U8; S8; P8]; + Vtbl 3, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2, + [U8; S8; P8]; + Vtbl 4, [Instruction_name ["vtbl"]], + Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2, + [U8; S8; P8]; + + (* Extended table lookup. *) + Vtbx 1, + [Instruction_name ["vtbx"]; + Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8]; + Vtbx 2, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io, + [U8; S8; P8]; + Vtbx 3, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io, + [U8; S8; P8]; + Vtbx 4, [Instruction_name ["vtbx"]], + Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io, + [U8; S8; P8]; + + (* Multiply, lane. (note: these were undocumented at the time of + writing). *) + Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane, + [S16; S32; U16; U32; F32]; + Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane, + [S16; S32; U16; U32; F32]; + + (* Multiply-accumulate, lane. *) + Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane, + [S16; S32; U16; U32]; + Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane", + elts_same_io_lane, [S16; S32]; + + (* Multiply-subtract, lane. *) + Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane, + [S16; S32; U16; U32; F32]; + Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane, + [S16; S32; U16; U32]; + Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane", + elts_same_io_lane, [S16; S32]; + + (* Long multiply, lane. *) + Vmull_lane, [], + Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32]; + + (* Saturating doubling long multiply, lane. *) + Vqdmull_lane, [Saturating; Doubling], + Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32]; + + (* Saturating doubling long multiply high, lane. *) + Vqdmulh_lane, [Saturating; Halving], + By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving], + By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving; Rounding; + Instruction_name ["vqrdmulh"]], + By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32]; + Vqdmulh_lane, [Saturating; Halving; Rounding; + Instruction_name ["vqrdmulh"]], + By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32]; + + (* Vector multiply by scalar. *) + Vmul_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmul_n", + sign_invar_2, [S16; S32; U16; U32; F32]; + Vmul_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n", + sign_invar_2, [S16; S32; U16; U32; F32]; + + (* Vector long multiply by scalar. 
*) + Vmull_n, [Instruction_name ["vmull"]; + Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]], + Wide_scalar, "vmull_n", + elts_same_2, [S16; S32; U16; U32]; + + (* Vector saturating doubling long multiply by scalar. *) + Vqdmull_n, [Saturating; Doubling; + Disassembles_as [Use_operands [| Qreg; Dreg; + Element_of_dreg |]]], + Wide_scalar, "vqdmull_n", + elts_same_2, [S16; S32]; + + (* Vector saturating doubling long multiply high by scalar. *) + Vqdmulh_n, + [Saturating; Halving; InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], + "vqdmulhQ_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], + "vqdmulh_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; Rounding; InfoWord; + Instruction_name ["vqrdmulh"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], + "vqRdmulhQ_n", elts_same_2, [S16; S32]; + Vqdmulh_n, + [Saturating; Halving; Rounding; InfoWord; + Instruction_name ["vqrdmulh"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], + "vqRdmulh_n", elts_same_2, [S16; S32]; + + (* Vector multiply-accumulate by scalar. *) + Vmla_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmla_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmla_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32]; + Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io, + [S16; S32]; + + (* Vector multiply subtract by scalar. *) + Vmls_n, [InfoWord; + Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], + Use_operands [| Dreg; Dreg; Corereg |], "vmls_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmls_n, [InfoWord; + Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], + Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n", + sign_invar_io, [S16; S32; U16; U32; F32]; + Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32]; + Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io, + [S16; S32]; + + (* Vector extract. *) + Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, + [P64]; + Vext, [Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, + pf_su_8_64; + Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, + [P64]; + Vext, [Const_valuator (fun _ -> 0)], + Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, + pf_su_8_64; + + (* Reverse elements. 
*) + Vrev64, [Use_shuffle (rev_elems 64)], All (2, Dreg), "vrev64", bits_1, + P8 :: P16 :: F32 :: su_8_32; + Vrev64, [Use_shuffle (rev_elems 64)], All (2, Qreg), "vrev64Q", bits_1, + P8 :: P16 :: F32 :: su_8_32; + Vrev32, [Use_shuffle (rev_elems 32)], All (2, Dreg), "vrev32", bits_1, + [P8; P16; S8; U8; S16; U16]; + Vrev32, [Use_shuffle (rev_elems 32)], All (2, Qreg), "vrev32Q", bits_1, + [P8; P16; S8; U8; S16; U16]; + Vrev16, [Use_shuffle (rev_elems 16)], All (2, Dreg), "vrev16", bits_1, + [P8; S8; U8]; + Vrev16, [Use_shuffle (rev_elems 16)], All (2, Qreg), "vrev16Q", bits_1, + [P8; S8; U8]; + + (* Bit selection. *) + Vbsl, + [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, + [P64]; + Vbsl, + [Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], + Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, + pf_su_8_64; + Vbsl, + [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], + Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, + [P64]; + Vbsl, + [Instruction_name ["vbsl"; "vbit"; "vbif"]; + Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], + Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, + pf_su_8_64; + + Vtrn, [Use_shuffle trn_elems], Pair_result Dreg, "vtrn", bits_2, pf_su_8_16; + Vtrn, [Use_shuffle trn_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vtrn", bits_2, suf_32; + Vtrn, [Use_shuffle trn_elems], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32; + (* Zip elements. *) + Vzip, [Use_shuffle zip_elems], Pair_result Dreg, "vzip", bits_2, pf_su_8_16; + Vzip, [Use_shuffle zip_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vzip", bits_2, suf_32; + Vzip, [Use_shuffle zip_elems], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32; + + (* Unzip elements. *) + Vuzp, [Use_shuffle uzip_elems], Pair_result Dreg, "vuzp", bits_2, + pf_su_8_32; + Vuzp, [Use_shuffle uzip_elems], Pair_result Qreg, "vuzpQ", bits_2, + pf_su_8_32; + + (* Element/structure loads. VLD1 variants. 
*) + Vldx 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, + [P64]; + Vldx 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, + pf_su_8_64; + Vldx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, + [P64]; + Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, + pf_su_8_64; + + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, pf_su_8_32; + Vldx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, [P64]; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], + "vld1_lane", bits_3, [S64; U64]; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, pf_su_8_32; + Vldx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, [P64]; + Vldx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], + "vld1Q_lane", bits_3, [S64; U64]; + + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, pf_su_8_32; + Vldx_dup 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, [P64]; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", + bits_1, [S64; U64]; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, pf_su_8_32; + (* Treated identically to vld1_dup above as we now + do a single load followed by a duplicate. *) + Vldx_dup 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, [P64]; + Vldx_dup 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", + bits_1, [S64; U64]; + + (* VST1 variants. 
*) + Vstx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg |], "vst1", + store_1, [P64]; + Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg |], "vst1", + store_1, pf_su_8_64; + Vstx 1, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", + store_1, [P64]; + Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", + store_1, pf_su_8_64; + + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, pf_su_8_32; + Vstx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, [P64]; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]; + Const_valuator (fun _ -> 0)], + Use_operands [| PtrTo Corereg; Dreg; Immed |], + "vst1_lane", store_3, [U64; S64]; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, pf_su_8_32; + Vstx_lane 1, + [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, [P64]; + Vstx_lane 1, + [Disassembles_as [Use_operands [| VecArray (1, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; Qreg; Immed |], + "vst1Q_lane", store_3, [U64; S64]; + + (* VLD2 variants. 
*) + Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, pf_su_8_32; + Vldx 2, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, [P64]; + Vldx 2, [Instruction_name ["vld1"]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2", bits_1, [S64; U64]; + Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (2, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |], + "vld2Q", bits_1, pf_su_8_32; + + Vldx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg; + VecArray (2, Dreg); Immed |], + "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg; + VecArray (2, Qreg); Immed |], + "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 2, + [Disassembles_as [Use_operands + [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, pf_su_8_32; + Vldx_dup 2, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (2, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, [P64]; + Vldx_dup 2, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (2, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], + "vld2_dup", bits_1, [S64; U64]; + + (* VST2 variants. *) + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, pf_su_8_32; + Vstx 2, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, [P64]; + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", + store_1, [S64; U64]; + Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (2, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q", + store_1, pf_su_8_32; + + Vstx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 2, + [Disassembles_as [Use_operands + [| VecArray (2, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* VLD3 variants. 
*) + Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, pf_su_8_32; + Vldx 3, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, [P64]; + Vldx 3, [Instruction_name ["vld1"]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3", bits_1, [S64; U64]; + Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (3, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |], + "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32; + + Vldx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg; + VecArray (3, Dreg); Immed |], + "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg; + VecArray (3, Qreg); Immed |], + "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 3, + [Disassembles_as [Use_operands + [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, pf_su_8_32; + Vldx_dup 3, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (3, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, [P64]; + Vldx_dup 3, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (3, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], + "vld3_dup", bits_1, [S64; U64]; + + (* VST3 variants. *) + Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, pf_su_8_32; + Vstx 3, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, [P64]; + Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", + store_1, [S64; U64]; + Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (3, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q", + store_1, pf_su_8_32; + + Vstx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 3, + [Disassembles_as [Use_operands + [| VecArray (3, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* VLD4/VST4 variants. 
*) + Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, pf_su_8_32; + Vldx 4, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, [P64]; + Vldx 4, [Instruction_name ["vld1"]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4", bits_1, [S64; U64]; + Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + CstPtrTo Corereg |]; + Use_operands [| VecArray (4, Dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |], + "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32; + + Vldx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg; + VecArray (4, Dreg); Immed |], + "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; + Vldx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg; + VecArray (4, Qreg); Immed |], + "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; + + Vldx_dup 4, + [Disassembles_as [Use_operands + [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, pf_su_8_32; + Vldx_dup 4, + [Requires_feature "CRYPTO"; + Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (4, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, [P64]; + Vldx_dup 4, + [Instruction_name ["vld1"]; Disassembles_as [Use_operands + [| VecArray (4, Dreg); CstPtrTo Corereg |]]], + Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], + "vld4_dup", bits_1, [S64; U64]; + + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, pf_su_8_32; + Vstx 4, [Requires_feature "CRYPTO"; + Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, [P64]; + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]; + Instruction_name ["vst1"]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", + store_1, [S64; U64]; + Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]; + Use_operands [| VecArray (4, Dreg); + PtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q", + store_1, pf_su_8_32; + + Vstx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane", + store_3, P8 :: P16 :: F32 :: su_8_32; + Vstx_lane 4, + [Disassembles_as [Use_operands + [| VecArray (4, Element_of_dreg); + CstPtrTo Corereg |]]], + Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane", + store_3, [P16; F32; U16; U32; S16; S32]; + + (* Logical operations. And. *) + Vand, [], All (3, Dreg), "vand", notype_2, su_8_32; + Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64]; + Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64; + + (* Or. *) + Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32; + Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64]; + Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64; + + (* Eor. 
*) + Veor, [], All (3, Dreg), "veor", notype_2, su_8_32; + Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64]; + Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64; + + (* Bic (And-not). *) + Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32; + Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64]; + Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64; + + (* Or-not. *) + Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32; + Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64]; + Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64; + ] + +let type_in_crypto_only t + = (t == P64) or (t == P128) + +let cross_product s1 s2 + = List.filter (fun (e, e') -> e <> e') + (List.concat (List.map (fun e1 -> List.map (fun e2 -> (e1,e2)) s1) s2)) + +let reinterp = + let elems = P8 :: P16 :: F32 :: P64 :: su_8_64 in + let casts = cross_product elems elems in + List.map + (fun (convto, convfrom) -> + Vreinterp, (if (type_in_crypto_only convto) or (type_in_crypto_only convfrom) + then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Dreg; Dreg |], + "vreinterpret", conv_1, [Cast (convto, convfrom)]) + casts + +let reinterpq = + let elems = P8 :: P16 :: F32 :: P64 :: P128 :: su_8_64 in + let casts = cross_product elems elems in + List.map + (fun (convto, convfrom) -> + Vreinterp, (if (type_in_crypto_only convto) or (type_in_crypto_only convfrom) + then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Qreg; Qreg |], + "vreinterpretQ", conv_1, [Cast (convto, convfrom)]) + casts + +(* Output routines. *) + +let rec string_of_elt = function + S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64" + | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64" + | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64" + | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64" + | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" + | P64 -> "p64" | P128 -> "p128" + | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b + | NoElts -> failwith "No elts" + +let string_of_elt_dots elt = + match elt with + Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." 
^ string_of_elt b + | _ -> string_of_elt elt + +let string_of_vectype vt = + let rec name affix = function + T_int8x8 -> affix "int8x8" + | T_int8x16 -> affix "int8x16" + | T_int16x4 -> affix "int16x4" + | T_int16x8 -> affix "int16x8" + | T_int32x2 -> affix "int32x2" + | T_int32x4 -> affix "int32x4" + | T_int64x1 -> affix "int64x1" + | T_int64x2 -> affix "int64x2" + | T_uint8x8 -> affix "uint8x8" + | T_uint8x16 -> affix "uint8x16" + | T_uint16x4 -> affix "uint16x4" + | T_uint16x8 -> affix "uint16x8" + | T_uint32x2 -> affix "uint32x2" + | T_uint32x4 -> affix "uint32x4" + | T_uint64x1 -> affix "uint64x1" + | T_uint64x2 -> affix "uint64x2" + | T_float16x4 -> affix "float16x4" + | T_float32x2 -> affix "float32x2" + | T_float32x4 -> affix "float32x4" + | T_poly8x8 -> affix "poly8x8" + | T_poly8x16 -> affix "poly8x16" + | T_poly16x4 -> affix "poly16x4" + | T_poly16x8 -> affix "poly16x8" + | T_int8 -> affix "int8" + | T_int16 -> affix "int16" + | T_int32 -> affix "int32" + | T_int64 -> affix "int64" + | T_uint8 -> affix "uint8" + | T_uint16 -> affix "uint16" + | T_uint32 -> affix "uint32" + | T_uint64 -> affix "uint64" + | T_poly8 -> affix "poly8" + | T_poly16 -> affix "poly16" + | T_poly64 -> affix "poly64" + | T_poly64x1 -> affix "poly64x1" + | T_poly64x2 -> affix "poly64x2" + | T_poly128 -> affix "poly128" + | T_float16 -> affix "float16" + | T_float32 -> affix "float32" + | T_immediate _ -> "const int" + | T_void -> "void" + | T_intQI -> "__builtin_neon_qi" + | T_intHI -> "__builtin_neon_hi" + | T_intSI -> "__builtin_neon_si" + | T_intDI -> "__builtin_neon_di" + | T_intTI -> "__builtin_neon_ti" + | T_floatHF -> "__builtin_neon_hf" + | T_floatSF -> "__builtin_neon_sf" + | T_arrayof (num, base) -> + let basename = name (fun x -> x) base in + affix (Printf.sprintf "%sx%d" basename num) + | T_ptrto x -> + let basename = name affix x in + Printf.sprintf "%s *" basename + | T_const x -> + let basename = name affix x in + Printf.sprintf "const %s" basename + in + name (fun x -> x ^ "_t") vt + +let string_of_inttype = function + B_TImode -> "__builtin_neon_ti" + | B_EImode -> "__builtin_neon_ei" + | B_OImode -> "__builtin_neon_oi" + | B_CImode -> "__builtin_neon_ci" + | B_XImode -> "__builtin_neon_xi" + +let string_of_mode = function + V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si" + | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" + | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" + | HI -> "hi" | SI -> "si" | SF -> "sf" | TI -> "ti" + +(* Use uppercase chars for letters which form part of the intrinsic name, but + should be omitted from the builtin name (the info is passed in an extra + argument, instead). *) +let intrinsic_name name = String.lowercase name + +(* Allow the name of the builtin to be overridden by things (e.g. Flipped) + found in the features list. *) +let builtin_name features name = + let name = List.fold_right + (fun el name -> + match el with + Flipped x | Builtin_name x -> x + | _ -> name) + features name in + let islower x = let str = String.make 1 x in (String.lowercase str) = str + and buf = Buffer.create (String.length name) in + String.iter (fun c -> if islower c then Buffer.add_char buf c) name; + Buffer.contents buf + +(* Transform an arity into a list of strings. 
*) +let strings_of_arity a = + match a with + | Arity0 vt -> [string_of_vectype vt] + | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2] + | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3] + | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3; + string_of_vectype vt4] + | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1; + string_of_vectype vt2; + string_of_vectype vt3; + string_of_vectype vt4; + string_of_vectype vt5] + +(* Suffixes on the end of builtin names that are to be stripped in order + to obtain the name used as an instruction. They are only stripped if + preceded immediately by an underscore. *) +let suffixes_to_strip = [ "n"; "lane"; "dup" ] + +(* Get the possible names of an instruction corresponding to a "name" from the + ops table. This is done by getting the equivalent builtin name and + stripping any suffixes from the list at the top of this file, unless + the features list presents with an Instruction_name entry, in which + case that is used; or unless the features list presents with a Flipped + entry, in which case that is used. If both such entries are present, + the first in the list will be chosen. *) +let get_insn_names features name = + let names = try + begin + match List.find (fun feature -> match feature with + Instruction_name _ -> true + | Flipped _ -> true + | _ -> false) features + with + Instruction_name names -> names + | Flipped name -> [name] + | _ -> assert false + end + with Not_found -> [builtin_name features name] + in + begin + List.map (fun name' -> + try + let underscore = String.rindex name' '_' in + let our_suffix = String.sub name' (underscore + 1) + ((String.length name') - underscore - 1) + in + let rec strip remaining_suffixes = + match remaining_suffixes with + [] -> name' + | s::ss when our_suffix = s -> String.sub name' 0 underscore + | _::ss -> strip ss + in + strip suffixes_to_strip + with (Not_found | Invalid_argument _) -> name') names + end + +(* Apply a function to each element of a list and then comma-separate + the resulting strings. *) +let rec commas f elts acc = + match elts with + [] -> acc + | [elt] -> acc ^ (f elt) + | elt::elts -> + commas f elts (acc ^ (f elt) ^ ", ") + +(* Given a list of features and the shape specified in the "ops" table, apply + a function to each possible shape that the instruction may have. + By default, this is the "shape" entry in "ops". If the features list + contains a Disassembles_as entry, the shapes contained in that entry are + mapped to corresponding outputs and returned in a list. If there is more + than one Disassembles_as entry, only the first is used. *) +let analyze_all_shapes features shape f = + try + match List.find (fun feature -> + match feature with Disassembles_as _ -> true + | _ -> false) + features with + Disassembles_as shapes -> List.map f shapes + | _ -> assert false + with Not_found -> [f shape] + +(* The crypto intrinsics have unconventional shapes and are not that + numerous to be worth the trouble of encoding here. We implement them + explicitly here. 
*) +let crypto_intrinsics = +" +#ifdef __ARM_FEATURE_CRYPTO + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vldrq_p128 (poly128_t const * __ptr) +{ +#ifdef __ARM_BIG_ENDIAN + poly64_t* __ptmp = (poly64_t*) __ptr; + poly64_t __d0 = vld1_p64 (__ptmp); + poly64_t __d1 = vld1_p64 (__ptmp + 1); + return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0)); +#else + return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr)); +#endif +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) +{ +#ifdef __ARM_BIG_ENDIAN + poly64x2_t __tmp = vreinterpretq_p64_p128 (__val); + poly64_t __d0 = vget_high_p64 (__tmp); + poly64_t __d1 = vget_low_p64 (__tmp); + vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1)); +#else + vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val)); +#endif +} + +/* The vceq_p64 intrinsic does not map to a single instruction. + Instead we emulate it by performing a 32-bit variant of the vceq + and applying a pairwise min reduction to the result. + vceq_u32 will produce two 32-bit halves, each of which will contain either + all ones or all zeros depending on whether the corresponding 32-bit + halves of the poly64_t were equal. The whole poly64_t values are equal + if and only if both halves are equal, i.e. vceq_u32 returns all ones. + If the result is all zeroes for any half then the whole result is zeroes. + This is what the pairwise min reduction achieves. */ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vceq_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vceq_u32 (__t_a, __t_b); + uint32x2_t __m = vpmin_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +/* The vtst_p64 intrinsic does not map to a single instruction. + We emulate it in way similar to vceq_p64 above but here we do + a reduction with max since if any two corresponding bits + in the two poly64_t's match, then the whole result must be all ones. 
*/ + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vtst_p64 (poly64x1_t __a, poly64x1_t __b) +{ + uint32x2_t __t_a = vreinterpret_u32_p64 (__a); + uint32x2_t __t_b = vreinterpret_u32_p64 (__b); + uint32x2_t __c = vtst_u32 (__t_a, __t_b); + uint32x2_t __m = vpmax_u32 (__c, __c); + return vreinterpret_u64_u32 (__m); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaeseq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aese (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesdq_u8 (uint8x16_t __data, uint8x16_t __key) +{ + return __builtin_arm_crypto_aesd (__data, __key); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesmcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesmc (__data); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vaesimcq_u8 (uint8x16_t __data) +{ + return __builtin_arm_crypto_aesimc (__data); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +vsha1h_u32 (uint32_t __hash_e) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + __t = __builtin_arm_crypto_sha1h (__t); + return vgetq_lane_u32 (__t, 0); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) +{ + uint32x4_t __t = vdupq_n_u32 (0); + __t = vsetq_lane_u32 (__hash_e, __t, 0); + return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) +{ + return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) +{ + return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) +{ + return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) +{ + return __builtin_arm_crypto_sha256su1 (__tw0_3, 
__w8_11, __w12_15); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_p64 (poly64_t __a, poly64_t __b) +{ + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b); +} + +__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) +vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) +{ + poly64_t __t1 = vget_high_p64 (__a); + poly64_t __t2 = vget_high_p64 (__b); + + return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2); +} + +#endif +" diff --git a/gcc-4.9/gcc/config/arm/netbsd-elf.h b/gcc-4.9/gcc/config/arm/netbsd-elf.h new file mode 100644 index 000000000..9deda9679 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/netbsd-elf.h @@ -0,0 +1,154 @@ +/* Definitions of target machine for GNU compiler, NetBSD/arm ELF version. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributed by Wasabi Systems, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Run-time Target Specification. */ + +/* arm.h defaults to ARM6 CPU. */ + +/* This defaults us to little-endian. */ +#ifndef TARGET_ENDIAN_DEFAULT +#define TARGET_ENDIAN_DEFAULT 0 +#endif + +#undef MULTILIB_DEFAULTS + +/* Default it to use ATPCS with soft-VFP. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT \ + (MASK_APCS_FRAME \ + | TARGET_ENDIAN_DEFAULT) + +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_ATPCS + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF(); \ + } \ + while (0) + +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC + +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC \ + "-matpcs %{fpic|fpie:-k} %{fPIC|fPIE:-k}" + +/* Default to full VFP if -mfloat-abi=hard is specified. */ +#undef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC \ + "%{mfloat-abi=hard:{!mfpu=*:-mfpu=vfp}}" + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, \ + { "subtarget_asm_float_spec", SUBTARGET_ASM_FLOAT_SPEC }, \ + { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \ + { "netbsd_entry_point", NETBSD_ENTRY_POINT }, + +#define NETBSD_ENTRY_POINT "__start" + +#undef LINK_SPEC +#define LINK_SPEC \ + "-X %{mbig-endian:-EB} %{mlittle-endian:-EL} \ + %(netbsd_link_spec)" + +/* Make GCC agree with . */ + +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +/* We don't have any limit on the length as out debugger is GDB. */ +#undef DBX_CONTIN_LENGTH + +/* NetBSD does its profiling differently to the Acorn compiler. We + don't need a word following the mcount call; and to skip it + requires either an assembly stub or use of fomit-frame-pointer when + compiling the profiling functions. Since we break Acorn CC + compatibility below a little more won't hurt. 
*/ + +#undef ARM_FUNCTION_PROFILER +#define ARM_FUNCTION_PROFILER(STREAM,LABELNO) \ +{ \ + asm_fprintf (STREAM, "\tmov\t%Rip, %Rlr\n"); \ + asm_fprintf (STREAM, "\tbl\t__mcount%s\n", \ + (TARGET_ARM && NEED_PLT_RELOC) \ + ? "(PLT)" : ""); \ +} + +/* VERY BIG NOTE: Change of structure alignment for NetBSD/arm. + There are consequences you should be aware of... + + Normally GCC/arm uses a structure alignment of 32 for compatibility + with armcc. This means that structures are padded to a word + boundary. However this causes problems with bugged NetBSD kernel + code (possibly userland code as well - I have not checked every + binary). The nature of this bugged code is to rely on sizeof() + returning the correct size of various structures rounded to the + nearest byte (SCSI and ether code are two examples, the vm system + is another). This code breaks when the structure alignment is 32 + as sizeof() will report a word=rounded size. By changing the + structure alignment to 8. GCC will conform to what is expected by + NetBSD. + + This has several side effects that should be considered. + 1. Structures will only be aligned to the size of the largest member. + i.e. structures containing only bytes will be byte aligned. + structures containing shorts will be half word aligned. + structures containing ints will be word aligned. + + This means structures should be padded to a word boundary if + alignment of 32 is required for byte structures etc. + + 2. A potential performance penalty may exist if strings are no longer + word aligned. GCC will not be able to use word load/stores to copy + short strings. + + This modification is not encouraged but with the present state of the + NetBSD source tree it is currently the only solution that meets the + requirements. */ + +#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8 + +/* Clear the instruction cache from `BEG' to `END'. This makes a + call to the ARM_SYNC_ICACHE architecture specific syscall. */ +#define CLEAR_INSN_CACHE(BEG, END) \ +do \ + { \ + extern int sysarch(int number, void *args); \ + struct \ + { \ + unsigned int addr; \ + int len; \ + } s; \ + s.addr = (unsigned int)(BEG); \ + s.len = (END) - (BEG); \ + (void) sysarch (0, &s); \ + } \ +while (0) + +#undef FPUTYPE_DEFAULT +#define FPUTYPE_DEFAULT "vfp" + diff --git a/gcc-4.9/gcc/config/arm/predicates.md b/gcc-4.9/gcc/config/arm/predicates.md new file mode 100644 index 000000000..ce5c9a830 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/predicates.md @@ -0,0 +1,677 @@ +;; Predicate definitions for ARM and Thumb +;; Copyright (C) 2004-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_predicate "s_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + /* We don't consider registers whose class is NO_REGS + to be a register operand. 
*/ + /* XXX might have to check for lo regs only for thumb ??? */ + return (REG_P (op) + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) != NO_REGS)); +}) + +(define_predicate "imm_for_neon_inv_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL)); +}) + +(define_predicate "neon_inv_logic_op2" + (ior (match_operand 0 "imm_for_neon_inv_logic_operand") + (match_operand 0 "s_register_operand"))) + +(define_predicate "imm_for_neon_logic_operand" + (match_code "const_vector") +{ + return (TARGET_NEON + && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL)); +}) + +(define_predicate "neon_logic_op2" + (ior (match_operand 0 "imm_for_neon_logic_operand") + (match_operand 0 "s_register_operand"))) + +;; Any general register. +(define_predicate "arm_hard_general_register_operand" + (match_code "reg") +{ + return REGNO (op) <= LAST_ARM_REGNUM; +}) + +;; A low register. +(define_predicate "low_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) <= LAST_LO_REGNUM"))) + +;; A low register or const_int. +(define_predicate "low_reg_or_int_operand" + (ior (match_code "const_int") + (match_operand 0 "low_register_operand"))) + +;; Any core register, or any pseudo. */ +(define_predicate "arm_general_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return (REG_P (op) + && (REGNO (op) <= LAST_ARM_REGNUM + || REGNO (op) >= FIRST_PSEUDO_REGISTER)); +}) + +(define_predicate "vfp_register_operand" + (match_code "reg,subreg") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + /* We don't consider registers whose class is NO_REGS + to be a register operand. */ + return (REG_P (op) + && (REGNO (op) >= FIRST_PSEUDO_REGISTER + || REGNO_REG_CLASS (REGNO (op)) == VFP_D0_D7_REGS + || REGNO_REG_CLASS (REGNO (op)) == VFP_LO_REGS + || (TARGET_VFPD32 + && REGNO_REG_CLASS (REGNO (op)) == VFP_REGS))); +}) + +(define_predicate "vfp_hard_register_operand" + (match_code "reg") +{ + return (IS_VFP_REGNUM (REGNO (op))); +}) + +(define_predicate "zero_operand" + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Match a register, or zero in the appropriate mode. +(define_predicate "reg_or_zero_operand" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "zero_operand"))) + +(define_special_predicate "subreg_lowpart_operator" + (and (match_code "subreg") + (match_test "subreg_lowpart_p (op)"))) + +;; Reg, subreg(reg) or const_int. +(define_predicate "reg_or_int_operand" + (ior (match_code "const_int") + (match_operand 0 "s_register_operand"))) + +(define_predicate "arm_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (INTVAL (op))"))) + +;; A constant value which fits into two instructions, each taking +;; an arithmetic constant operand for one of the words. 
+(define_predicate "arm_immediate_di_operand" + (and (match_code "const_int,const_double") + (match_test "arm_const_double_by_immediates (op)"))) + +(define_predicate "arm_neg_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (-INTVAL (op))"))) + +(define_predicate "arm_not_immediate_operand" + (and (match_code "const_int") + (match_test "const_ok_for_arm (~INTVAL (op))"))) + +(define_predicate "const0_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 0"))) + +;; Something valid on the RHS of an ARM data-processing instruction +(define_predicate "arm_rhs_operand" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "arm_immediate_operand"))) + +(define_predicate "arm_rhsm_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "memory_operand"))) + +;; This doesn't have to do much because the constant is already checked +;; in the shift_operator predicate. +(define_predicate "shift_amount_operand" + (ior (and (match_test "TARGET_ARM") + (match_operand 0 "s_register_operand")) + (match_operand 0 "const_int_operand"))) + +(define_predicate "const_neon_scalar_shift_amount_operand" + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode) + && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0"))) + +(define_predicate "ldrd_strd_offset_operand" + (and (match_operand 0 "const_int_operand") + (match_test "TARGET_LDRD && offset_ok_for_ldrd_strd (INTVAL (op))"))) + +(define_predicate "arm_add_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +(define_predicate "arm_anddi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), AND)")) + (match_operand 0 "neon_inv_logic_op2"))) + +(define_predicate "arm_iordi_operand_neon" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)")) + (match_operand 0 "neon_logic_op2"))) + +(define_predicate "arm_xordi_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)")))) + +(define_predicate "arm_adddi_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_int") + (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)")))) + +(define_predicate "arm_addimm_operand" + (ior (match_operand 0 "arm_immediate_operand") + (match_operand 0 "arm_neg_immediate_operand"))) + +(define_predicate "arm_not_operand" + (ior (match_operand 0 "arm_rhs_operand") + (match_operand 0 "arm_not_immediate_operand"))) + +(define_predicate "arm_di_operand" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "arm_immediate_di_operand"))) + +;; True if the operand is a memory reference which contains an +;; offsettable address. +(define_predicate "offsettable_memory_operand" + (and (match_code "mem") + (match_test + "offsettable_address_p (reload_completed | reload_in_progress, + mode, XEXP (op, 0))"))) + +;; True if the operand is a memory operand that does not have an +;; automodified base register (and thus will not generate output reloads). 
+(define_predicate "call_memory_operand" + (and (match_code "mem") + (and (match_test "GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) + != RTX_AUTOINC") + (match_operand 0 "memory_operand")))) + +(define_predicate "arm_reload_memory_operand" + (and (match_code "mem,reg,subreg") + (match_test "(!CONSTANT_P (op) + && (true_regnum(op) == -1 + || (REG_P (op) + && REGNO (op) >= FIRST_PSEUDO_REGISTER)))"))) + +(define_predicate "vfp_compare_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_code "const_double") + (match_test "arm_const_double_rtx (op)")))) + +(define_predicate "arm_float_compare_operand" + (if_then_else (match_test "TARGET_VFP") + (match_operand 0 "vfp_compare_operand") + (match_operand 0 "s_register_operand"))) + +;; True for valid index operands. +(define_predicate "index_operand" + (ior (match_operand 0 "s_register_operand") + (and (match_operand 0 "immediate_operand") + (match_test "(!CONST_INT_P (op) + || (INTVAL (op) < 4096 && INTVAL (op) > -4096))")))) + +;; True for operators that can be combined with a shift in ARM state. +(define_special_predicate "shiftable_operator" + (and (match_code "plus,minus,ior,xor,and") + (match_test "mode == GET_MODE (op)"))) + +(define_special_predicate "shiftable_operator_strict_it" + (and (match_code "plus,and") + (match_test "mode == GET_MODE (op)"))) + +;; True for logical binary operators. +(define_special_predicate "logical_binary_operator" + (and (match_code "ior,xor,and") + (match_test "mode == GET_MODE (op)"))) + +;; True for commutative operators +(define_special_predicate "commutative_binary_operator" + (and (match_code "ior,xor,and,plus") + (match_test "mode == GET_MODE (op)"))) + +;; True for shift operators. +;; Notes: +;; * mult is only permitted with a constant shift amount +;; * patterns that permit register shift amounts only in ARM mode use +;; shift_amount_operand, patterns that always allow registers do not, +;; so we don't have to worry about that sort of thing here. +(define_special_predicate "shift_operator" + (and (ior (ior (and (match_code "mult") + (match_test "power_of_two_operand (XEXP (op, 1), mode)")) + (and (match_code "rotate") + (match_test "CONST_INT_P (XEXP (op, 1)) + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") + (match_test "!CONST_INT_P (XEXP (op, 1)) + || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (match_test "mode == GET_MODE (op)"))) + +;; True for shift operators which can be used with saturation instructions. +(define_special_predicate "sat_shift_operator" + (and (ior (and (match_code "mult") + (match_test "power_of_two_operand (XEXP (op, 1), mode)")) + (and (match_code "ashift,ashiftrt") + (match_test "CONST_INT_P (XEXP (op, 1)) + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1)) < 32)"))) + (match_test "mode == GET_MODE (op)"))) + +;; True for MULT, to identify which variant of shift_operator is in use. +(define_special_predicate "mult_operator" + (match_code "mult")) + +;; True for operators that have 16-bit thumb variants. */ +(define_special_predicate "thumb_16bit_operator" + (match_code "plus,minus,and,ior,xor")) + +;; True for EQ & NE +(define_special_predicate "equality_operator" + (match_code "eq,ne")) + +;; True for integer comparisons and, if FP is active, for comparisons +;; other than LTGT or UNEQ. 
+(define_special_predicate "expandable_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu, + unordered,ordered,unlt,unle,unge,ungt")) + +;; Likewise, but only accept comparisons that are directly supported +;; by ARM condition codes. +(define_special_predicate "arm_comparison_operator" + (and (match_operand 0 "expandable_comparison_operator") + (match_test "maybe_get_arm_condition_code (op) != ARM_NV"))) + +(define_special_predicate "lt_ge_comparison_operator" + (match_code "lt,ge")) + +;; The vsel instruction only accepts the ARM condition codes listed below. +(define_special_predicate "arm_vsel_comparison_operator" + (and (match_operand 0 "expandable_comparison_operator") + (match_test "maybe_get_arm_condition_code (op) == ARM_GE + || maybe_get_arm_condition_code (op) == ARM_GT + || maybe_get_arm_condition_code (op) == ARM_EQ + || maybe_get_arm_condition_code (op) == ARM_VS + || maybe_get_arm_condition_code (op) == ARM_LT + || maybe_get_arm_condition_code (op) == ARM_LE + || maybe_get_arm_condition_code (op) == ARM_NE + || maybe_get_arm_condition_code (op) == ARM_VC"))) + +(define_special_predicate "arm_cond_move_operator" + (if_then_else (match_test "arm_restrict_it") + (and (match_test "TARGET_FPU_ARMV8") + (match_operand 0 "arm_vsel_comparison_operator")) + (match_operand 0 "expandable_comparison_operator"))) + +(define_special_predicate "noov_comparison_operator" + (match_code "lt,ge,eq,ne")) + +(define_special_predicate "minmax_operator" + (and (match_code "smin,smax,umin,umax") + (match_test "mode == GET_MODE (op)"))) + +(define_special_predicate "cc_register" + (and (match_code "reg") + (and (match_test "REGNO (op) == CC_REGNUM") + (ior (match_test "mode == GET_MODE (op)") + (match_test "mode == VOIDmode && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC"))))) + +(define_special_predicate "dominant_cc_register" + (match_code "reg") +{ + if (mode == VOIDmode) + { + mode = GET_MODE (op); + + if (GET_MODE_CLASS (mode) != MODE_CC) + return false; + } + + return (cc_register (op, mode) + && (mode == CC_DNEmode + || mode == CC_DEQmode + || mode == CC_DLEmode + || mode == CC_DLTmode + || mode == CC_DGEmode + || mode == CC_DGTmode + || mode == CC_DLEUmode + || mode == CC_DLTUmode + || mode == CC_DGEUmode + || mode == CC_DGTUmode)); +}) + +(define_special_predicate "arm_extendqisi_mem_op" + (and (match_operand 0 "memory_operand") + (match_test "TARGET_ARM ? 
arm_legitimate_address_outer_p (mode, + XEXP (op, 0), + SIGN_EXTEND, + 0) + : memory_address_p (QImode, XEXP (op, 0))"))) + +(define_special_predicate "arm_reg_or_extendqisi_mem_op" + (ior (match_operand 0 "arm_extendqisi_mem_op") + (match_operand 0 "s_register_operand"))) + +(define_predicate "power_of_two_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT value = INTVAL (op) & 0xffffffff; + + return value != 0 && (value & (value - 1)) == 0; +}) + +(define_predicate "nonimmediate_di_operand" + (match_code "reg,subreg,mem") +{ + if (s_register_operand (op, mode)) + return true; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return MEM_P (op) && memory_address_p (DImode, XEXP (op, 0)); +}) + +(define_predicate "di_operand" + (ior (match_code "const_int,const_double") + (and (match_code "reg,subreg,mem") + (match_operand 0 "nonimmediate_di_operand")))) + +(define_predicate "nonimmediate_soft_df_operand" + (match_code "reg,subreg,mem") +{ + if (s_register_operand (op, mode)) + return true; + + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + return MEM_P (op) && memory_address_p (DFmode, XEXP (op, 0)); +}) + +(define_predicate "soft_df_operand" + (ior (match_code "const_double") + (and (match_code "reg,subreg,mem") + (match_operand 0 "nonimmediate_soft_df_operand")))) + +(define_special_predicate "load_multiple_operation" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/true, SImode, + /*consecutive=*/false, + /*return_pc=*/false); +}) + +(define_special_predicate "store_multiple_operation" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/false, SImode, + /*consecutive=*/false, + /*return_pc=*/false); +}) + +(define_special_predicate "pop_multiple_return" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/true, SImode, + /*consecutive=*/false, + /*return_pc=*/true); +}) + +(define_special_predicate "pop_multiple_fp" + (match_code "parallel") +{ + return ldm_stm_operation_p (op, /*load=*/true, DFmode, + /*consecutive=*/true, + /*return_pc=*/false); +}) + +(define_special_predicate "multi_register_push" + (match_code "parallel") +{ + if ((GET_CODE (XVECEXP (op, 0, 0)) != SET) + || (GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != UNSPEC) + || (XINT (SET_SRC (XVECEXP (op, 0, 0)), 1) != UNSPEC_PUSH_MULT)) + return false; + + return true; +}) + +(define_predicate "push_mult_memory_operand" + (match_code "mem") +{ + /* ??? Given how PUSH_MULT is generated in the prologues, is there + any point in testing for thumb1 specially? All of the variants + use the same form. */ + if (TARGET_THUMB1) + { + /* ??? No attempt is made to represent STMIA, or validate that + the stack adjustment matches the register count. This is + true of the ARM/Thumb2 path as well. */ + rtx x = XEXP (op, 0); + if (GET_CODE (x) != PRE_MODIFY) + return false; + if (XEXP (x, 0) != stack_pointer_rtx) + return false; + x = XEXP (x, 1); + if (GET_CODE (x) != PLUS) + return false; + if (XEXP (x, 0) != stack_pointer_rtx) + return false; + return CONST_INT_P (XEXP (x, 1)); + } + + /* ARM and Thumb2 handle pre-modify in their legitimate_address. 
*/ + return memory_operand (op, mode); +}) + +;;------------------------------------------------------------------------- +;; +;; Thumb predicates +;; + +(define_predicate "thumb1_cmp_operand" + (ior (and (match_code "reg,subreg") + (match_operand 0 "s_register_operand")) + (and (match_code "const_int") + (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 256")))) + +(define_predicate "thumb1_cmpneg_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) < 0 && INTVAL (op) > -256"))) + +;; Return TRUE if a result can be stored in OP without clobbering the +;; condition code register. Prior to reload we only accept a +;; register. After reload we have to be able to handle memory as +;; well, since a pseudo may not get a hard reg and reload cannot +;; handle output-reloads on jump insns. + +;; We could possibly handle mem before reload as well, but that might +;; complicate things with the need to handle increment +;; side-effects. +(define_predicate "thumb_cbrch_target_operand" + (and (match_code "reg,subreg,mem") + (ior (match_operand 0 "s_register_operand") + (and (match_test "reload_in_progress || reload_completed") + (match_operand 0 "memory_operand"))))) + +;;------------------------------------------------------------------------- +;; +;; iWMMXt predicates +;; + +(define_predicate "imm_or_reg_operand" + (ior (match_operand 0 "immediate_operand") + (match_operand 0 "register_operand"))) + +;; Neon predicates + +(define_predicate "const_multiple_of_8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT val = INTVAL (op); + return (val & 7) == 0; +}) + +(define_predicate "imm_for_neon_mov_operand" + (match_code "const_vector,const_int") +{ + return neon_immediate_valid_for_move (op, mode, NULL, NULL); +}) + +(define_predicate "imm_for_neon_lshift_operand" + (match_code "const_vector") +{ + return neon_immediate_valid_for_shift (op, mode, NULL, NULL, true); +}) + +(define_predicate "imm_for_neon_rshift_operand" + (match_code "const_vector") +{ + return neon_immediate_valid_for_shift (op, mode, NULL, NULL, false); +}) + +(define_predicate "imm_lshift_or_reg_neon" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "imm_for_neon_lshift_operand"))) + +(define_predicate "imm_rshift_or_reg_neon" + (ior (match_operand 0 "s_register_operand") + (match_operand 0 "imm_for_neon_rshift_operand"))) + +;; Predicates for named expanders that overlap multiple ISAs. + +(define_predicate "cmpdi_operand" + (and (match_test "TARGET_32BIT") + (match_operand 0 "arm_di_operand"))) + +;; True if the operand is memory reference suitable for a ldrex/strex. +(define_predicate "arm_sync_memory_operand" + (and (match_operand 0 "memory_operand") + (match_code "reg" "0"))) + +;; Predicates for parallel expanders based on mode. 
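;; As a rough illustration of the two predicates that follow: for
;; V4SImode (four lanes), vect_par_constant_low accepts a PARALLEL of
;; the const_ints 0 and 1, and vect_par_constant_high a PARALLEL of 2
;; and 3 -- that is, the lane selectors for the low and high halves of
;; the vector, each half holding GET_MODE_NUNITS (mode) / 2 elements.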
+(define_special_predicate "vect_par_constant_high" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (!CONST_INT_P (elt)) + return false; + + val = INTVAL (elt); + if (val != (base/2) + i) + return false; + } + return true; +}) + +(define_special_predicate "vect_par_constant_low" + (match_code "parallel") +{ + HOST_WIDE_INT count = XVECLEN (op, 0); + int i; + int base = GET_MODE_NUNITS (mode); + + if ((count < 1) + || (count != base/2)) + return false; + + if (!VECTOR_MODE_P (mode)) + return false; + + for (i = 0; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i); + int val; + + if (!CONST_INT_P (elt)) + return false; + + val = INTVAL (elt); + if (val != i) + return false; + } + return true; +}) + +(define_predicate "const_double_vcvt_power_of_two_reciprocal" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP + && vfp3_const_double_for_fract_bits (op)"))) + +(define_predicate "const_double_vcvt_power_of_two" + (and (match_code "const_double") + (match_test "TARGET_32BIT && TARGET_VFP + && vfp3_const_double_for_bits (op)"))) + +(define_predicate "neon_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) + +(define_predicate "neon_permissive_struct_operand" + (and (match_code "mem") + (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, false)"))) + +(define_predicate "neon_perm_struct_or_reg_operand" + (ior (match_operand 0 "neon_permissive_struct_operand") + (match_operand 0 "s_register_operand"))) + +(define_special_predicate "add_operator" + (match_code "plus")) + +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) + +(define_predicate "call_insn_operand" + (ior (match_code "symbol_ref") + (match_operand 0 "s_register_operand"))) diff --git a/gcc-4.9/gcc/config/arm/rtems-eabi.h b/gcc-4.9/gcc/config/arm/rtems-eabi.h new file mode 100644 index 000000000..4bdcf0d87 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/rtems-eabi.h @@ -0,0 +1,29 @@ +/* Definitions for RTEMS based ARM systems using EABI. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define HAS_INIT_SECTION + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + TARGET_BPABI_CPP_BUILTINS(); \ + } while (0) diff --git a/gcc-4.9/gcc/config/arm/semi.h b/gcc-4.9/gcc/config/arm/semi.h new file mode 100644 index 000000000..f937e47b9 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/semi.h @@ -0,0 +1,68 @@ +/* Definitions of target machine for GNU compiler. 
ARM on semi-hosted platform + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributed by Richard Earnshaw (richard.earnshaw@arm.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#define STARTFILE_SPEC "crt0.o%s" + +#ifndef LIB_SPEC +#define LIB_SPEC "-lc" +#endif + +#ifndef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__semi__" +#endif + +#ifndef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} -X" +#endif + +#ifndef TARGET_DEFAULT_FLOAT_ABI +#define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_HARD +#endif + +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_APCS_FRAME) +#endif + +#ifndef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS \ + { "subtarget_extra_asm_spec", SUBTARGET_EXTRA_ASM_SPEC }, +#endif + +#ifndef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC "" +#endif + +/* The compiler supports PIC code generation, even though the binutils + may not. If we are asked to compile position independent code, we + always pass -k to the assembler. If it doesn't recognize it, then + it will barf, which probably means that it doesn't know how to + assemble PIC code. This is what we want, since otherwise tools + may incorrectly assume we support PIC compilation even if the + binutils can't. */ +#ifndef ASM_SPEC +#define ASM_SPEC "\ +%{fpic|fpie: -k} %{fPIC|fPIE: -k} \ +%{mbig-endian:-EB} \ +%(arm_cpu_spec) \ +%{mapcs-float:-mfloat} \ +%{mfloat-abi=*} %{mfpu=*} \ +%{mthumb-interwork:-mthumb-interwork} \ +%(subtarget_extra_asm_spec)" +#endif diff --git a/gcc-4.9/gcc/config/arm/symbian.h b/gcc-4.9/gcc/config/arm/symbian.h new file mode 100644 index 000000000..777742d6e --- /dev/null +++ b/gcc-4.9/gcc/config/arm/symbian.h @@ -0,0 +1,101 @@ +/* Configuration file for Symbian OS on ARM processors. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Do not expand builtin functions (unless explicitly prefixed with + "__builtin"). Symbian OS code relies on properties of the standard + library that go beyond those guaranteed by the ANSI/ISO standard. + For example, "memcpy" works even with overlapping memory, like + "memmove". 
We cannot simply set flag_no_builtin in arm.c because + (a) flag_no_builtin is not declared in language-independent code, + and (b) that would prevent users from explicitly overriding the + default with -fbuiltin, which may sometimes be useful. + + Make all symbols hidden by default. Symbian OS expects that all + exported symbols will be explicitly marked with + "__declspec(dllexport)". + + Enumeration types use 4 bytes, even if the enumerals are small, + unless explicitly overridden. + + The wchar_t type is a 2-byte type, unless explicitly + overridden. */ +#define CC1_SPEC \ + "%{!fbuiltin:%{!fno-builtin:-fno-builtin}} " \ + "%{!fvisibility=*:-fvisibility=hidden} " \ + "%{!fshort-enums:%{!fno-short-enums:-fno-short-enums}} " \ + "%{!fshort-wchar:%{!fno-short-wchar:-fshort-wchar}} " +#define CC1PLUS_SPEC CC1_SPEC + +/* Symbian OS does not use crt*.o, unlike the generic unknown-elf + configuration. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "" + +/* Do not link with any libraries by default. On Symbian OS, the user + must supply all required libraries on the command line. */ +#undef LIB_SPEC +#define LIB_SPEC "" + +/* Support the "dllimport" attribute. */ +#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1 + +/* Symbian OS assumes ARM V5 or above. Since -march=armv5 is + equivalent to making the ARM 10TDMI core the default, we can set + SUBTARGET_CPU_DEFAULT and get an equivalent effect. */ +#undef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm10tdmi + +/* The assembler should assume VFP FPU format, and armv5t. */ +#undef SUBTARGET_ASM_FLOAT_SPEC +#define SUBTARGET_ASM_FLOAT_SPEC \ + "%{!mfpu=*:-mfpu=vfp} %{!mcpu=*:%{!march=*:-march=armv5t}}" + +/* Define the __symbian__ macro. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + /* Include the default BPABI stuff. */ \ + TARGET_BPABI_CPP_BUILTINS (); \ + /* Symbian OS does not support merging symbols across DLL \ + boundaries. */ \ + builtin_define ("__GXX_MERGED_TYPEINFO_NAMES=0"); \ + builtin_define ("__symbian__"); \ + } \ + while (false) + +/* On SymbianOS, these sections are not writable, so we use "a", + rather than "aw", for the section attributes. */ +#undef ARM_EABI_CTORS_SECTION_OP +#define ARM_EABI_CTORS_SECTION_OP \ + "\t.section\t.init_array,\"a\",%init_array" +#undef ARM_EABI_DTORS_SECTION_OP +#define ARM_EABI_DTORS_SECTION_OP \ + "\t.section\t.fini_array,\"a\",%fini_array" + +/* SymbianOS cannot merge entities with vague linkage at runtime. */ +#define TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P false + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 + +#define ARM_TARGET2_DWARF_FORMAT DW_EH_PE_absptr diff --git a/gcc-4.9/gcc/config/arm/sync.md b/gcc-4.9/gcc/config/arm/sync.md new file mode 100644 index 000000000..aa8e9abcf --- /dev/null +++ b/gcc-4.9/gcc/config/arm/sync.md @@ -0,0 +1,472 @@ +;; Machine description for ARM processor synchronization primitives. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Written by Marcus Shawcroft (marcus.shawcroft@arm.com) +;; 64bit Atomics by Dave Gilbert (david.gilbert@linaro.org) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +(define_mode_attr sync_predtab + [(QI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER") + (HI "TARGET_HAVE_LDREXBH && TARGET_HAVE_MEMORY_BARRIER") + (SI "TARGET_HAVE_LDREX && TARGET_HAVE_MEMORY_BARRIER") + (DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN + && TARGET_HAVE_MEMORY_BARRIER")]) + +(define_code_iterator syncop [plus minus ior xor and]) + +(define_code_attr sync_optab + [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")]) + +(define_mode_attr sync_sfx + [(QI "b") (HI "h") (SI "") (DI "d")]) + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + "TARGET_HAVE_MEMORY_BARRIER" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + "TARGET_HAVE_MEMORY_BARRIER" + { + if (TARGET_HAVE_DMB) + { + /* Note we issue a system level barrier. We should consider issuing + a inner shareabilty zone barrier here instead, ie. "DMB ISH". */ + /* ??? Differentiate based on SEQ_CST vs less strict? */ + return "dmb\tsy"; + } + + if (TARGET_HAVE_DMB_MCR) + return "mcr\tp15, 0, r0, c7, c10, 5"; + + gcc_unreachable (); + } + [(set_attr "length" "4") + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")]) + +(define_insn "atomic_load" + [(set (match_operand:QHSI 0 "register_operand" "=r") + (unspec_volatile:QHSI + [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q") + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_LDA))] + "TARGET_HAVE_LDACQ" + { + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + if (model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_RELEASE) + return \"ldr\\t%0, %1\"; + else + return \"lda\\t%0, %1\"; + } +) + +(define_insn "atomic_store" + [(set (match_operand:QHSI 0 "memory_operand" "=Q") + (unspec_volatile:QHSI + [(match_operand:QHSI 1 "general_operand" "r") + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_STL))] + "TARGET_HAVE_LDACQ" + { + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + if (model == MEMMODEL_RELAXED + || model == MEMMODEL_CONSUME + || model == MEMMODEL_ACQUIRE) + return \"str\t%1, %0\"; + else + return \"stl\t%1, %0\"; + } +) + +;; Note that ldrd and vldr are *not* guaranteed to be single-copy atomic, +;; even for a 64-bit aligned address. Instead we use a ldrexd unparied +;; with a store. 
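;; As a user-level sketch of what the DImode atomic load patterns below
;; implement (illustrative only; the function name is made up and it
;; assumes an ARMv7-A or later target where LDREXD is available and the
;; location is doubleword aligned):
;;
;;   #include <stdint.h>
;;
;;   uint64_t
;;   load64 (uint64_t *p)
;;   {
;;     /* GCC expands this through atomic_loaddi; the read itself is
;;        done with ldrexd so that it is single-copy atomic, unlike a
;;        plain ldrd, and fences are emitted around it as the requested
;;        memory model demands.  */
;;     return __atomic_load_n (p, __ATOMIC_SEQ_CST);
;;   }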
+(define_expand "atomic_loaddi" + [(match_operand:DI 0 "s_register_operand") ;; val out + (match_operand:DI 1 "mem_noofs_operand") ;; memory + (match_operand:SI 2 "const_int_operand")] ;; model + "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" +{ + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + expand_mem_thread_fence (model); + emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1])); + if (model == MEMMODEL_SEQ_CST) + expand_mem_thread_fence (model); + DONE; +}) + +(define_insn "atomic_loaddi_1" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + UNSPEC_LL))] + "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN" + "ldrexd%?\t%0, %H0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "s_register_operand" "") ;; bool out + (match_operand:QHSD 1 "s_register_operand" "") ;; val out + (match_operand:QHSD 2 "mem_noofs_operand" "") ;; memory + (match_operand:QHSD 3 "general_operand" "") ;; expected + (match_operand:QHSD 4 "s_register_operand" "") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; mod_s + (match_operand:SI 7 "const_int_operand")] ;; mod_f + "" +{ + arm_expand_compare_and_swap (operands); + DONE; +}) + +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (reg:CC_Z CC_REGNUM) ;; bool out + (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS)) + (set (match_operand:SI 0 "s_register_operand" "=&r") ;; val out + (zero_extend:SI + (match_operand:NARROW 1 "mem_noofs_operand" "+Ua"))) ;; memory + (set (match_dup 1) + (unspec_volatile:NARROW + [(match_operand:SI 2 "arm_add_operand" "rIL") ;; expected + (match_operand:NARROW 3 "s_register_operand" "r") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + VUNSPEC_ATOMIC_CAS)) + (clobber (match_scratch:SI 7 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_compare_and_swap (operands); + DONE; + }) + +(define_mode_attr cas_cmp_operand + [(SI "arm_add_operand") (DI "cmpdi_operand")]) +(define_mode_attr cas_cmp_str + [(SI "rIL") (DI "rDi")]) + +(define_insn_and_split "atomic_compare_and_swap_1" + [(set (reg:CC_Z CC_REGNUM) ;; bool out + (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS)) + (set (match_operand:SIDI 0 "s_register_operand" "=&r") ;; val out + (match_operand:SIDI 1 "mem_noofs_operand" "+Ua")) ;; memory + (set (match_dup 1) + (unspec_volatile:SIDI + [(match_operand:SIDI 2 "" "") ;; expect + (match_operand:SIDI 3 "s_register_operand" "r") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + VUNSPEC_ATOMIC_CAS)) + (clobber (match_scratch:SI 7 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_compare_and_swap (operands); + DONE; + }) + +(define_insn_and_split "atomic_exchange" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") ;; output + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) ;; memory + (set (match_dup 1) + (unspec_volatile:QHSD + [(match_operand:QHSD 2 "s_register_operand" "r") ;; input + (match_operand:SI 3 "const_int_operand" "")] ;; model + VUNSPEC_ATOMIC_XCHG)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + 
[(const_int 0)] + { + arm_split_atomic_op (SET, operands[0], NULL, operands[1], + operands[2], operands[3], operands[4]); + DONE; + }) + +(define_mode_attr atomic_op_operand + [(QI "reg_or_int_operand") + (HI "reg_or_int_operand") + (SI "reg_or_int_operand") + (DI "s_register_operand")]) + +(define_mode_attr atomic_op_str + [(QI "rn") (HI "rn") (SI "rn") (DI "r")]) + +(define_insn_and_split "atomic_" + [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua") + (unspec_volatile:QHSD + [(syncop:QHSD (match_dup 0) + (match_operand:QHSD 1 "" "")) + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:QHSD 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (, NULL, operands[3], operands[0], + operands[1], operands[2], operands[4]); + DONE; + }) + +(define_insn_and_split "atomic_nand" + [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua") + (unspec_volatile:QHSD + [(not:QHSD + (and:QHSD (match_dup 0) + (match_operand:QHSD 1 "" ""))) + (match_operand:SI 2 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:QHSD 3 "=&r")) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (NOT, NULL, operands[3], operands[0], + operands[1], operands[2], operands[4]); + DONE; + }) + +(define_insn_and_split "atomic_fetch_" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) + (set (match_dup 1) + (unspec_volatile:QHSD + [(syncop:QHSD (match_dup 1) + (match_operand:QHSD 2 "" "")) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:QHSD 4 "=&r")) + (clobber (match_scratch:SI 5 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (, operands[0], operands[4], operands[1], + operands[2], operands[3], operands[5]); + DONE; + }) + +(define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) + (set (match_dup 1) + (unspec_volatile:QHSD + [(not:QHSD + (and:QHSD (match_dup 1) + (match_operand:QHSD 2 "" ""))) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:QHSD 4 "=&r")) + (clobber (match_scratch:SI 5 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (NOT, operands[0], operands[4], operands[1], + operands[2], operands[3], operands[5]); + DONE; + }) + +(define_insn_and_split "atomic__fetch" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (syncop:QHSD + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua") + (match_operand:QHSD 2 "" ""))) + (set (match_dup 1) + (unspec_volatile:QHSD + [(match_dup 1) (match_dup 2) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (, NULL, operands[0], operands[1], + operands[2], operands[3], operands[4]); + DONE; + }) + +(define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:QHSD 0 "s_register_operand" "=&r") + (not:QHSD + (and:QHSD + (match_operand:QHSD 1 "mem_noofs_operand" "+Ua") + (match_operand:QHSD 2 "" "")))) + (set (match_dup 1) + 
(unspec_volatile:QHSD + [(match_dup 1) (match_dup 2) + (match_operand:SI 3 "const_int_operand")] ;; model + VUNSPEC_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + arm_split_atomic_op (NOT, NULL, operands[0], operands[1], + operands[2], operands[3], operands[4]); + DONE; + }) + +(define_insn "arm_load_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL)))] + "TARGET_HAVE_LDREXBH" + "ldrex%?\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_acquire_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (zero_extend:SI + (unspec_volatile:NARROW + [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX)))] + "TARGET_HAVE_LDACQ" + "ldaex%?\\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_exclusivesi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec_volatile:SI + [(match_operand:SI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL))] + "TARGET_HAVE_LDREX" + "ldrex%?\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_acquire_exclusivesi" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (unspec_volatile:SI + [(match_operand:SI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX))] + "TARGET_HAVE_LDACQ" + "ldaex%?\t%0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_exclusivedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec_volatile:DI + [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LL))] + "TARGET_HAVE_LDREXD" + "ldrexd%?\t%0, %H0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_load_acquire_exclusivedi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (unspec_volatile:DI + [(match_operand:DI 1 "mem_noofs_operand" "Ua")] + VUNSPEC_LAX))] + "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" + "ldaexd%?\t%0, %H0, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_store_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SC)) + (set (match_operand:QHSD 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:QHSD + [(match_operand:QHSD 2 "s_register_operand" "r")] + VUNSPEC_SC))] + "" + { + if (mode == DImode) + { + rtx value = operands[2]; + /* The restrictions on target registers in ARM mode are that the two + registers are consecutive and the first one is even; Thumb is + actually more flexible, but DI should give us this anyway. + Note that the 1st register always gets the lowest word in memory. 
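       For instance, with the source value in the register pair r2/r3
       (registers chosen purely for illustration), the template below
       should emit something like "strexd r0, r2, r3, [r4]": the even/odd
       pair carries the low and high words and operand 0 receives the
       store-exclusive status result.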
*/ + gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); + operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); + return "strexd%?\t%0, %2, %3, %C1"; + } + return "strex%?\t%0, %2, %C1"; + } + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_store_release_exclusivedi" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX)) + (set (match_operand:DI 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:DI + [(match_operand:DI 2 "s_register_operand" "r")] + VUNSPEC_SLX))] + "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" + { + rtx value = operands[2]; + /* See comment in arm_store_exclusive above. */ + gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); + operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); + return "stlexd%?\t%0, %2, %3, %C1"; + } + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) + +(define_insn "arm_store_release_exclusive" + [(set (match_operand:SI 0 "s_register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX)) + (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua") + (unspec_volatile:QHSI + [(match_operand:QHSI 2 "s_register_operand" "r")] + VUNSPEC_SLX))] + "TARGET_HAVE_LDACQ" + "stlex%?\t%0, %2, %C1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no")]) diff --git a/gcc-4.9/gcc/config/arm/t-aprofile b/gcc-4.9/gcc/config/arm/t-aprofile new file mode 100644 index 000000000..b968711c1 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-aprofile @@ -0,0 +1,178 @@ +# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# This is a target makefile fragment that attempts to get +# multilibs built for the range of CPU's, FPU's and ABI's that +# are relevant for the A-profile architecture. It should +# not be used in conjunction with another make file fragment and +# assumes --with-arch, --with-cpu, --with-fpu, --with-float, --with-mode +# have their default values during the configure step. We enforce +# this during the top-level configury. + +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = +MULTILIB_REUSE = + +# We have the following hierachy: +# ISA: A32 (.) or T32 (thumb) +# Architecture: ARMv7-A (v7-a), ARMv7VE (v7ve), or ARMv8-A (v8-a). +# FPU: VFPv3-D16 (fpv3), NEONv1 (simdv1), VFPv4-D16 (fpv4), +# NEON-VFPV4 (simdvfpv4), NEON for ARMv8 (simdv8), or None (.). +# Float-abi: Soft (.), softfp (softfp), or hard (hardfp). 
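# As a rough example of the resulting layout (assuming a compiler built
# with this fragment): -mthumb -march=armv8-a -mfpu=neon-fp-armv8
# -mfloat-abi=hard should be served from the multilib directory
# thumb/v8-a/simdv8/hard, and the directory chosen for any option set
# can be checked with gcc -print-multi-directory <options>.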
+ +MULTILIB_OPTIONS += mthumb +MULTILIB_DIRNAMES += thumb + +MULTILIB_OPTIONS += march=armv7-a/march=armv7ve/march=armv8-a +MULTILIB_DIRNAMES += v7-a v7ve v8-a + +MULTILIB_OPTIONS += mfpu=vfpv3-d16/mfpu=neon/mfpu=vfpv4-d16/mfpu=neon-vfpv4/mfpu=neon-fp-armv8 +MULTILIB_DIRNAMES += fpv3 simdv1 fpv4 simdvfpv4 simdv8 + +MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard +MULTILIB_DIRNAMES += softfp hard + +# We don't build no-float libraries with an FPU. +MULTILIB_EXCEPTIONS += *mfpu=vfpv3-d16 +MULTILIB_EXCEPTIONS += *mfpu=neon +MULTILIB_EXCEPTIONS += *mfpu=vfpv4-d16 +MULTILIB_EXCEPTIONS += *mfpu=neon-vfpv4 +MULTILIB_EXCEPTIONS += *mfpu=neon-fp-armv8 + +# We don't build libraries requiring an FPU at the CPU/Arch/ISA level. +MULTILIB_EXCEPTIONS += mfloat-abi=* +MULTILIB_EXCEPTIONS += mfpu=* +MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=* +MULTILIB_EXCEPTIONS += mthumb/mfpu=* +MULTILIB_EXCEPTIONS += *march=armv7-a/mfloat-abi=* +MULTILIB_EXCEPTIONS += *march=armv7ve/mfloat-abi=* +MULTILIB_EXCEPTIONS += *march=armv8-a/mfloat-abi=* + +# Ensure the correct FPU variants apply to the correct base architectures. +MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=vfpv3-d16* +MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=neon/* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=vfpv3-d16* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=neon/* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=vfpv4-d16* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=neon-vfpv4* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=vfpv4-d16* +MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=neon-vfpv4* +MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=neon-fp-armv8* +MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=neon-fp-armv8* + +# CPU Matches +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a8 +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9 +MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a5 +MULTILIB_MATCHES += march?armv7ve=mcpu?cortex-a15=mcpu?cortex-a12 +MULTILIB_MATCHES += march?armv7ve=mcpu?cortex-a15.cortex-a7 +MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a53 +MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57 +MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57.cortex-a53 + +# FPU matches +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16 +MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16-d16 +MULTILIB_MATCHES += mfpu?vfpv4-d16=mfpu?vfpv4 +MULTILIB_MATCHES += mfpu?neon-fp-armv8=mfpu?crypto-neon-fp-armv8 + + +# Map all requests for vfpv3 with a later CPU to vfpv3-d16 v7-a. +# So if new CPUs are added above at the newer architecture levels, +# do something to map them below here. +# We take the approach of mapping down to v7-a regardless of what +# the fp option is if the integer architecture brings things down. +# This applies to any similar combination at the v7ve and v8-a arch +# levels. 
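# (Syntax reminder: each MULTILIB_REUSE entry below has the form
# built=request, meaning the multilib built for the left-hand option
# combination is also used for the right-hand one.  Within a
# combination '/' separates options and '.' stands in for the '='
# inside an option, so march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard
# corresponds to -march=armv7-a -mfpu=vfpv3-d16 -mfloat-abi=hard.)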
+ +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7ve/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7ve/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.vfpv4/mfloat-abi.softfp + + +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7ve/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7ve/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv8-a/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv8-a/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.softfp + + +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv7ve/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv7ve/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.softfp + + +MULTILIB_REUSE += march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.hard=march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.softfp=march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.hard=march.armv7ve/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.softfp=march.armv7ve/mfpu.neon-fp-armv8/mfloat-abi.softfp + + + +# And again for mthumb. 
+ +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.vfpv4/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.neon/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.neon/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.fp-armv8/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.softfp + + +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.neon-vfpv4/mfloat-abi.softfp +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.neon-fp-armv8/mfloat-abi.hard +MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.neon-vfpv4/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.neon-fp-armv8/mfloat-abi.softfp diff --git a/gcc-4.9/gcc/config/arm/t-arm b/gcc-4.9/gcc/config/arm/t-arm new file mode 100644 
index 000000000..99bd696e4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-arm @@ -0,0 +1,100 @@ +# Rules common to all arm targets +# +# Copyright (C) 2004-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +TM_H += $(srcdir)/config/arm/arm-cores.def +OPTIONS_H_EXTRA += $(srcdir)/config/arm/arm-cores.def + +# All md files - except for arm.md. +# This list should be kept in alphabetical order and updated whenever an md +# file is added or removed. +MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ + $(srcdir)/config/arm/arm1026ejs.md \ + $(srcdir)/config/arm/arm1136jfs.md \ + $(srcdir)/config/arm/arm926ejs.md \ + $(srcdir)/config/arm/arm-fixed.md \ + $(srcdir)/config/arm/arm-generic.md \ + $(srcdir)/config/arm/arm-tune.md \ + $(srcdir)/config/arm/constraints.md \ + $(srcdir)/config/arm/cortex-a15.md \ + $(srcdir)/config/arm/cortex-a5.md \ + $(srcdir)/config/arm/cortex-a7.md \ + $(srcdir)/config/arm/cortex-a8.md \ + $(srcdir)/config/arm/cortex-a8-neon.md \ + $(srcdir)/config/arm/cortex-a9.md \ + $(srcdir)/config/arm/cortex-a9-neon.md \ + $(srcdir)/config/arm/cortex-a53.md \ + $(srcdir)/config/arm/cortex-m4-fpu.md \ + $(srcdir)/config/arm/cortex-m4.md \ + $(srcdir)/config/arm/cortex-r4f.md \ + $(srcdir)/config/arm/cortex-r4.md \ + $(srcdir)/config/arm/fa526.md \ + $(srcdir)/config/arm/fa606te.md \ + $(srcdir)/config/arm/fa626te.md \ + $(srcdir)/config/arm/fa726te.md \ + $(srcdir)/config/arm/fmp626.md \ + $(srcdir)/config/arm/iterators.md \ + $(srcdir)/config/arm/iwmmxt.md \ + $(srcdir)/config/arm/iwmmxt2.md \ + $(srcdir)/config/arm/ldmstm.md \ + $(srcdir)/config/arm/ldrdstrd.md \ + $(srcdir)/config/arm/marvell-f-iwmmxt.md \ + $(srcdir)/config/arm/neon.md \ + $(srcdir)/config/arm/predicates.md \ + $(srcdir)/config/arm/sync.md \ + $(srcdir)/config/arm/thumb2.md \ + $(srcdir)/config/arm/vec-common.md \ + $(srcdir)/config/arm/vfp11.md \ + $(srcdir)/config/arm/vfp.md + +s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ + s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) + +$(srcdir)/config/arm/arm-tune.md: $(srcdir)/config/arm/gentune.sh \ + $(srcdir)/config/arm/arm-cores.def + $(SHELL) $(srcdir)/config/arm/gentune.sh \ + $(srcdir)/config/arm/arm-cores.def > \ + $(srcdir)/config/arm/arm-tune.md + +$(srcdir)/config/arm/arm-tables.opt: $(srcdir)/config/arm/genopt.sh \ + $(srcdir)/config/arm/arm-cores.def $(srcdir)/config/arm/arm-arches.def \ + $(srcdir)/config/arm/arm-fpus.def + $(SHELL) $(srcdir)/config/arm/genopt.sh $(srcdir)/config/arm > \ + $(srcdir)/config/arm/arm-tables.opt + +aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/aarch-common.c + +arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(TREE_H) $(HASH_TABLE_H) 
$(OBSTACK_H) $(REGS_H) hard-reg-set.h \ + insn-config.h conditions.h output.h dumpfile.h \ + $(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \ + $(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \ + $(GGC_H) except.h $(C_PRAGMA_H) $(TM_P_H) \ + $(TARGET_H) $(TARGET_DEF_H) debug.h langhooks.h $(DF_H) \ + intl.h libfuncs.h $(PARAMS_H) $(OPTS_H) $(srcdir)/config/arm/arm-cores.def \ + $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \ + $(srcdir)/config/arm/arm_neon_builtins.def + +arm-c.o: $(srcdir)/config/arm/arm-c.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arm/arm-c.c diff --git a/gcc-4.9/gcc/config/arm/t-arm-elf b/gcc-4.9/gcc/config/arm/t-arm-elf new file mode 100644 index 000000000..8ef6b04ff --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-arm-elf @@ -0,0 +1,90 @@ +# Copyright (C) 1998-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = marm/mthumb +MULTILIB_DIRNAMES = arm thumb +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = + +#MULTILIB_OPTIONS += mcpu=fa526/mcpu=fa626/mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te +#MULTILIB_DIRNAMES += fa526 fa626 fa606te fa626te fmp626 fa726te +#MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=fa526 *mthumb*/*mcpu=fa626 + +#MULTILIB_OPTIONS += march=armv7 +#MULTILIB_DIRNAMES += thumb2 +#MULTILIB_EXCEPTIONS += march=armv7* marm/*march=armv7* +#MULTILIB_MATCHES += march?armv7=march?armv7-a +#MULTILIB_MATCHES += march?armv7=march?armv7-r +#MULTILIB_MATCHES += march?armv7=march?armv7-m +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8 +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 +#MULTILIB_MATCHES += march?armv7=mcpu?cortex-m3 + +# Not quite true. We can support hard-vfp calling in Thumb2, but how do we +# express that here? Also, we really need architecture v5e or later +# (mcrr etc). 
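# Hence the hard-float multilib added below is built for ARM state
# only: the *mthumb/*mfloat-abi=hard* exception keeps Thumb code out of
# the fpu/ directory until the question above is resolved.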
+MULTILIB_OPTIONS += mfloat-abi=hard +MULTILIB_DIRNAMES += fpu +MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* +#MULTILIB_EXCEPTIONS += *mcpu=fa526/*mfloat-abi=hard* +#MULTILIB_EXCEPTIONS += *mcpu=fa626/*mfloat-abi=hard* + +# MULTILIB_OPTIONS += mcpu=ep9312 +# MULTILIB_DIRNAMES += ep9312 +# MULTILIB_EXCEPTIONS += *mthumb/*mcpu=ep9312* +# +# MULTILIB_OPTIONS += mlittle-endian/mbig-endian +# MULTILIB_DIRNAMES += le be +# MULTILIB_MATCHES += mbig-endian=mbe mlittle-endian=mle +# +# MULTILIB_OPTIONS += mfloat-abi=hard/mfloat-abi=soft +# MULTILIB_DIRNAMES += fpu soft +# MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* +# +# MULTILIB_OPTIONS += mno-thumb-interwork/mthumb-interwork +# MULTILIB_DIRNAMES += normal interwork +# +# MULTILIB_OPTIONS += fno-leading-underscore/fleading-underscore +# MULTILIB_DIRNAMES += elf under +# +# MULTILIB_OPTIONS += mcpu=arm7 +# MULTILIB_DIRNAMES += nofmult +# MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=arm7* +# # Note: the multilib_exceptions matches both -mthumb and +# # -mthumb-interwork +# # +# # We have to match all the arm cpu variants which do not have the +# # multiply instruction and treat them as if the user had specified +# # -mcpu=arm7. Note that in the following the ? is interpreted as +# # an = for the purposes of matching command line options. +# # FIXME: There ought to be a better way to do this. +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7d +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7di +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm70 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm700 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm700i +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm710 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm710c +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7100 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7500 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm7500fe +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm6 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm60 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm600 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm610 +# MULTILIB_MATCHES += mcpu?arm7=mcpu?arm620 diff --git a/gcc-4.9/gcc/config/arm/t-bpabi b/gcc-4.9/gcc/config/arm/t-bpabi new file mode 100644 index 000000000..ef019ea37 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-bpabi @@ -0,0 +1 @@ +EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h diff --git a/gcc-4.9/gcc/config/arm/t-linux-androideabi b/gcc-4.9/gcc/config/arm/t-linux-androideabi new file mode 100644 index 000000000..8f1307c55 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-linux-androideabi @@ -0,0 +1,10 @@ +MULTILIB_OPTIONS = march=armv7-a mthumb +MULTILIB_DIRNAMES = armv7-a thumb +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = +MULTILIB_OSDIRNAMES = + +# The "special" multilib can be used to build native applications for Android, +# as opposed to native shared libraries that are then called via JNI. +#MULTILIB_OPTIONS += tno-android-cc +#MULTILIB_DIRNAMES += special diff --git a/gcc-4.9/gcc/config/arm/t-linux-eabi b/gcc-4.9/gcc/config/arm/t-linux-eabi new file mode 100644 index 000000000..1087914b5 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-linux-eabi @@ -0,0 +1,31 @@ +# Copyright (C) 2005-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# We do not build a Thumb multilib for Linux because the definition of +# CLEAR_INSN_CACHE in linux-gas.h does not work in Thumb mode. +# If you set MULTILIB_OPTIONS to a non-empty value you should also set +# MULTILIB_DEFAULTS in linux-elf.h. +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = + +#MULTILIB_OPTIONS += mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te +#MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te +#MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te* + +ARM_EB = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),eb) +MULTIARCH_DIRNAME = $(call if_multiarch,arm$(ARM_EB)-linux-gnueabi$(if $(filter hard,$(with_float)),hf)) diff --git a/gcc-4.9/gcc/config/arm/t-rtems-eabi b/gcc-4.9/gcc/config/arm/t-rtems-eabi new file mode 100644 index 000000000..d81fbf7ec --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-rtems-eabi @@ -0,0 +1,47 @@ +# Custom RTEMS EABI multilibs + +MULTILIB_OPTIONS = mthumb march=armv6-m/march=armv7-a/march=armv7-r/march=armv7-m mfpu=neon mfloat-abi=hard +MULTILIB_DIRNAMES = thumb armv6-m armv7-a armv7-r armv7-m neon hard + +# Enumeration of multilibs + +MULTILIB_EXCEPTIONS = +MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb/march=armv6-m +# MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb/march=armv7-a +MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb/march=armv7-r +MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb/march=armv7-m +MULTILIB_EXCEPTIONS += mthumb/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mthumb/mfpu=neon +MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=hard +# MULTILIB_EXCEPTIONS += mthumb +MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=neon +MULTILIB_EXCEPTIONS += march=armv6-m/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv6-m +MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=neon +MULTILIB_EXCEPTIONS += march=armv7-a/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-a +MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=neon +MULTILIB_EXCEPTIONS += march=armv7-r/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-r +MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=neon/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=neon +MULTILIB_EXCEPTIONS += march=armv7-m/mfloat-abi=hard +MULTILIB_EXCEPTIONS += march=armv7-m +MULTILIB_EXCEPTIONS += mfpu=neon/mfloat-abi=hard 
+MULTILIB_EXCEPTIONS += mfpu=neon +MULTILIB_EXCEPTIONS += mfloat-abi=hard diff --git a/gcc-4.9/gcc/config/arm/t-symbian b/gcc-4.9/gcc/config/arm/t-symbian new file mode 100644 index 000000000..35ee02889 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-symbian @@ -0,0 +1,26 @@ +# Copyright (C) 2004-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h + +# Create a multilib for processors with VFP floating-point, and a +# multilib for those without -- using the soft-float ABI in both +# cases. Symbian OS object should be compiled with interworking +# enabled, so there are no separate thumb-mode libraries. +MULTILIB_OPTIONS = mfloat-abi=softfp +MULTILIB_DIRNAMES = softfp diff --git a/gcc-4.9/gcc/config/arm/t-vxworks b/gcc-4.9/gcc/config/arm/t-vxworks new file mode 100644 index 000000000..802d8e4bd --- /dev/null +++ b/gcc-4.9/gcc/config/arm/t-vxworks @@ -0,0 +1,24 @@ +# Copyright (C) 2003-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = \ + mrtp fPIC \ + t4/t4be/t4t/t4tbe/t5/t5be/t5t/t5tbe/tstrongarm/txscale/txscalebe +MULTILIB_MATCHES = fPIC=fpic +# Don't build -fPIC multilibs for kernel or Thumb code. +MULTILIB_EXCEPTIONS = fPIC* mrtp/fPIC/*t[45]t* diff --git a/gcc-4.9/gcc/config/arm/thumb2.md b/gcc-4.9/gcc/config/arm/thumb2.md new file mode 100644 index 000000000..d84938f30 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/thumb2.md @@ -0,0 +1,1495 @@ +;; ARM Thumb-2 Machine Description +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery, LLC. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
*/ + +;; Note: Thumb-2 is the variant of the Thumb architecture that adds +;; 32-bit encodings of [almost all of] the Arm instruction set. +;; Some old documents refer to the relatively minor interworking +;; changes made in armv5t as "thumb2". These are considered part +;; the 16-bit Thumb-1 instruction set. + +;; Thumb-2 only allows shift by constant on data processing instructions +(define_insn "*thumb_andsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (and:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "const_int_operand" "M")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "bic%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "2") + (set_attr "type" "alu_shift_imm")] +) + +;; We use the '0' constraint for operand 1 because reload should +;; be smart enough to generate an appropriate move for the r/r/r case. +(define_insn_and_split "*thumb2_smaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (smax:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tlt\;movlt\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (lt:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_sminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (smin:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tge\;movge\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (ge:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb32_umaxsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (umax:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tcc\;movcc\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (ltu:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" + [(set_attr "conds" "clob") + (set_attr "length" "6,6,10") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_uminsi3" + [(set (match_operand:SI 0 "s_register_operand" "=r,l,r") + (umin:SI (match_operand:SI 1 "s_register_operand" "%0,0,0") + (match_operand:SI 2 "arm_rhs_operand" "r,Py,I"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; cmp\\t%1, %2\;it\\tcs\;movcs\\t%0, %2 + "TARGET_THUMB2 && reload_completed" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (cond_exec (geu:SI (reg:CC CC_REGNUM) (const_int 0)) + (set (match_dup 0) + (match_dup 2)))] + "" + [(set_attr 
"conds" "clob") + (set_attr "length" "6,6,10") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "type" "multiple")] +) + +;; Thumb-2 does not have rsc, so use a clever trick with shifter operands. +(define_insn_and_split "*thumb2_negdi2" + [(set (match_operand:DI 0 "s_register_operand" "=&r,r") + (neg:DI (match_operand:DI 1 "s_register_operand" "?r,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" ; negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1 + "&& reload_completed" + [(parallel [(set (reg:CC CC_REGNUM) + (compare:CC (const_int 0) (match_dup 1))) + (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) + (set (match_dup 2) (minus:SI (minus:SI (match_dup 3) + (ashift:SI (match_dup 3) + (const_int 1))) + (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "conds" "clob") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=&r,l,r") + (abs:SI (match_operand:SI 1 "s_register_operand" "r,0,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31 + ; cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 + ; cmp\\t%0, #0\;it\tlt\;rsblt\\t%0, %0, #0 + "&& reload_completed" + [(const_int 0)] + { + if (REGNO(operands[0]) == REGNO(operands[1])) + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_LT (SImode, + cc_reg, + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31))))); + } + DONE; + } + [(set_attr "conds" "*,clob,clob") + (set_attr "shift" "1") + (set_attr "predicable" "yes,no,no") + (set_attr "predicable_short_it" "no") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "ce_count" "2") + (set_attr "length" "8,6,10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_neg_abssi2" + [(set (match_operand:SI 0 "s_register_operand" "=&r,l,r") + (neg:SI (abs:SI (match_operand:SI 1 "s_register_operand" "r,0,0")))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + ; eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31 + ; cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 + ; cmp\\t%0, #0\;it\\tgt\;rsbgt\\t%0, %0, #0 + "&& reload_completed" + [(const_int 0)] + { + if (REGNO(operands[0]) == REGNO(operands[1])) + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[0], const0_rtx))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + (gen_rtx_GT (SImode, + cc_reg, + const0_rtx)), + (gen_rtx_SET (VOIDmode, + operands[0], + (gen_rtx_MINUS (SImode, + const0_rtx, + operands[1])))))); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_XOR (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[1]))); + emit_insn (gen_rtx_SET 
(VOIDmode, + operands[0], + gen_rtx_MINUS (SImode, + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)), + operands[0]))); + } + DONE; + } + [(set_attr "conds" "*,clob,clob") + (set_attr "shift" "1") + (set_attr "predicable" "yes,no,no") + (set_attr "enabled_for_depr_it" "yes,yes,no") + (set_attr "predicable_short_it" "no") + (set_attr "ce_count" "2") + (set_attr "length" "8,6,10") + (set_attr "type" "multiple")] +) + +;; We have two alternatives here for memory loads (and similarly for stores) +;; to reflect the fact that the permissible constant pool ranges differ +;; between ldr instructions taking low regs and ldr instructions taking high +;; regs. The high register alternatives are not taken into account when +;; choosing register preferences in order to reflect their expense. +(define_insn "*thumb2_movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))] + "TARGET_THUMB2 && ! TARGET_IWMMXT + && !(TARGET_HARD_FLOAT && TARGET_VFP) + && ( register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mov%?\\t%0, %1 + mvn%?\\t%0, #%B1 + movw%?\\t%0, %1 + ldr%?\\t%0, %1 + ldr%?\\t%0, %1 + str%?\\t%1, %0 + str%?\\t%1, %0" + [(set_attr "type" "mov_reg,alu_imm,alu_imm,alu_imm,mov_imm,load1,load1,store1,store1") + (set_attr "length" "2,4,2,4,4,4,4,4,4") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*") + (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")] +) + +(define_insn "tls_load_dot_plus_four" + [(set (match_operand:SI 0 "register_operand" "=l,l,r,r") + (mem:SI (unspec:SI [(match_operand:SI 2 "register_operand" "0,1,0,1") + (const_int 4) + (match_operand 3 "" "")] + UNSPEC_PIC_BASE))) + (clobber (match_scratch:SI 1 "=X,l,X,r"))] + "TARGET_THUMB2" + "* + (*targetm.asm_out.internal_label) (asm_out_file, \"LPIC\", + INTVAL (operands[3])); + return \"add\\t%2, %|pc\;ldr%?\\t%0, [%2]\"; + " + [(set_attr "length" "4,4,6,6") + (set_attr "type" "multiple")] +) + +;; Thumb-2 always has load/store halfword instructions, so we can avoid a lot +;; of the messiness associated with the ARM patterns. 
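+;; Illustrative examples (editor's sketch, not from the upstream sources;
+;; unified assembly syntax assumed).  The HImode move pattern below can
+;; emit, for instance:
+;;   mov   r0, r1          @ register-to-register move
+;;   movw  r0, #4660       @ 16-bit immediate (0x1234)
+;;   ldrh  r0, [r2]        @ halfword load directly from memory
+;;   strh  r1, [r2]        @ halfword store directly to memory
+;; Because ldrh/strh are always available in Thumb-2, no multi-instruction
+;; HImode load/store sequences are required.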
+(define_insn "*thumb2_movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,l,r,m,r") + (match_operand:HI 1 "general_operand" "r,I,Py,n,r,m"))] + "TARGET_THUMB2 + && (register_operand (operands[0], HImode) + || register_operand (operands[1], HImode))" + "@ + mov%?\\t%0, %1\\t%@ movhi + mov%?\\t%0, %1\\t%@ movhi + mov%?\\t%0, %1\\t%@ movhi + movw%?\\t%0, %L1\\t%@ movhi + str%(h%)\\t%1, %0\\t%@ movhi + ldr%(h%)\\t%0, %1\\t%@ movhi" + [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_reg,store1,load1") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no") + (set_attr "length" "2,4,2,4,4,4") + (set_attr "pool_range" "*,*,*,*,*,4094") + (set_attr "neg_pool_range" "*,*,*,*,*,250")] +) + +(define_insn "*thumb2_storewb_pairsi" + [(set (match_operand:SI 0 "register_operand" "=&kr") + (plus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (set (mem:SI (plus:SI (match_dup 0) (match_dup 2))) + (match_operand:SI 3 "register_operand" "r")) + (set (mem:SI (plus:SI (match_dup 0) + (match_operand:SI 5 "const_int_operand" "n"))) + (match_operand:SI 4 "register_operand" "r"))] + "TARGET_THUMB2 + && INTVAL (operands[5]) == INTVAL (operands[2]) + 4" + "strd\\t%3, %4, [%0, %2]!" + [(set_attr "type" "store2")] +) + +(define_insn "*thumb2_cmpsi_neg_shiftsi" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "s_register_operand" "r") + (neg:SI (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "const_int_operand" "M")]))))] + "TARGET_THUMB2" + "cmn%?\\t%0, %1%S3" + [(set_attr "conds" "set") + (set_attr "shift" "1") + (set_attr "type" "alus_shift_imm")] +) + +(define_insn_and_split "*thumb2_mov_scc" + [(set (match_operand:SI 0 "s_register_operand" "=l,r") + (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]))] + "TARGET_THUMB2" + "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;mov%d1\\t%0, #1" + "TARGET_THUMB2" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (const_int 1) + (const_int 0)))] + "" + [(set_attr "conds" "use") + (set_attr "enabled_for_depr_it" "yes,no") + (set_attr "length" "8,10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_mov_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && !arm_restrict_it" + "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;mvn%d1\\t%0, #0" + "TARGET_THUMB2" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + } + [(set_attr "conds" "use") + (set_attr "length" "10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_mov_negscc_strict_it" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (neg:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && arm_restrict_it" + "#" ; ";mvn\\t%0, #0 ;it\\t%D1\;mov%D1\\t%0, #0\" + "&& reload_completed" + [(set (match_dup 0) + (match_dup 3)) + (cond_exec (match_dup 4) + (set (match_dup 0) + (const_int 0)))] + { + operands[3] = GEN_INT (~0); + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, 
operands[2], const0_rtx); + + } + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_mov_notscc" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && !arm_restrict_it" + "#" ; "ite\\t%D1\;mvn%D1\\t%0, #0\;mvn%d1\\t%0, #1" + "TARGET_THUMB2" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) + (match_dup 3) + (match_dup 4)))] + { + operands[3] = GEN_INT (~1); + operands[4] = GEN_INT (~0); + } + [(set_attr "conds" "use") + (set_attr "length" "10") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_mov_notscc_strict_it" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (not:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)])))] + "TARGET_THUMB2 && arm_restrict_it" + "#" ; "mvn %0, #0 ; it%d1 ; lsl%d1 %0, %0, #1" + "&& reload_completed" + [(set (match_dup 0) + (match_dup 3)) + (cond_exec (match_dup 4) + (set (match_dup 0) + (ashift:SI (match_dup 0) + (const_int 1))))] + { + operands[3] = GEN_INT (~0); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[1]), + VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_movsicc_insn" + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r") + (if_then_else:SI + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "arm_not_operand" "0 ,lPy,0 ,0,rI,K,rI,rI,K ,K,r") + (match_operand:SI 2 "arm_not_operand" "lPy,0 ,rI,K,0 ,0,rI,K ,rI,K,r")))] + "TARGET_THUMB2" + "@ + it\\t%D3\;mov%D3\\t%0, %2 + it\\t%d3\;mov%d3\\t%0, %1 + it\\t%D3\;mov%D3\\t%0, %2 + it\\t%D3\;mvn%D3\\t%0, #%B2 + it\\t%d3\;mov%d3\\t%0, %1 + it\\t%d3\;mvn%d3\\t%0, #%B1 + # + # + # + # + #" + ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2 + ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2 + ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2 + ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2 + "&& reload_completed" + [(const_int 0)] + { + enum rtx_code rev_code; + enum machine_mode mode; + rtx rev_cond; + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + operands[3], + gen_rtx_SET (VOIDmode, + operands[0], + operands[1]))); + rev_code = GET_CODE (operands[3]); + mode = GET_MODE (operands[4]); + if (mode == CCFPmode || mode == CCFPEmode) + rev_code = reverse_condition_maybe_unordered (rev_code); + else + rev_code = reverse_condition (rev_code); + + rev_cond = gen_rtx_fmt_ee (rev_code, + VOIDmode, + gen_rtx_REG (mode, CC_REGNUM), + const0_rtx); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + rev_cond, + gen_rtx_SET (VOIDmode, + operands[0], + operands[2]))); + DONE; + } + [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6") + (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes") + (set_attr "conds" "use") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_movsfcc_soft_insn" + [(set (match_operand:SF 0 "s_register_operand" "=r,r") + (if_then_else:SF (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,r") + (match_operand:SF 2 "s_register_operand" "r,0")))] + "TARGET_THUMB2 && TARGET_SOFT_FLOAT" + "@ + it\\t%D3\;mov%D3\\t%0, %2 + 
it\\t%d3\;mov%d3\\t%0, %1" + [(set_attr "length" "6,6") + (set_attr "conds" "use") + (set_attr "type" "multiple")] +) + +(define_insn "*call_reg_thumb2" + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB2" + "blx%?\\t%0" + [(set_attr "type" "call")] +) + +(define_insn "*call_value_reg_thumb2" + [(set (match_operand 0 "" "") + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (clobber (reg:SI LR_REGNUM))] + "TARGET_THUMB2" + "blx\\t%1" + [(set_attr "type" "call")] +) + +(define_insn "*thumb2_indirect_jump" + [(set (pc) + (match_operand:SI 0 "register_operand" "l*r"))] + "TARGET_THUMB2" + "bx\\t%0" + [(set_attr "conds" "clob") + (set_attr "type" "branch")] +) +;; Don't define thumb2_load_indirect_jump because we can't guarantee label +;; addresses will have the thumb bit set correctly. + + +(define_insn_and_split "*thumb2_and_scc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") + (and:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "r")))] + "TARGET_THUMB2" + "#" ; "and\\t%0, %3, #1\;it\\t%D1\;mov%D1\\t%0, #0" + "&& reload_completed" + [(set (match_dup 0) + (and:SI (match_dup 3) (const_int 1))) + (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0)))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "type" "multiple") + (set (attr "length") (if_then_else (match_test "arm_restrict_it") + (const_int 8) + (const_int 10)))] +) + +(define_insn_and_split "*thumb2_ior_scc" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (ior:SI (match_operator:SI 1 "arm_comparison_operator" + [(match_operand 2 "cc_register" "") (const_int 0)]) + (match_operand:SI 3 "s_register_operand" "0,?r")))] + "TARGET_THUMB2 && !arm_restrict_it" + "@ + it\\t%d1\;orr%d1\\t%0, %3, #1 + #" + ; alt 1: ite\\t%D1\;mov%D1\\t%0, %3\;orr%d1\\t%0, %3, #1 + "&& reload_completed + && REGNO (operands [0]) != REGNO (operands[3])" + [(cond_exec (match_dup 5) (set (match_dup 0) (match_dup 3))) + (cond_exec (match_dup 4) (set (match_dup 0) + (ior:SI (match_dup 3) (const_int 1))))] + { + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code rc = GET_CODE (operands[1]); + + operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + if (mode == CCFPmode || mode == CCFPEmode) + rc = reverse_condition_maybe_unordered (rc); + else + rc = reverse_condition (rc); + operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx); + } + [(set_attr "conds" "use") + (set_attr "length" "6,10") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_ior_scc_strict_it" + [(set (match_operand:SI 0 "s_register_operand" "=l,l") + (ior:SI (match_operator:SI 2 "arm_comparison_operator" + [(match_operand 3 "cc_register" "") (const_int 0)]) + (match_operand:SI 1 "s_register_operand" "0,?l")))] + "TARGET_THUMB2 && arm_restrict_it" + "@ + it\\t%d2\;mov%d2\\t%0, #1\;it\\t%d2\;orr%d2\\t%0, %1 + mov\\t%0, #1\;orr\\t%0, %1\;it\\t%D2\;mov%D2\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "8") + (set_attr "type" "multiple")] 
+) + +(define_insn "*thumb2_cond_move" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (if_then_else:SI (match_operator 3 "equality_operator" + [(match_operator 4 "arm_comparison_operator" + [(match_operand 5 "cc_register" "") (const_int 0)]) + (const_int 0)]) + (match_operand:SI 1 "arm_rhs_operand" "0,rI,?rI") + (match_operand:SI 2 "arm_rhs_operand" "rI,0,rI")))] + "TARGET_THUMB2" + "* + if (GET_CODE (operands[3]) == NE) + { + if (which_alternative != 1) + output_asm_insn (\"it\\t%D4\;mov%D4\\t%0, %2\", operands); + if (which_alternative != 0) + output_asm_insn (\"it\\t%d4\;mov%d4\\t%0, %1\", operands); + return \"\"; + } + switch (which_alternative) + { + case 0: + output_asm_insn (\"it\\t%d4\", operands); + break; + case 1: + output_asm_insn (\"it\\t%D4\", operands); + break; + case 2: + if (arm_restrict_it) + output_asm_insn (\"it\\t%D4\", operands); + else + output_asm_insn (\"ite\\t%D4\", operands); + break; + default: + abort(); + } + if (which_alternative != 0) + { + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + if (arm_restrict_it && which_alternative == 2) + output_asm_insn (\"it\\t%d4\", operands); + } + if (which_alternative != 1) + output_asm_insn (\"mov%d4\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "use") + (set_attr "length" "6,6,10") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_cond_arith" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (match_operator:SI 5 "shiftable_operator" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]) + (match_operand:SI 1 "s_register_operand" "0,?r")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && !arm_restrict_it" + "* + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + return \"%i5\\t%0, %1, %2, lsr #31\"; + + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (GET_CODE (operands[5]) == AND) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, #0\", operands); + } + else if (GET_CODE (operands[5]) == MINUS) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"rsb%D4\\t%0, %1, #0\", operands); + } + else if (which_alternative != 0) + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + } + else + output_asm_insn (\"it\\t%d4\", operands); + return \"%i5%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "14") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_cond_arith_strict_it" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (match_operator:SI 5 "shiftable_operator_strict_it" + [(match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "arm_rhs_operand" "rI")]) + (match_operand:SI 1 "s_register_operand" "0")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && arm_restrict_it" + "#" + "&& reload_completed" + [(const_int 0)] + { + if (GET_CODE (operands[4]) == LT && operands[3] == const0_rtx) + { + /* %i5 %0, %1, %2, lsr #31 */ + rtx shifted_op = gen_rtx_LSHIFTRT (SImode, operands[2], GEN_INT (31)); + rtx op = NULL_RTX; + + switch (GET_CODE (operands[5])) + { + case AND: + op = gen_rtx_AND (SImode, shifted_op, operands[1]); + break; + case PLUS: + op = gen_rtx_PLUS (SImode, shifted_op, operands[1]); + break; + default: gcc_unreachable (); + } + emit_insn (gen_rtx_SET (VOIDmode, operands[0], op)); + DONE; + } + + /* "cmp %2, %3" */ + emit_insn (gen_rtx_SET 
(VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), + gen_rtx_COMPARE (CCmode, operands[2], operands[3]))); + + if (GET_CODE (operands[5]) == AND) + { + /* %i5 %0, %1, #1 + it%D4 + mov%D4 %0, #0 */ + enum rtx_code rc = reverse_condition (GET_CODE (operands[4])); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], gen_rtx_AND (SImode, operands[1], GEN_INT (1)))); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + DONE; + } + else + { + /* it\\t%d4 + %i5%d4\\t%0, %1, #1 */ + emit_insn (gen_rtx_COND_EXEC (VOIDmode, gen_rtx_fmt_ee (GET_CODE (operands[4]), + VOIDmode, + gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx), + gen_rtx_SET(VOIDmode, operands[0], + gen_rtx_PLUS (SImode, + operands[1], + GEN_INT (1))))); + DONE; + } + FAIL; + } + [(set_attr "conds" "clob") + (set_attr "length" "12") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_cond_sub" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts") + (minus:SI (match_operand:SI 1 "s_register_operand" "0,?Ts") + (match_operator:SI 4 "arm_comparison_operator" + [(match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "arm_rhs_operand" "rI,rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + output_asm_insn (\"cmp\\t%2, %3\", operands); + if (which_alternative != 0) + { + if (arm_restrict_it) + { + output_asm_insn (\"mov\\t%0, %1\", operands); + output_asm_insn (\"it\\t%d4\", operands); + } + else + { + output_asm_insn (\"ite\\t%D4\", operands); + output_asm_insn (\"mov%D4\\t%0, %1\", operands); + } + } + else + output_asm_insn (\"it\\t%d4\", operands); + return \"sub%d4\\t%0, %1, #1\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "10,14") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*thumb2_negscc" + [(set (match_operand:SI 0 "s_register_operand" "=Ts") + (neg:SI (match_operator 3 "arm_comparison_operator" + [(match_operand:SI 1 "s_register_operand" "r") + (match_operand:SI 2 "arm_rhs_operand" "rI")]))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + + if (GET_CODE (operands[3]) == LT && operands[2] == const0_rtx) + { + /* Emit asr\\t%0, %1, #31 */ + emit_insn (gen_rtx_SET (VOIDmode, + operands[0], + gen_rtx_ASHIFTRT (SImode, + operands[1], + GEN_INT (31)))); + DONE; + } + else if (GET_CODE (operands[3]) == NE && !arm_restrict_it) + { + /* Emit subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0 */ + if (CONST_INT_P (operands[2])) + emit_insn (gen_cmpsi2_addneg (operands[0], operands[1], operands[2], + GEN_INT (- INTVAL (operands[2])))); + else + emit_insn (gen_subsi3_compare (operands[0], operands[1], operands[2])); + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_NE (SImode, + cc_reg, + const0_rtx), + gen_rtx_SET (SImode, + operands[0], + GEN_INT (~0)))); + DONE; + } + else + { + /* Emit: cmp\\t%1, %2\;mvn\\t%0, #0\;it\\t%D3\;mov%D3\\t%0, #0\;*/ + enum rtx_code rc = reverse_condition (GET_CODE (operands[3])); + enum machine_mode mode = SELECT_CC_MODE (rc, operands[1], operands[2]); + rtx tmp1 = gen_rtx_REG (mode, CC_REGNUM); + + emit_insn (gen_rtx_SET (VOIDmode, + cc_reg, + gen_rtx_COMPARE (CCmode, operands[1], operands[2]))); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], GEN_INT (~0))); + + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (rc, + VOIDmode, + tmp1, + const0_rtx), + gen_rtx_SET (VOIDmode, operands[0], const0_rtx))); + DONE; + } + FAIL; + } 
+ [(set_attr "conds" "clob") + (set_attr "length" "14") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_movcond" + [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts,Ts") + (if_then_else:SI + (match_operator 5 "arm_comparison_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "arm_add_operand" "rIL,rIL,rIL")]) + (match_operand:SI 1 "arm_rhs_operand" "0,TsI,?TsI") + (match_operand:SI 2 "arm_rhs_operand" "TsI,0,TsI"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (GET_CODE (operands[5]) == LT + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && REG_P (operands[1])) + { + if (operands[2] == const0_rtx) + return \"and\\t%0, %1, %3, asr #31\"; + return \"ands\\t%0, %1, %3, asr #32\;it\\tcc\;movcc\\t%0, %2\"; + } + else if (which_alternative != 0 && REG_P (operands[2])) + { + if (operands[1] == const0_rtx) + return \"bic\\t%0, %2, %3, asr #31\"; + return \"bics\\t%0, %2, %3, asr #32\;it\\tcs\;movcs\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + + if (GET_CODE (operands[5]) == GE + && (operands[4] == const0_rtx)) + { + if (which_alternative != 1 && REG_P (operands[1])) + { + if (operands[2] == const0_rtx) + return \"bic\\t%0, %1, %3, asr #31\"; + return \"bics\\t%0, %1, %3, asr #32\;it\\tcs\;movcs\\t%0, %2\"; + } + else if (which_alternative != 0 && REG_P (operands[2])) + { + if (operands[1] == const0_rtx) + return \"and\\t%0, %2, %3, asr #31\"; + return \"ands\\t%0, %2, %3, asr #32\;it\tcc\;movcc\\t%0, %1\"; + } + /* The only case that falls through to here is when both ops 1 & 2 + are constants. */ + } + if (CONST_INT_P (operands[4]) + && !const_ok_for_arm (INTVAL (operands[4]))) + output_asm_insn (\"cmn\\t%3, #%n4\", operands); + else + output_asm_insn (\"cmp\\t%3, %4\", operands); + switch (which_alternative) + { + case 0: + output_asm_insn (\"it\\t%D5\", operands); + break; + case 1: + output_asm_insn (\"it\\t%d5\", operands); + break; + case 2: + if (arm_restrict_it) + { + output_asm_insn (\"mov\\t%0, %1\", operands); + output_asm_insn (\"it\\t%D5\", operands); + } + else + output_asm_insn (\"ite\\t%d5\", operands); + break; + default: + abort(); + } + if (which_alternative != 0 && !(arm_restrict_it && which_alternative == 2)) + output_asm_insn (\"mov%d5\\t%0, %1\", operands); + if (which_alternative != 1) + output_asm_insn (\"mov%D5\\t%0, %2\", operands); + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "10,10,14") + (set_attr "type" "multiple")] +) + +;; Zero and sign extension instructions. + +;; All supported Thumb2 implementations are armv6, so only that case is +;; provided. 
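+;; Illustrative examples (editor's sketch, not from the upstream sources;
+;; unified assembly syntax assumed).  On ARMv6 and later each extension
+;; below maps onto a single instruction, for instance:
+;;   sxtb  r0, r1          @ sign-extend byte to 32 bits
+;;   uxth  r0, r1          @ zero-extend halfword to 32 bits
+;;   ldrsb r0, [r2]        @ sign-extending byte load straight from memory
+;; so the older two-instruction shift-pair sequences are not needed.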
+(define_insn "*thumb2_extendqisi_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + sxtb%?\\t%0, %1 + ldr%(sb%)\\t%0, %1" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "pool_range" "*,4094") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "*thumb2_zero_extendhisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + uxth%?\\t%0, %1 + ldr%(h%)\\t%0, %1" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "pool_range" "*,4094") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "thumb2_zero_extendqisi2_v6" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] + "TARGET_THUMB2 && arm_arch6" + "@ + uxtb%(%)\\t%0, %1 + ldr%(b%)\\t%0, %1\\t%@ zero_extendqisi2" + [(set_attr "type" "extend,load_byte") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "pool_range" "*,4094") + (set_attr "neg_pool_range" "*,250")] +) + +(define_insn "thumb2_casesi_internal" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r")) + (use (label_ref (match_dup 2)))])] + "TARGET_THUMB2 && !flag_pic" + "* return thumb2_output_casesi(operands);" + [(set_attr "conds" "clob") + (set_attr "length" "16") + (set_attr "type" "multiple")] +) + +(define_insn "thumb2_casesi_internal_pic" + [(parallel [(set (pc) + (if_then_else + (leu (match_operand:SI 0 "s_register_operand" "r") + (match_operand:SI 1 "arm_rhs_operand" "rI")) + (mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4)) + (label_ref (match_operand 2 "" "")))) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (match_scratch:SI 5 "=r")) + (use (label_ref (match_dup 2)))])] + "TARGET_THUMB2 && flag_pic" + "* return thumb2_output_casesi(operands);" + [(set_attr "conds" "clob") + (set_attr "length" "20") + (set_attr "type" "multiple")] +) + +(define_insn "*thumb2_return" + [(simple_return)] + "TARGET_THUMB2" + "* return output_return_instruction (const_true_rtx, true, false, true);" + [(set_attr "type" "branch") + (set_attr "length" "4")] +) + +(define_insn_and_split "thumb2_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")] + VUNSPEC_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "TARGET_THUMB2" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + thumb_set_return_address (operands[0], operands[1]); + DONE; + }" +) + +(define_insn "*thumb2_alusi3_short" + [(set (match_operand:SI 0 "s_register_operand" "=l") + (match_operator:SI 3 "thumb_16bit_operator" + [(match_operand:SI 1 "s_register_operand" "0") + (match_operand:SI 2 "s_register_operand" "l")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed + && GET_CODE(operands[3]) != PLUS + && GET_CODE(operands[3]) != MINUS" + "%I3%!\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + 
(set_attr "type" "alu_reg")] +) + +(define_insn "*thumb2_shiftsi3_short" + [(set (match_operand:SI 0 "low_register_operand" "=l,l") + (match_operator:SI 3 "shift_operator" + [(match_operand:SI 1 "low_register_operand" "0,l") + (match_operand:SI 2 "low_reg_or_int_operand" "l,M")])) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed + && ((GET_CODE(operands[3]) != ROTATE && GET_CODE(operands[3]) != ROTATERT) + || REG_P (operands[2]))" + "* return arm_output_shift(operands, 2);" + [(set_attr "predicable" "yes") + (set_attr "shift" "1") + (set_attr "length" "2") + (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "") + (const_string "alu_shift_imm") + (const_string "alu_shift_reg")))] +) + +(define_insn "*thumb2_mov_shortim" + [(set (match_operand:QHSI 0 "low_register_operand" "=l") + (match_operand:QHSI 1 "const_int_operand" "I")) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "mov%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "mov_imm")] +) + +(define_insn "*thumb2_addsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l,l") + (plus:SI (match_operand:SI 1 "low_register_operand" "l,0") + (match_operand:SI 2 "low_reg_or_int_operand" "lPt,Ps"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "* + HOST_WIDE_INT val; + + if (CONST_INT_P (operands[2])) + val = INTVAL(operands[2]); + else + val = 0; + + /* We prefer eg. subs rn, rn, #1 over adds rn, rn, #0xffffffff. */ + if (val < 0 && const_ok_for_arm(ARM_SIGN_EXTEND (-val))) + return \"sub%!\\t%0, %1, #%n2\"; + else + return \"add%!\\t%0, %1, %2\"; + " + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "alu_reg")] +) + +(define_insn "*thumb2_subsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (minus:SI (match_operand:SI 1 "low_register_operand" "l") + (match_operand:SI 2 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "sub%!\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "alu_reg")] +) + +(define_peephole2 + [(set (match_operand:CC 0 "cc_register" "") + (compare:CC (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_THUMB2 + && peep2_reg_dead_p (1, operands[1]) + && satisfies_constraint_Pw (operands[2])" + [(parallel + [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 1) (plus:SI (match_dup 1) (match_dup 3)))])] + "operands[3] = GEN_INT (- INTVAL (operands[2]));" +) + +(define_peephole2 + [(match_scratch:SI 3 "l") + (set (match_operand:CC 0 "cc_register" "") + (compare:CC (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_THUMB2 + && satisfies_constraint_Px (operands[2])" + [(parallel + [(set (match_dup 0) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 3) (plus:SI (match_dup 1) (match_dup 4)))])] + "operands[4] = GEN_INT (- INTVAL (operands[2]));" +) + +(define_insn "thumb2_addsi3_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 1 "s_register_operand" "l, 0, r") + (match_operand:SI 2 "arm_add_operand" "lPt,Ps,rIL")) + (const_int 0))) + (set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "TARGET_THUMB2" + "* + HOST_WIDE_INT val; + + if (CONST_INT_P (operands[2])) + val = INTVAL (operands[2]); + else + val = 0; + + if (val < 0 && 
const_ok_for_arm (ARM_SIGN_EXTEND (-val))) + return \"subs\\t%0, %1, #%n2\"; + else + return \"adds\\t%0, %1, %2\"; + " + [(set_attr "conds" "set") + (set_attr "length" "2,2,4") + (set_attr "type" "alu_reg")] +) + +(define_insn "*thumb2_addsi3_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (plus:SI (match_operand:SI 0 "s_register_operand" "l,l, r,r") + (match_operand:SI 1 "arm_add_operand" "Pv,l,IL,r")) + (const_int 0)))] + "TARGET_THUMB2" + "* + HOST_WIDE_INT val; + + if (CONST_INT_P (operands[1])) + val = INTVAL (operands[1]); + else + val = 0; + + if (val < 0 && const_ok_for_arm (ARM_SIGN_EXTEND (-val))) + return \"cmp\\t%0, #%n1\"; + else + return \"cmn\\t%0, %1\"; + " + [(set_attr "conds" "set") + (set_attr "length" "2,2,4,4") + (set_attr "type" "alus_imm,alus_reg,alus_imm,alus_reg")] +) + +(define_insn "*thumb2_mulsi_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (mult:SI (match_operand:SI 1 "low_register_operand" "%0") + (match_operand:SI 2 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && optimize_size && reload_completed" + "mul%!\\t%0, %2, %0" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "muls")]) + +(define_insn "*thumb2_mulsi_short_compare0" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=l") + (mult:SI (match_dup 1) (match_dup 2)))] + "TARGET_THUMB2 && optimize_size" + "muls\\t%0, %2, %0" + [(set_attr "length" "2") + (set_attr "type" "muls")]) + +(define_insn "*thumb2_mulsi_short_compare0_scratch" + [(set (reg:CC_NOOV CC_REGNUM) + (compare:CC_NOOV + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "l")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=l"))] + "TARGET_THUMB2 && optimize_size" + "muls\\t%0, %2, %0" + [(set_attr "length" "2") + (set_attr "type" "muls")]) + +(define_insn "*thumb2_cbz" + [(set (pc) (if_then_else + (eq (match_operand:SI 0 "s_register_operand" "l,?r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (get_attr_length (insn) == 2) + return \"cbz\\t%0, %l1\"; + else + return \"cmp\\t%0, #0\;beq\\t%l1\"; + " + [(set (attr "length") + (if_then_else + (and (ge (minus (match_dup 1) (pc)) (const_int 2)) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (not (match_test "which_alternative"))) + (const_int 2) + (const_int 8))) + (set_attr "type" "branch,multiple")] +) + +(define_insn "*thumb2_cbnz" + [(set (pc) (if_then_else + (ne (match_operand:SI 0 "s_register_operand" "l,?r") + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2" + "* + if (get_attr_length (insn) == 2) + return \"cbnz\\t%0, %l1\"; + else + return \"cmp\\t%0, #0\;bne\\t%l1\"; + " + [(set (attr "length") + (if_then_else + (and (ge (minus (match_dup 1) (pc)) (const_int 2)) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (not (match_test "which_alternative"))) + (const_int 2) + (const_int 8))) + (set_attr "type" "branch,multiple")] +) + +(define_insn "*thumb2_one_cmplsi2_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (not:SI (match_operand:SI 1 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "mvn%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" 
"2") + (set_attr "type" "mvn_reg")] +) + +(define_insn "*thumb2_negsi2_short" + [(set (match_operand:SI 0 "low_register_operand" "=l") + (neg:SI (match_operand:SI 1 "low_register_operand" "l"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_THUMB2 && reload_completed" + "neg%!\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "length" "2") + (set_attr "type" "alu_reg")] +) + +(define_insn "*orsi_notsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "orn%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "logic_reg")] +) + +(define_insn "*orsi_not_shiftsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (not:SI (match_operator:SI 4 "shift_operator" + [(match_operand:SI 2 "s_register_operand" "r") + (match_operand:SI 3 "const_int_operand" "M")])) + (match_operand:SI 1 "s_register_operand" "r")))] + "TARGET_THUMB2" + "orn%?\\t%0, %1, %2%S4" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "2") + (set_attr "type" "alu_shift_imm")] +) + +(define_peephole2 + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 1 "low_register_operand" "") + (const_int 1) + (match_operand:SI 2 "const_int_operand" "")) + (const_int 0))) + (match_scratch:SI 3 "l") + (set (pc) + (if_then_else (match_operator:CC_NOOV 4 "equality_operator" + [(match_dup 0) (const_int 0)]) + (match_operand 5 "" "") + (match_operand 6 "" "")))] + "TARGET_THUMB2 + && (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 32)" + [(parallel [(set (match_dup 0) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (clobber (match_dup 3))]) + (set (pc) + (if_then_else (match_op_dup 4 [(match_dup 0) (const_int 0)]) + (match_dup 5) (match_dup 6)))] + " + operands[2] = GEN_INT (31 - INTVAL (operands[2])); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[4]) == NE ? LT : GE, + VOIDmode, operands[0], const0_rtx); + ") + +(define_peephole2 + [(set (match_operand:CC_NOOV 0 "cc_register" "") + (compare:CC_NOOV (zero_extract:SI + (match_operand:SI 1 "low_register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (const_int 0)) + (const_int 0))) + (match_scratch:SI 3 "l") + (set (pc) + (if_then_else (match_operator:CC_NOOV 4 "equality_operator" + [(match_dup 0) (const_int 0)]) + (match_operand 5 "" "") + (match_operand 6 "" "")))] + "TARGET_THUMB2 + && (INTVAL (operands[2]) > 0 && INTVAL (operands[2]) < 32)" + [(parallel [(set (match_dup 0) + (compare:CC_NOOV (ashift:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (clobber (match_dup 3))]) + (set (pc) + (if_then_else (match_op_dup 4 [(match_dup 0) (const_int 0)]) + (match_dup 5) (match_dup 6)))] + " + operands[2] = GEN_INT (32 - INTVAL (operands[2])); + ") + +;; Define the subtract-one-and-jump insns so loop.c +;; knows what to generate. +(define_expand "doloop_end" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" ""))] ; label + "TARGET_32BIT" + " + { + /* Currently SMS relies on the do-loop pattern to recognize loops + where (1) the control part consists of all insns defining and/or + using a certain 'count' register and (2) the loop count can be + adjusted by modifying this register prior to the loop. + ??? The possible introduction of a new block to initialize the + new IV can potentially affect branch optimizations. 
*/ + if (optimize > 0 && flag_modulo_sched) + { + rtx s0; + rtx bcomp; + rtx loc_ref; + rtx cc_reg; + rtx insn; + rtx cmp; + + if (GET_MODE (operands[0]) != SImode) + FAIL; + + s0 = operands [0]; + if (TARGET_THUMB2) + insn = emit_insn (gen_thumb2_addsi3_compare0 (s0, s0, GEN_INT (-1))); + else + insn = emit_insn (gen_addsi3_compare0 (s0, s0, GEN_INT (-1))); + + cmp = XVECEXP (PATTERN (insn), 0, 0); + cc_reg = SET_DEST (cmp); + bcomp = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); + loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [1]); + emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, + loc_ref, pc_rtx))); + DONE; + }else + FAIL; + }") + diff --git a/gcc-4.9/gcc/config/arm/types.md b/gcc-4.9/gcc/config/arm/types.md new file mode 100644 index 000000000..cc39cd11f --- /dev/null +++ b/gcc-4.9/gcc/config/arm/types.md @@ -0,0 +1,1077 @@ +;; Instruction Classification for ARM for GNU compiler. + +;; Copyright (C) 1991-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; TYPE attribute is used to classify instructions for use in scheduling. +; +; Instruction classification: +; +; adc_imm add/subtract with carry and with an immediate operand. +; adc_reg add/subtract with carry and no immediate operand. +; adcs_imm as adc_imm, setting condition flags. +; adcs_reg as adc_reg, setting condition flags. +; adr calculate address. +; alu_ext From ARMv8-A: any arithmetic instruction that has a +; sign/zero-extended. +; AArch64 Only. +; source operand +; alu_imm any arithmetic instruction that doesn't have a shifted +; operand and has an immediate operand. This +; excludes MOV, MVN and RSB(S) immediate. +; alu_reg any arithmetic instruction that doesn't have a shifted +; or an immediate operand. This excludes +; MOV and MVN but includes MOVT. This is also the default. +; alu_shift_imm any arithmetic instruction that has a source operand +; shifted by a constant. This excludes simple shifts. +; alu_shift_reg as alu_shift_imm, with the shift amount specified in a +; register. +; alus_ext From ARMv8-A: as alu_ext, setting condition flags. +; AArch64 Only. +; alus_imm as alu_imm, setting condition flags. +; alus_reg as alu_reg, setting condition flags. +; alus_shift_imm as alu_shift_imm, setting condition flags. +; alus_shift_reg as alu_shift_reg, setting condition flags. +; bfm bitfield move operation. +; block blockage insn, this blocks all functional units. +; branch branch. +; call subroutine call. +; clz count leading zeros (CLZ). +; csel From ARMv8-A: conditional select. +; extend extend instruction (SXTB, SXTH, UXTB, UXTH). +; f_cvt conversion between float representations. +; f_cvtf2i conversion between float and integral types. +; f_cvti2f conversion between integral and float types. +; f_flag transfer of co-processor flags to the CPSR. +; f_load[d,s] double/single load from memory. Used for VFP unit. 
+; f_mcr transfer arm to vfp reg. +; f_mcrr transfer two arm regs to vfp reg. +; f_minmax[d,s] double/single floating point minimum/maximum. +; f_mrc transfer vfp to arm reg. +; f_mrrc transfer vfp to two arm regs. +; f_rint[d,s] double/single floating point rount to integral. +; f_sel[d,s] double/single floating byte select. +; f_store[d,s] double/single store to memory. Used for VFP unit. +; fadd[d,s] double/single floating-point scalar addition. +; fcmp[d,s] double/single floating-point compare. +; fconst[d,s] double/single load immediate. +; fcsel From ARMv8-A: Floating-point conditional select. +; fdiv[d,s] double/single precision floating point division. +; ffarith[d,s] double/single floating point abs/neg/cpy. +; ffma[d,s] double/single floating point fused multiply-accumulate. +; float floating point arithmetic operation. +; fmac[d,s] double/single floating point multiply-accumulate. +; fmov floating point to floating point register move. +; fmul[d,s] double/single floating point multiply. +; fsqrt[d,s] double/single precision floating point square root. +; load_acq load-acquire. +; load_byte load byte(s) from memory to arm registers. +; load1 load 1 word from memory to arm registers. +; load2 load 2 words from memory to arm registers. +; load3 load 3 words from memory to arm registers. +; load4 load 4 words from memory to arm registers. +; logic_imm any logical instruction that doesn't have a shifted +; operand and has an immediate operand. +; logic_reg any logical instruction that doesn't have a shifted +; operand or an immediate operand. +; logic_shift_imm any logical instruction that has a source operand +; shifted by a constant. This excludes simple shifts. +; logic_shift_reg as logic_shift_imm, with the shift amount specified in a +; register. +; logics_imm as logic_imm, setting condition flags. +; logics_reg as logic_reg, setting condition flags. +; logics_shift_imm as logic_shift_imm, setting condition flags. +; logics_shift_reg as logic_shift_reg, setting condition flags. +; mla integer multiply accumulate. +; mlas integer multiply accumulate, flag setting. +; mov_imm simple MOV instruction that moves an immediate to +; register. This includes MOVW, but not MOVT. +; mov_reg simple MOV instruction that moves a register to another +; register. This includes MOVW, but not MOVT. +; mov_shift simple MOV instruction, shifted operand by a constant. +; mov_shift_reg simple MOV instruction, shifted operand by a register. +; mrs system/special/co-processor register move. +; mul integer multiply. +; muls integer multiply, flag setting. +; multiple more than one instruction, candidate for future +; splitting, or better modeling. +; mvn_imm inverting move instruction, immediate. +; mvn_reg inverting move instruction, register. +; mvn_shift inverting move instruction, shifted operand by a constant. +; mvn_shift_reg inverting move instruction, shifted operand by a register. +; no_insn an insn which does not represent an instruction in the +; final output, thus having no impact on scheduling. +; rbit reverse bits. +; rev reverse bytes. +; sdiv signed division. +; shift_imm simple shift operation (LSL, LSR, ASR, ROR) with an +; immediate. +; shift_reg simple shift by a register. +; smlad signed multiply accumulate dual. +; smladx signed multiply accumulate dual reverse. +; smlal signed multiply accumulate long. +; smlald signed multiply accumulate long dual. +; smlals signed multiply accumulate long, flag setting. +; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate. 
+; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate. +; smlawy signed multiply accumulate wide, 32x16-bit, +; 32-bit accumulate. +; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate. +; smlsd signed multiply subtract dual. +; smlsdx signed multiply subtract dual reverse. +; smlsld signed multiply subtract long dual. +; smmla signed most significant word multiply accumulate. +; smmul signed most significant word multiply. +; smmulr signed most significant word multiply, rounded. +; smuad signed dual multiply add. +; smuadx signed dual multiply add reverse. +; smull signed multiply long. +; smulls signed multiply long, flag setting. +; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate. +; smulxy signed multiply, 16x16-bit, 32-bit accumulate. +; smusd signed dual multiply subtract. +; smusdx signed dual multiply subtract reverse. +; store_rel store-release. +; store1 store 1 word to memory from arm registers. +; store2 store 2 words to memory from arm registers. +; store3 store 3 words to memory from arm registers. +; store4 store 4 (or more) words to memory from arm registers. +; trap cause a trap in the kernel. +; udiv unsigned division. +; umaal unsigned multiply accumulate accumulate long. +; umlal unsigned multiply accumulate long. +; umlals unsigned multiply accumulate long, flag setting. +; umull unsigned multiply long. +; umulls unsigned multiply long, flag setting. +; untyped insn without type information - default, and error, +; case. +; +; The classification below is for instructions used by the Wireless MMX +; Technology. Each attribute value is used to classify an instruction of the +; same name or family. +; +; wmmx_tandc +; wmmx_tbcst +; wmmx_textrc +; wmmx_textrm +; wmmx_tinsr +; wmmx_tmcr +; wmmx_tmcrr +; wmmx_tmia +; wmmx_tmiaph +; wmmx_tmiaxy +; wmmx_tmrc +; wmmx_tmrrc +; wmmx_tmovmsk +; wmmx_torc +; wmmx_torvsc +; wmmx_wabs +; wmmx_wdiff +; wmmx_wacc +; wmmx_wadd +; wmmx_waddbhus +; wmmx_waddsubhx +; wmmx_waligni +; wmmx_walignr +; wmmx_wand +; wmmx_wandn +; wmmx_wavg2 +; wmmx_wavg4 +; wmmx_wcmpeq +; wmmx_wcmpgt +; wmmx_wmac +; wmmx_wmadd +; wmmx_wmax +; wmmx_wmerge +; wmmx_wmiawxy +; wmmx_wmiaxy +; wmmx_wmin +; wmmx_wmov +; wmmx_wmul +; wmmx_wmulw +; wmmx_wldr +; wmmx_wor +; wmmx_wpack +; wmmx_wqmiaxy +; wmmx_wqmulm +; wmmx_wqmulwm +; wmmx_wror +; wmmx_wsad +; wmmx_wshufh +; wmmx_wsll +; wmmx_wsra +; wmmx_wsrl +; wmmx_wstr +; wmmx_wsub +; wmmx_wsubaddhx +; wmmx_wunpckeh +; wmmx_wunpckel +; wmmx_wunpckih +; wmmx_wunpckil +; wmmx_wxor +; +; The classification below is for NEON instructions. 
+; +; neon_add +; neon_add_q +; neon_add_widen +; neon_add_long +; neon_qadd +; neon_qadd_q +; neon_add_halve +; neon_add_halve_q +; neon_add_halve_narrow_q +; neon_sub +; neon_sub_q +; neon_sub_widen +; neon_sub_long +; neon_qsub +; neon_qsub_q +; neon_sub_halve +; neon_sub_halve_q +; neon_sub_halve_narrow_q +; neon_abs +; neon_abs_q +; neon_neg +; neon_neg_q +; neon_qneg +; neon_qneg_q +; neon_qabs +; neon_qabs_q +; neon_abd +; neon_abd_q +; neon_abd_long +; neon_minmax +; neon_minmax_q +; neon_compare +; neon_compare_q +; neon_compare_zero +; neon_compare_zero_q +; neon_arith_acc +; neon_arith_acc_q +; neon_reduc_add +; neon_reduc_add_q +; neon_reduc_add_long +; neon_reduc_add_acc +; neon_reduc_add_acc_q +; neon_reduc_minmax +; neon_reduc_minmax_q +; neon_logic +; neon_logic_q +; neon_tst +; neon_tst_q +; neon_shift_imm +; neon_shift_imm_q +; neon_shift_imm_narrow_q +; neon_shift_imm_long +; neon_shift_reg +; neon_shift_reg_q +; neon_shift_acc +; neon_shift_acc_q +; neon_sat_shift_imm +; neon_sat_shift_imm_q +; neon_sat_shift_imm_narrow_q +; neon_sat_shift_reg +; neon_sat_shift_reg_q +; neon_ins +; neon_ins_q +; neon_move +; neon_move_q +; neon_move_narrow_q +; neon_permute +; neon_permute_q +; neon_zip +; neon_zip_q +; neon_tbl1 +; neon_tbl1_q +; neon_tbl2 +; neon_tbl2_q +; neon_tbl3 +; neon_tbl3_q +; neon_tbl4 +; neon_tbl4_q +; neon_bsl +; neon_bsl_q +; neon_cls +; neon_cls_q +; neon_cnt +; neon_cnt_q +; neon_ext +; neon_ext_q +; neon_rbit +; neon_rbit_q +; neon_rev +; neon_rev_q +; neon_mul_b +; neon_mul_b_q +; neon_mul_h +; neon_mul_h_q +; neon_mul_s +; neon_mul_s_q +; neon_mul_b_long +; neon_mul_h_long +; neon_mul_s_long +; neon_mul_d_long +; neon_mul_h_scalar +; neon_mul_h_scalar_q +; neon_mul_s_scalar +; neon_mul_s_scalar_q +; neon_mul_h_scalar_long +; neon_mul_s_scalar_long +; neon_sat_mul_b +; neon_sat_mul_b_q +; neon_sat_mul_h +; neon_sat_mul_h_q +; neon_sat_mul_s +; neon_sat_mul_s_q +; neon_sat_mul_b_long +; neon_sat_mul_h_long +; neon_sat_mul_s_long +; neon_sat_mul_h_scalar +; neon_sat_mul_h_scalar_q +; neon_sat_mul_s_scalar +; neon_sat_mul_s_scalar_q +; neon_sat_mul_h_scalar_long +; neon_sat_mul_s_scalar_long +; neon_mla_b +; neon_mla_b_q +; neon_mla_h +; neon_mla_h_q +; neon_mla_s +; neon_mla_s_q +; neon_mla_b_long +; neon_mla_h_long +; neon_mla_s_long +; neon_mla_h_scalar +; neon_mla_h_scalar_q +; neon_mla_s_scalar +; neon_mla_s_scalar_q +; neon_mla_h_scalar_long +; neon_mla_s_scalar_long +; neon_sat_mla_b_long +; neon_sat_mla_h_long +; neon_sat_mla_s_long +; neon_sat_mla_h_scalar_long +; neon_sat_mla_s_scalar_long +; neon_to_gp +; neon_to_gp_q +; neon_from_gp +; neon_from_gp_q +; neon_ldr +; neon_load1_1reg +; neon_load1_1reg_q +; neon_load1_2reg +; neon_load1_2reg_q +; neon_load1_3reg +; neon_load1_3reg_q +; neon_load1_4reg +; neon_load1_4reg_q +; neon_load1_all_lanes +; neon_load1_all_lanes_q +; neon_load1_one_lane +; neon_load1_one_lane_q +; neon_load2_2reg +; neon_load2_2reg_q +; neon_load2_4reg +; neon_load2_4reg_q +; neon_load2_all_lanes +; neon_load2_all_lanes_q +; neon_load2_one_lane +; neon_load2_one_lane_q +; neon_load3_3reg +; neon_load3_3reg_q +; neon_load3_all_lanes +; neon_load3_all_lanes_q +; neon_load3_one_lane +; neon_load3_one_lane_q +; neon_load4_4reg +; neon_load4_4reg_q +; neon_load4_all_lanes +; neon_load4_all_lanes_q +; neon_load4_one_lane +; neon_load4_one_lane_q +; neon_str +; neon_store1_1reg +; neon_store1_1reg_q +; neon_store1_2reg +; neon_store1_2reg_q +; neon_store1_3reg +; neon_store1_3reg_q +; neon_store1_4reg +; neon_store1_4reg_q +; 
neon_store1_one_lane +; neon_store1_one_lane_q +; neon_store2_2reg +; neon_store2_2reg_q +; neon_store2_4reg +; neon_store2_4reg_q +; neon_store2_one_lane +; neon_store2_one_lane_q +; neon_store3_3reg +; neon_store3_3reg_q +; neon_store3_one_lane +; neon_store3_one_lane_q +; neon_store4_4reg +; neon_store4_4reg_q +; neon_store4_one_lane +; neon_store4_one_lane_q +; neon_fp_abs_s +; neon_fp_abs_s_q +; neon_fp_abs_d +; neon_fp_abs_d_q +; neon_fp_neg_s +; neon_fp_neg_s_q +; neon_fp_neg_d +; neon_fp_neg_d_q +; neon_fp_abd_s +; neon_fp_abd_s_q +; neon_fp_abd_d +; neon_fp_abd_d_q +; neon_fp_addsub_s +; neon_fp_addsub_s_q +; neon_fp_addsub_d +; neon_fp_addsub_d_q +; neon_fp_compare_s +; neon_fp_compare_s_q +; neon_fp_compare_d +; neon_fp_compare_d_q +; neon_fp_minmax_s +; neon_fp_minmax_s_q +; neon_fp_minmax_d +; neon_fp_minmax_d_q +; neon_fp_reduc_add_s +; neon_fp_reduc_add_s_q +; neon_fp_reduc_add_d +; neon_fp_reduc_add_d_q +; neon_fp_reduc_minmax_s +; neon_fp_reduc_minmax_s_q +; neon_fp_reduc_minmax_d +; neon_fp_reduc_minmax_d_q +; neon_fp_cvt_narrow_s_q +; neon_fp_cvt_narrow_d_q +; neon_fp_cvt_widen_h +; neon_fp_cvt_widen_s +; neon_fp_to_int_s +; neon_fp_to_int_s_q +; neon_fp_to_int_d +; neon_fp_to_int_d_q +; neon_int_to_fp_s +; neon_int_to_fp_s_q +; neon_int_to_fp_d +; neon_int_to_fp_d_q +; neon_fp_round_s +; neon_fp_round_s_q +; neon_fp_round_d +; neon_fp_round_d_q +; neon_fp_recpe_s +; neon_fp_recpe_s_q +; neon_fp_recpe_d +; neon_fp_recpe_d_q +; neon_fp_recps_s +; neon_fp_recps_s_q +; neon_fp_recps_d +; neon_fp_recps_d_q +; neon_fp_recpx_s +; neon_fp_recpx_s_q +; neon_fp_recpx_d +; neon_fp_recpx_d_q +; neon_fp_rsqrte_s +; neon_fp_rsqrte_s_q +; neon_fp_rsqrte_d +; neon_fp_rsqrte_d_q +; neon_fp_rsqrts_s +; neon_fp_rsqrts_s_q +; neon_fp_rsqrts_d +; neon_fp_rsqrts_d_q +; neon_fp_mul_s +; neon_fp_mul_s_q +; neon_fp_mul_s_scalar +; neon_fp_mul_s_scalar_q +; neon_fp_mul_d +; neon_fp_mul_d_q +; neon_fp_mul_d_scalar_q +; neon_fp_mla_s +; neon_fp_mla_s_q +; neon_fp_mla_s_scalar +; neon_fp_mla_s_scalar_q +; neon_fp_mla_d +; neon_fp_mla_d_q +; neon_fp_mla_d_scalar_q +; neon_fp_sqrt_s +; neon_fp_sqrt_s_q +; neon_fp_sqrt_d +; neon_fp_sqrt_d_q +; neon_fp_div_s +; neon_fp_div_s_q +; neon_fp_div_d +; neon_fp_div_d_q +; +; The classification below is for Crypto instructions. 
+; +; crypto_aes +; crypto_sha1_xor +; crypto_sha1_fast +; crypto_sha1_slow +; crypto_sha256_fast +; crypto_sha256_slow + +(define_attr "type" + "adc_imm,\ + adc_reg,\ + adcs_imm,\ + adcs_reg,\ + adr,\ + alu_ext,\ + alu_imm,\ + alu_reg,\ + alu_shift_imm,\ + alu_shift_reg,\ + alus_ext,\ + alus_imm,\ + alus_reg,\ + alus_shift_imm,\ + alus_shift_reg,\ + bfm,\ + block,\ + branch,\ + call,\ + clz,\ + no_insn,\ + csel,\ + crc,\ + extend,\ + f_cvt,\ + f_cvtf2i,\ + f_cvti2f,\ + f_flag,\ + f_loadd,\ + f_loads,\ + f_mcr,\ + f_mcrr,\ + f_minmaxd,\ + f_minmaxs,\ + f_mrc,\ + f_mrrc,\ + f_rintd,\ + f_rints,\ + f_seld,\ + f_sels,\ + f_stored,\ + f_stores,\ + faddd,\ + fadds,\ + fcmpd,\ + fcmps,\ + fconstd,\ + fconsts,\ + fcsel,\ + fdivd,\ + fdivs,\ + ffarithd,\ + ffariths,\ + ffmad,\ + ffmas,\ + float,\ + fmacd,\ + fmacs,\ + fmov,\ + fmuld,\ + fmuls,\ + fsqrts,\ + fsqrtd,\ + load_acq,\ + load_byte,\ + load1,\ + load2,\ + load3,\ + load4,\ + logic_imm,\ + logic_reg,\ + logic_shift_imm,\ + logic_shift_reg,\ + logics_imm,\ + logics_reg,\ + logics_shift_imm,\ + logics_shift_reg,\ + mla,\ + mlas,\ + mov_imm,\ + mov_reg,\ + mov_shift,\ + mov_shift_reg,\ + mrs,\ + mul,\ + muls,\ + multiple,\ + mvn_imm,\ + mvn_reg,\ + mvn_shift,\ + mvn_shift_reg,\ + nop,\ + rbit,\ + rev,\ + sdiv,\ + shift_imm,\ + shift_reg,\ + smlad,\ + smladx,\ + smlal,\ + smlald,\ + smlals,\ + smlalxy,\ + smlawx,\ + smlawy,\ + smlaxy,\ + smlsd,\ + smlsdx,\ + smlsld,\ + smmla,\ + smmul,\ + smmulr,\ + smuad,\ + smuadx,\ + smull,\ + smulls,\ + smulwy,\ + smulxy,\ + smusd,\ + smusdx,\ + store_rel,\ + store1,\ + store2,\ + store3,\ + store4,\ + trap,\ + udiv,\ + umaal,\ + umlal,\ + umlals,\ + umull,\ + umulls,\ + untyped,\ + wmmx_tandc,\ + wmmx_tbcst,\ + wmmx_textrc,\ + wmmx_textrm,\ + wmmx_tinsr,\ + wmmx_tmcr,\ + wmmx_tmcrr,\ + wmmx_tmia,\ + wmmx_tmiaph,\ + wmmx_tmiaxy,\ + wmmx_tmrc,\ + wmmx_tmrrc,\ + wmmx_tmovmsk,\ + wmmx_torc,\ + wmmx_torvsc,\ + wmmx_wabs,\ + wmmx_wabsdiff,\ + wmmx_wacc,\ + wmmx_wadd,\ + wmmx_waddbhus,\ + wmmx_waddsubhx,\ + wmmx_waligni,\ + wmmx_walignr,\ + wmmx_wand,\ + wmmx_wandn,\ + wmmx_wavg2,\ + wmmx_wavg4,\ + wmmx_wcmpeq,\ + wmmx_wcmpgt,\ + wmmx_wmac,\ + wmmx_wmadd,\ + wmmx_wmax,\ + wmmx_wmerge,\ + wmmx_wmiawxy,\ + wmmx_wmiaxy,\ + wmmx_wmin,\ + wmmx_wmov,\ + wmmx_wmul,\ + wmmx_wmulw,\ + wmmx_wldr,\ + wmmx_wor,\ + wmmx_wpack,\ + wmmx_wqmiaxy,\ + wmmx_wqmulm,\ + wmmx_wqmulwm,\ + wmmx_wror,\ + wmmx_wsad,\ + wmmx_wshufh,\ + wmmx_wsll,\ + wmmx_wsra,\ + wmmx_wsrl,\ + wmmx_wstr,\ + wmmx_wsub,\ + wmmx_wsubaddhx,\ + wmmx_wunpckeh,\ + wmmx_wunpckel,\ + wmmx_wunpckih,\ + wmmx_wunpckil,\ + wmmx_wxor,\ +\ + neon_add,\ + neon_add_q,\ + neon_add_widen,\ + neon_add_long,\ + neon_qadd,\ + neon_qadd_q,\ + neon_add_halve,\ + neon_add_halve_q,\ + neon_add_halve_narrow_q,\ +\ + neon_sub,\ + neon_sub_q,\ + neon_sub_widen,\ + neon_sub_long,\ + neon_qsub,\ + neon_qsub_q,\ + neon_sub_halve,\ + neon_sub_halve_q,\ + neon_sub_halve_narrow_q,\ +\ + neon_abs,\ + neon_abs_q,\ + neon_neg,\ + neon_neg_q,\ + neon_qneg,\ + neon_qneg_q,\ + neon_qabs,\ + neon_qabs_q,\ + neon_abd,\ + neon_abd_q,\ + neon_abd_long,\ +\ + neon_minmax,\ + neon_minmax_q,\ + neon_compare,\ + neon_compare_q,\ + neon_compare_zero,\ + neon_compare_zero_q,\ +\ + neon_arith_acc,\ + neon_arith_acc_q,\ + neon_reduc_add,\ + neon_reduc_add_q,\ + neon_reduc_add_long,\ + neon_reduc_add_acc,\ + neon_reduc_add_acc_q,\ + neon_reduc_minmax,\ + neon_reduc_minmax_q,\ + neon_logic,\ + neon_logic_q,\ + neon_tst,\ + neon_tst_q,\ +\ + neon_shift_imm,\ + neon_shift_imm_q,\ + neon_shift_imm_narrow_q,\ + 
neon_shift_imm_long,\ + neon_shift_reg,\ + neon_shift_reg_q,\ + neon_shift_acc,\ + neon_shift_acc_q,\ + neon_sat_shift_imm,\ + neon_sat_shift_imm_q,\ + neon_sat_shift_imm_narrow_q,\ + neon_sat_shift_reg,\ + neon_sat_shift_reg_q,\ +\ + neon_ins,\ + neon_ins_q,\ + neon_move,\ + neon_move_q,\ + neon_move_narrow_q,\ + neon_permute,\ + neon_permute_q,\ + neon_zip,\ + neon_zip_q,\ + neon_tbl1,\ + neon_tbl1_q,\ + neon_tbl2,\ + neon_tbl2_q,\ + neon_tbl3,\ + neon_tbl3_q,\ + neon_tbl4,\ + neon_tbl4_q,\ +\ + neon_bsl,\ + neon_bsl_q,\ + neon_cls,\ + neon_cls_q,\ + neon_cnt,\ + neon_cnt_q,\ + neon_dup,\ + neon_dup_q,\ + neon_ext,\ + neon_ext_q,\ + neon_rbit,\ + neon_rbit_q,\ + neon_rev,\ + neon_rev_q,\ +\ + neon_mul_b,\ + neon_mul_b_q,\ + neon_mul_h,\ + neon_mul_h_q,\ + neon_mul_s,\ + neon_mul_s_q,\ + neon_mul_b_long,\ + neon_mul_h_long,\ + neon_mul_s_long,\ + neon_mul_d_long,\ + neon_mul_h_scalar,\ + neon_mul_h_scalar_q,\ + neon_mul_s_scalar,\ + neon_mul_s_scalar_q,\ + neon_mul_h_scalar_long,\ + neon_mul_s_scalar_long,\ +\ + neon_sat_mul_b,\ + neon_sat_mul_b_q,\ + neon_sat_mul_h,\ + neon_sat_mul_h_q,\ + neon_sat_mul_s,\ + neon_sat_mul_s_q,\ + neon_sat_mul_b_long,\ + neon_sat_mul_h_long,\ + neon_sat_mul_s_long,\ + neon_sat_mul_h_scalar,\ + neon_sat_mul_h_scalar_q,\ + neon_sat_mul_s_scalar,\ + neon_sat_mul_s_scalar_q,\ + neon_sat_mul_h_scalar_long,\ + neon_sat_mul_s_scalar_long,\ +\ + neon_mla_b,\ + neon_mla_b_q,\ + neon_mla_h,\ + neon_mla_h_q,\ + neon_mla_s,\ + neon_mla_s_q,\ + neon_mla_b_long,\ + neon_mla_h_long,\ + neon_mla_s_long,\ + neon_mla_h_scalar,\ + neon_mla_h_scalar_q,\ + neon_mla_s_scalar,\ + neon_mla_s_scalar_q,\ + neon_mla_h_scalar_long,\ + neon_mla_s_scalar_long,\ +\ + neon_sat_mla_b_long,\ + neon_sat_mla_h_long,\ + neon_sat_mla_s_long,\ + neon_sat_mla_h_scalar_long,\ + neon_sat_mla_s_scalar_long,\ +\ + neon_to_gp,\ + neon_to_gp_q,\ + neon_from_gp,\ + neon_from_gp_q,\ +\ + neon_ldr,\ + neon_load1_1reg,\ + neon_load1_1reg_q,\ + neon_load1_2reg,\ + neon_load1_2reg_q,\ + neon_load1_3reg,\ + neon_load1_3reg_q,\ + neon_load1_4reg,\ + neon_load1_4reg_q,\ + neon_load1_all_lanes,\ + neon_load1_all_lanes_q,\ + neon_load1_one_lane,\ + neon_load1_one_lane_q,\ +\ + neon_load2_2reg,\ + neon_load2_2reg_q,\ + neon_load2_4reg,\ + neon_load2_4reg_q,\ + neon_load2_all_lanes,\ + neon_load2_all_lanes_q,\ + neon_load2_one_lane,\ + neon_load2_one_lane_q,\ +\ + neon_load3_3reg,\ + neon_load3_3reg_q,\ + neon_load3_all_lanes,\ + neon_load3_all_lanes_q,\ + neon_load3_one_lane,\ + neon_load3_one_lane_q,\ +\ + neon_load4_4reg,\ + neon_load4_4reg_q,\ + neon_load4_all_lanes,\ + neon_load4_all_lanes_q,\ + neon_load4_one_lane,\ + neon_load4_one_lane_q,\ +\ + neon_str,\ + neon_store1_1reg,\ + neon_store1_1reg_q,\ + neon_store1_2reg,\ + neon_store1_2reg_q,\ + neon_store1_3reg,\ + neon_store1_3reg_q,\ + neon_store1_4reg,\ + neon_store1_4reg_q,\ + neon_store1_one_lane,\ + neon_store1_one_lane_q,\ +\ + neon_store2_2reg,\ + neon_store2_2reg_q,\ + neon_store2_4reg,\ + neon_store2_4reg_q,\ + neon_store2_one_lane,\ + neon_store2_one_lane_q,\ +\ + neon_store3_3reg,\ + neon_store3_3reg_q,\ + neon_store3_one_lane,\ + neon_store3_one_lane_q,\ +\ + neon_store4_4reg,\ + neon_store4_4reg_q,\ + neon_store4_one_lane,\ + neon_store4_one_lane_q,\ +\ + neon_fp_abs_s,\ + neon_fp_abs_s_q,\ + neon_fp_abs_d,\ + neon_fp_abs_d_q,\ + neon_fp_neg_s,\ + neon_fp_neg_s_q,\ + neon_fp_neg_d,\ + neon_fp_neg_d_q,\ +\ + neon_fp_abd_s,\ + neon_fp_abd_s_q,\ + neon_fp_abd_d,\ + neon_fp_abd_d_q,\ + neon_fp_addsub_s,\ + neon_fp_addsub_s_q,\ + neon_fp_addsub_d,\ 
+ neon_fp_addsub_d_q,\ + neon_fp_compare_s,\ + neon_fp_compare_s_q,\ + neon_fp_compare_d,\ + neon_fp_compare_d_q,\ + neon_fp_minmax_s,\ + neon_fp_minmax_s_q,\ + neon_fp_minmax_d,\ + neon_fp_minmax_d_q,\ +\ + neon_fp_reduc_add_s,\ + neon_fp_reduc_add_s_q,\ + neon_fp_reduc_add_d,\ + neon_fp_reduc_add_d_q,\ + neon_fp_reduc_minmax_s,\ + neon_fp_reduc_minmax_s_q,\ + neon_fp_reduc_minmax_d,\ + neon_fp_reduc_minmax_d_q,\ +\ + neon_fp_cvt_narrow_s_q,\ + neon_fp_cvt_narrow_d_q,\ + neon_fp_cvt_widen_h,\ + neon_fp_cvt_widen_s,\ +\ + neon_fp_to_int_s,\ + neon_fp_to_int_s_q,\ + neon_fp_to_int_d,\ + neon_fp_to_int_d_q,\ + neon_int_to_fp_s,\ + neon_int_to_fp_s_q,\ + neon_int_to_fp_d,\ + neon_int_to_fp_d_q,\ + neon_fp_round_s,\ + neon_fp_round_s_q,\ + neon_fp_round_d,\ + neon_fp_round_d_q,\ +\ + neon_fp_recpe_s,\ + neon_fp_recpe_s_q,\ + neon_fp_recpe_d,\ + neon_fp_recpe_d_q,\ + neon_fp_recps_s,\ + neon_fp_recps_s_q,\ + neon_fp_recps_d,\ + neon_fp_recps_d_q,\ + neon_fp_recpx_s,\ + neon_fp_recpx_s_q,\ + neon_fp_recpx_d,\ + neon_fp_recpx_d_q,\ +\ + neon_fp_rsqrte_s,\ + neon_fp_rsqrte_s_q,\ + neon_fp_rsqrte_d,\ + neon_fp_rsqrte_d_q,\ + neon_fp_rsqrts_s,\ + neon_fp_rsqrts_s_q,\ + neon_fp_rsqrts_d,\ + neon_fp_rsqrts_d_q,\ +\ + neon_fp_mul_s,\ + neon_fp_mul_s_q,\ + neon_fp_mul_s_scalar,\ + neon_fp_mul_s_scalar_q,\ + neon_fp_mul_d,\ + neon_fp_mul_d_q,\ + neon_fp_mul_d_scalar_q,\ +\ + neon_fp_mla_s,\ + neon_fp_mla_s_q,\ + neon_fp_mla_s_scalar,\ + neon_fp_mla_s_scalar_q,\ + neon_fp_mla_d,\ + neon_fp_mla_d_q,\ + neon_fp_mla_d_scalar_q,\ +\ + neon_fp_sqrt_s,\ + neon_fp_sqrt_s_q,\ + neon_fp_sqrt_d,\ + neon_fp_sqrt_d_q,\ + neon_fp_div_s,\ + neon_fp_div_s_q,\ + neon_fp_div_d,\ + neon_fp_div_d_q,\ +\ + crypto_aes,\ + crypto_sha1_xor,\ + crypto_sha1_fast,\ + crypto_sha1_slow,\ + crypto_sha256_fast,\ + crypto_sha256_slow" + (const_string "untyped")) + +; Is this an (integer side) multiply with a 32-bit (or smaller) result? +(define_attr "mul32" "no,yes" + (if_then_else + (eq_attr "type" + "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\ + smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld") + (const_string "yes") + (const_string "no"))) + +; Is this an (integer side) multiply with a 64-bit result? +(define_attr "mul64" "no,yes" + (if_then_else + (eq_attr "type" + "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals") + (const_string "yes") + (const_string "no"))) diff --git a/gcc-4.9/gcc/config/arm/uclinux-eabi.h b/gcc-4.9/gcc/config/arm/uclinux-eabi.h new file mode 100644 index 000000000..b5055ce40 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/uclinux-eabi.h @@ -0,0 +1,67 @@ +/* Definitions for ARM EABI ucLinux + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by Paul Brook + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Override settings that are different to the uclinux-elf or + bpabi defaults. 
*/ + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_SINGLE_PIC_BASE | MASK_INTERWORK) + +/* On EABI GNU/Linux, we want both the BPABI builtins and the + GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + TARGET_BPABI_CPP_BUILTINS(); \ + builtin_define ("__uClinux__"); \ + builtin_define ("__gnu_linux__"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + } \ + while (false) + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux_eabi -elf2flt" \ + " --pic-veneer --target2=abs" + +/* We default to the "aapcs-linux" ABI so that enums are int-sized by + default. */ +#undef ARM_DEFAULT_ABI +#define ARM_DEFAULT_ABI ARM_ABI_AAPCS_LINUX + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. */ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + register unsigned long _beg __asm ("a1") = (unsigned long) (BEG); \ + register unsigned long _end __asm ("a2") = (unsigned long) (END); \ + register unsigned long _flg __asm ("a3") = 0; \ + register unsigned long _scno __asm ("r7") = 0xf0002; \ + __asm __volatile ("swi 0x0 @ sys_cacheflush" \ + : "=r" (_beg) \ + : "0" (_beg), "r" (_end), "r" (_flg), "r" (_scno)); \ +} + +#define ARM_TARGET2_DWARF_FORMAT DW_EH_PE_absptr diff --git a/gcc-4.9/gcc/config/arm/uclinux-elf.h b/gcc-4.9/gcc/config/arm/uclinux-elf.h new file mode 100644 index 000000000..5cd4fe527 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/uclinux-elf.h @@ -0,0 +1,84 @@ +/* Definitions for ARM running ucLinux using ELF + Copyright (C) 1999-2014 Free Software Foundation, Inc. + Contributed by Philip Blundell + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* We don't want a PLT. */ +#undef NEED_PLT_RELOC +#define NEED_PLT_RELOC 0 + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_SINGLE_PIC_BASE) + +/* NOTE: The remaining definitions in this file are needed because uclinux + does not use config/linux.h. */ + +/* Add GNU/Linux builtins. */ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__uClinux__"); \ + builtin_define ("__gnu_linux__"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + } \ + while (false) + +/* Do not assume anything about header files. */ +#define NO_IMPLICIT_EXTERN_C + +/* The GNU C++ standard library requires that these macros be defined. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" + +#undef SUBTARGET_EXTRA_LINK_SPEC +#define SUBTARGET_EXTRA_LINK_SPEC " -m armelf_linux" + +/* Now we define the strings used to build the spec file. 
*/ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt1%O%s crti%O%s crtbegin%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef CC1_SPEC +#define CC1_SPEC "%{profile:-p}" + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %G %L %{static:--end-group}%{!static:%G %L}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +#undef LINK_SPEC +#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X -elf2flt" + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc-4.9/gcc/config/arm/unknown-elf.h b/gcc-4.9/gcc/config/arm/unknown-elf.h new file mode 100644 index 000000000..ec6f9a488 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/unknown-elf.h @@ -0,0 +1,96 @@ +/* Definitions for non-Linux based ARM systems using ELF + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Catherine Moore + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* elfos.h should have already been included. Now just override + any conflicting definitions and add any extras. */ + +/* Run-time Target Specification. */ + +/* Default to using software floating point. */ +#ifndef TARGET_DEFAULT +#define TARGET_DEFAULT (0) +#endif + +/* Now we define the strings used to build the spec file. */ +#define UNKNOWN_ELF_STARTFILE_SPEC " crti%O%s crtbegin%O%s crt0%O%s" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC UNKNOWN_ELF_STARTFILE_SPEC + +#define UNKNOWN_ELF_ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC UNKNOWN_ELF_ENDFILE_SPEC + +/* The __USES_INITFINI__ define is tested in newlib/libc/sys/arm/crt0.S + to see if it needs to invoked _init() and _fini(). */ +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__USES_INITFINI__" + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Return a nonzero value if DECL has a section attribute. */ +#define IN_NAMED_SECTION_P(DECL) \ + ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \ + && DECL_SECTION_NAME (DECL) != NULL_TREE) + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + if (IN_NAMED_SECTION_P (DECL)) \ + switch_to_section (get_named_section (DECL, NULL, 0)); \ + else \ + switch_to_section (bss_section); \ + \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + \ + last_assemble_variable_decl = DECL; \ + ASM_DECLARE_OBJECT_NAME (FILE, NAME, DECL); \ + ASM_OUTPUT_SKIP (FILE, SIZE ? 
(int)(SIZE) : 1); \ + } \ + while (0) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + if ((DECL) != NULL && IN_NAMED_SECTION_P (DECL)) \ + switch_to_section (get_named_section (DECL, NULL, 0)); \ + else \ + switch_to_section (bss_section); \ + \ + ASM_OUTPUT_ALIGN (FILE, floor_log2 (ALIGN / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + fprintf (FILE, "\t.space\t%d\n", SIZE ? (int)(SIZE) : 1); \ + } \ + while (0) + +#ifndef SUBTARGET_CPU_DEFAULT +#define SUBTARGET_CPU_DEFAULT TARGET_CPU_arm7tdmi +#endif + +/* The libgcc udivmod functions may throw exceptions. If newlib is + configured to support long longs in I/O, then printf will depend on + udivmoddi4, which will depend on the exception unwind routines, + which will depend on abort, which is defined in libc. */ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC "--start-group %G %L --end-group" diff --git a/gcc-4.9/gcc/config/arm/unspecs.md b/gcc-4.9/gcc/config/arm/unspecs.md new file mode 100644 index 000000000..8caa953bc --- /dev/null +++ b/gcc-4.9/gcc/config/arm/unspecs.md @@ -0,0 +1,305 @@ +;; Unspec defintions. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by ARM Ltd. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; UNSPEC Usage: +;; Note: sin and cos are no-longer used. +;; Unspec enumerators for Neon are defined in neon.md. +;; Unspec enumerators for iwmmxt2 are defined in iwmmxt2.md + +(define_c_enum "unspec" [ + UNSPEC_PUSH_MULT ; `push multiple' operation: + ; operand 0 is the first register, + ; subsequent registers are in parallel (use ...) + ; expressions. + UNSPEC_PIC_SYM ; A symbol that has been treated properly for pic + ; usage, that is, we will add the pic_register + ; value to it before trying to dereference it. + UNSPEC_PIC_BASE ; Add PC and all but the last operand together, + ; The last operand is the number of a PIC_LABEL + ; that points at the containing instruction. + UNSPEC_PRLG_STK ; A special barrier that prevents frame accesses + ; being scheduled before the stack adjustment insn. + UNSPEC_REGISTER_USE ; As USE insns are not meaningful after reload, + ; this unspec is used to prevent the deletion of + ; instructions setting registers for EH handling + ; and stack frame generation. Operand 0 is the + ; register to "use". + UNSPEC_CHECK_ARCH ; Set CCs to indicate 26-bit or 32-bit mode. + UNSPEC_WSHUFH ; Used by the intrinsic form of the iWMMXt WSHUFH instruction. + UNSPEC_WACC ; Used by the intrinsic form of the iWMMXt WACC instruction. + UNSPEC_TMOVMSK ; Used by the intrinsic form of the iWMMXt TMOVMSK instruction. + UNSPEC_WSAD ; Used by the intrinsic form of the iWMMXt WSAD instruction. + UNSPEC_WSADZ ; Used by the intrinsic form of the iWMMXt WSADZ instruction. + UNSPEC_WMACS ; Used by the intrinsic form of the iWMMXt WMACS instruction. 
+ UNSPEC_WMACU ; Used by the intrinsic form of the iWMMXt WMACU instruction. + UNSPEC_WMACSZ ; Used by the intrinsic form of the iWMMXt WMACSZ instruction. + UNSPEC_WMACUZ ; Used by the intrinsic form of the iWMMXt WMACUZ instruction. + UNSPEC_CLRDI ; Used by the intrinsic form of the iWMMXt CLRDI instruction. + UNSPEC_WALIGNI ; Used by the intrinsic form of the iWMMXt WALIGN instruction. + UNSPEC_TLS ; A symbol that has been treated properly for TLS usage. + UNSPEC_PIC_LABEL ; A label used for PIC access that does not appear in the + ; instruction stream. + UNSPEC_PIC_OFFSET ; A symbolic 12-bit OFFSET that has been treated + ; correctly for PIC usage. + UNSPEC_GOTSYM_OFF ; The offset of the start of the GOT from a + ; a given symbolic address. + UNSPEC_THUMB1_CASESI ; A Thumb1 compressed dispatch-table call. + UNSPEC_RBIT ; rbit operation. + UNSPEC_SYMBOL_OFFSET ; The offset of the start of the symbol from + ; another symbolic address. + UNSPEC_MEMORY_BARRIER ; Represent a memory barrier. + UNSPEC_UNALIGNED_LOAD ; Used to represent ldr/ldrh instructions that access + ; unaligned locations, on architectures which support + ; that. + UNSPEC_UNALIGNED_STORE ; Same for str/strh. + UNSPEC_PIC_UNIFIED ; Create a common pic addressing form. + UNSPEC_LL ; Represent an unpaired load-register-exclusive. + UNSPEC_VRINTZ ; Represent a float to integral float rounding + ; towards zero. + UNSPEC_VRINTP ; Represent a float to integral float rounding + ; towards +Inf. + UNSPEC_VRINTM ; Represent a float to integral float rounding + ; towards -Inf. + UNSPEC_VRINTR ; Represent a float to integral float rounding + ; FPSCR rounding mode. + UNSPEC_VRINTX ; Represent a float to integral float rounding + ; FPSCR rounding mode and signal inexactness. + UNSPEC_VRINTA ; Represent a float to integral float rounding + ; towards nearest, ties away from zero. +]) + +(define_c_enum "unspec" [ + UNSPEC_WADDC ; Used by the intrinsic form of the iWMMXt WADDC instruction. + UNSPEC_WABS ; Used by the intrinsic form of the iWMMXt WABS instruction. + UNSPEC_WQMULWMR ; Used by the intrinsic form of the iWMMXt WQMULWMR instruction. + UNSPEC_WQMULMR ; Used by the intrinsic form of the iWMMXt WQMULMR instruction. + UNSPEC_WQMULWM ; Used by the intrinsic form of the iWMMXt WQMULWM instruction. + UNSPEC_WQMULM ; Used by the intrinsic form of the iWMMXt WQMULM instruction. + UNSPEC_WQMIAxyn ; Used by the intrinsic form of the iWMMXt WMIAxyn instruction. + UNSPEC_WQMIAxy ; Used by the intrinsic form of the iWMMXt WMIAxy instruction. + UNSPEC_TANDC ; Used by the intrinsic form of the iWMMXt TANDC instruction. + UNSPEC_TORC ; Used by the intrinsic form of the iWMMXt TORC instruction. + UNSPEC_TORVSC ; Used by the intrinsic form of the iWMMXt TORVSC instruction. + UNSPEC_TEXTRC ; Used by the intrinsic form of the iWMMXt TEXTRC instruction. +]) + + +;; UNSPEC_VOLATILE Usage: + +(define_c_enum "unspecv" [ + VUNSPEC_BLOCKAGE ; `blockage' insn to prevent scheduling across an + ; insn in the code. + VUNSPEC_EPILOGUE ; `epilogue' insn, used to represent any part of the + ; instruction epilogue sequence that isn't expanded + ; into normal RTL. Used for both normal and sibcall + ; epilogues. + VUNSPEC_THUMB1_INTERWORK ; `prologue_thumb1_interwork' insn, used to swap + ; modes from arm to thumb. + VUNSPEC_ALIGN ; `align' insn. Used at the head of a minipool table + ; for inlined constants. + VUNSPEC_POOL_END ; `end-of-table'. Used to mark the end of a minipool + ; table. + VUNSPEC_POOL_1 ; `pool-entry(1)'. 
An entry in the constant pool for + ; an 8-bit object. + VUNSPEC_POOL_2 ; `pool-entry(2)'. An entry in the constant pool for + ; a 16-bit object. + VUNSPEC_POOL_4 ; `pool-entry(4)'. An entry in the constant pool for + ; a 32-bit object. + VUNSPEC_POOL_8 ; `pool-entry(8)'. An entry in the constant pool for + ; a 64-bit object. + VUNSPEC_POOL_16 ; `pool-entry(16)'. An entry in the constant pool for + ; a 128-bit object. + VUNSPEC_TMRC ; Used by the iWMMXt TMRC instruction. + VUNSPEC_TMCR ; Used by the iWMMXt TMCR instruction. + VUNSPEC_ALIGN8 ; 8-byte alignment version of VUNSPEC_ALIGN + VUNSPEC_WCMP_EQ ; Used by the iWMMXt WCMPEQ instructions + VUNSPEC_WCMP_GTU ; Used by the iWMMXt WCMPGTU instructions + VUNSPEC_WCMP_GT ; Used by the iwMMXT WCMPGT instructions + VUNSPEC_EH_RETURN ; Use to override the return address for exception + ; handling. + VUNSPEC_ATOMIC_CAS ; Represent an atomic compare swap. + VUNSPEC_ATOMIC_XCHG ; Represent an atomic exchange. + VUNSPEC_ATOMIC_OP ; Represent an atomic operation. + VUNSPEC_LL ; Represent a load-register-exclusive. + VUNSPEC_SC ; Represent a store-register-exclusive. + VUNSPEC_LAX ; Represent a load-register-acquire-exclusive. + VUNSPEC_SLX ; Represent a store-register-release-exclusive. + VUNSPEC_LDA ; Represent a store-register-acquire. + VUNSPEC_STL ; Represent a store-register-release. +]) + +;; Enumerators for NEON unspecs. +(define_c_enum "unspec" [ + UNSPEC_ASHIFT_SIGNED + UNSPEC_ASHIFT_UNSIGNED + UNSPEC_CRC32B + UNSPEC_CRC32H + UNSPEC_CRC32W + UNSPEC_CRC32CB + UNSPEC_CRC32CH + UNSPEC_CRC32CW + UNSPEC_AESD + UNSPEC_AESE + UNSPEC_AESIMC + UNSPEC_AESMC + UNSPEC_SHA1C + UNSPEC_SHA1M + UNSPEC_SHA1P + UNSPEC_SHA1H + UNSPEC_SHA1SU0 + UNSPEC_SHA1SU1 + UNSPEC_SHA256H + UNSPEC_SHA256H2 + UNSPEC_SHA256SU0 + UNSPEC_SHA256SU1 + UNSPEC_VMULLP64 + UNSPEC_LOAD_COUNT + UNSPEC_VABD + UNSPEC_VABDL + UNSPEC_VADD + UNSPEC_VADDHN + UNSPEC_VADDL + UNSPEC_VADDW + UNSPEC_VBSL + UNSPEC_VCAGE + UNSPEC_VCAGT + UNSPEC_VCEQ + UNSPEC_VCGE + UNSPEC_VCGEU + UNSPEC_VCGT + UNSPEC_VCGTU + UNSPEC_VCLS + UNSPEC_VCONCAT + UNSPEC_VCVT + UNSPEC_VCVT_N + UNSPEC_VEXT + UNSPEC_VHADD + UNSPEC_VHSUB + UNSPEC_VLD1 + UNSPEC_VLD1_LANE + UNSPEC_VLD2 + UNSPEC_VLD2_DUP + UNSPEC_VLD2_LANE + UNSPEC_VLD3 + UNSPEC_VLD3A + UNSPEC_VLD3B + UNSPEC_VLD3_DUP + UNSPEC_VLD3_LANE + UNSPEC_VLD4 + UNSPEC_VLD4A + UNSPEC_VLD4B + UNSPEC_VLD4_DUP + UNSPEC_VLD4_LANE + UNSPEC_VMAX + UNSPEC_VMIN + UNSPEC_VMLA + UNSPEC_VMLAL + UNSPEC_VMLA_LANE + UNSPEC_VMLAL_LANE + UNSPEC_VMLS + UNSPEC_VMLSL + UNSPEC_VMLS_LANE + UNSPEC_VMLSL_LANE + UNSPEC_VMOVL + UNSPEC_VMOVN + UNSPEC_VMUL + UNSPEC_VMULL + UNSPEC_VMUL_LANE + UNSPEC_VMULL_LANE + UNSPEC_VPADAL + UNSPEC_VPADD + UNSPEC_VPADDL + UNSPEC_VPMAX + UNSPEC_VPMIN + UNSPEC_VPSMAX + UNSPEC_VPSMIN + UNSPEC_VPUMAX + UNSPEC_VPUMIN + UNSPEC_VQABS + UNSPEC_VQADD + UNSPEC_VQDMLAL + UNSPEC_VQDMLAL_LANE + UNSPEC_VQDMLSL + UNSPEC_VQDMLSL_LANE + UNSPEC_VQDMULH + UNSPEC_VQDMULH_LANE + UNSPEC_VQDMULL + UNSPEC_VQDMULL_LANE + UNSPEC_VQMOVN + UNSPEC_VQMOVUN + UNSPEC_VQNEG + UNSPEC_VQSHL + UNSPEC_VQSHL_N + UNSPEC_VQSHLU_N + UNSPEC_VQSHRN_N + UNSPEC_VQSHRUN_N + UNSPEC_VQSUB + UNSPEC_VRECPE + UNSPEC_VRECPS + UNSPEC_VREV16 + UNSPEC_VREV32 + UNSPEC_VREV64 + UNSPEC_VRSQRTE + UNSPEC_VRSQRTS + UNSPEC_VSHL + UNSPEC_VSHLL_N + UNSPEC_VSHL_N + UNSPEC_VSHR_N + UNSPEC_VSHRN_N + UNSPEC_VSLI + UNSPEC_VSRA_N + UNSPEC_VSRI + UNSPEC_VST1 + UNSPEC_VST1_LANE + UNSPEC_VST2 + UNSPEC_VST2_LANE + UNSPEC_VST3 + UNSPEC_VST3A + UNSPEC_VST3B + UNSPEC_VST3_LANE + UNSPEC_VST4 + UNSPEC_VST4A + UNSPEC_VST4B + UNSPEC_VST4_LANE 
+ UNSPEC_VSTRUCTDUMMY + UNSPEC_VSUB + UNSPEC_VSUBHN + UNSPEC_VSUBL + UNSPEC_VSUBW + UNSPEC_VTBL + UNSPEC_VTBX + UNSPEC_VTRN1 + UNSPEC_VTRN2 + UNSPEC_VTST + UNSPEC_VUZP1 + UNSPEC_VUZP2 + UNSPEC_VZIP1 + UNSPEC_VZIP2 + UNSPEC_MISALIGNED_ACCESS + UNSPEC_VCLE + UNSPEC_VCLT + UNSPEC_NVRINTZ + UNSPEC_NVRINTP + UNSPEC_NVRINTM + UNSPEC_NVRINTX + UNSPEC_NVRINTA + UNSPEC_NVRINTN +]) + diff --git a/gcc-4.9/gcc/config/arm/vec-common.md b/gcc-4.9/gcc/config/arm/vec-common.md new file mode 100644 index 000000000..ba0b58806 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vec-common.md @@ -0,0 +1,136 @@ +;; Machine Description for shared bits common to IWMMXT and Neon. +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Vector Moves + +(define_expand "mov" + [(set (match_operand:VALL 0 "nonimmediate_operand" "") + (match_operand:VALL 1 "general_operand" ""))] + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ + if (can_create_pseudo_p ()) + { + if (!REG_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + else if (TARGET_NEON && CONSTANT_P (operands[1])) + { + operands[1] = neon_make_constant (operands[1]); + gcc_assert (operands[1] != NULL_RTX); + } + } +}) + +;; Vector arithmetic. Expanders are blank, then unnamed insns implement +;; patterns separately for IWMMXT and Neon. 
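+;; As an illustrative sketch (simplified, and not an exact copy of the
+;; pattern in neon.md), the RTL produced by the blank "add<mode>3"
+;; expander below is matched elsewhere by an unnamed insn along these
+;; lines:
+;;
+;;   (define_insn "*add<mode>3_neon"
+;;     [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+;;           (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+;;                     (match_operand:VDQ 2 "s_register_operand" "w")))]
+;;     "TARGET_NEON"
+;;     "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+;;     [(set_attr "type" "neon_add<q>")])
+;;
+;; The corresponding iWMMXt patterns live in iwmmxt.md and are enabled by
+;; the TARGET_REALLY_IWMMXT half of each expander's condition.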
+ +(define_expand "add3" + [(set (match_operand:VALL 0 "s_register_operand" "") + (plus:VALL (match_operand:VALL 1 "s_register_operand" "") + (match_operand:VALL 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "sub3" + [(set (match_operand:VALL 0 "s_register_operand" "") + (minus:VALL (match_operand:VALL 1 "s_register_operand" "") + (match_operand:VALL 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "mul3" + [(set (match_operand:VALLW 0 "s_register_operand" "") + (mult:VALLW (match_operand:VALLW 1 "s_register_operand" "") + (match_operand:VALLW 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (mode == V4HImode && TARGET_REALLY_IWMMXT)" +{ +}) + +(define_expand "smin3" + [(set (match_operand:VALLW 0 "s_register_operand" "") + (smin:VALLW (match_operand:VALLW 1 "s_register_operand" "") + (match_operand:VALLW 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "umin3" + [(set (match_operand:VINTW 0 "s_register_operand" "") + (umin:VINTW (match_operand:VINTW 1 "s_register_operand" "") + (match_operand:VINTW 2 "s_register_operand" "")))] + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "smax3" + [(set (match_operand:VALLW 0 "s_register_operand" "") + (smax:VALLW (match_operand:VALLW 1 "s_register_operand" "") + (match_operand:VALLW 2 "s_register_operand" "")))] + "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) + || flag_unsafe_math_optimizations)) + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "umax3" + [(set (match_operand:VINTW 0 "s_register_operand" "") + (umax:VINTW (match_operand:VINTW 1 "s_register_operand" "") + (match_operand:VINTW 2 "s_register_operand" "")))] + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ +}) + +(define_expand "vec_perm_const" + [(match_operand:VALL 0 "s_register_operand" "") + (match_operand:VALL 1 "s_register_operand" "") + (match_operand:VALL 2 "s_register_operand" "") + (match_operand: 3 "" "")] + "TARGET_NEON + || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" +{ + if (arm_expand_vec_perm_const (operands[0], operands[1], + operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "vec_perm" + [(match_operand:VE 0 "s_register_operand" "") + (match_operand:VE 1 "s_register_operand" "") + (match_operand:VE 2 "s_register_operand" "") + (match_operand:VE 3 "s_register_operand" "")] + "TARGET_NEON && !BYTES_BIG_ENDIAN" +{ + arm_expand_vec_perm (operands[0], operands[1], operands[2], operands[3]); + DONE; +}) diff --git a/gcc-4.9/gcc/config/arm/vfp.md b/gcc-4.9/gcc/config/arm/vfp.md new file mode 100644 index 000000000..e1a48eeea --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vfp.md @@ -0,0 +1,1330 @@ +;; ARM VFP instruction patterns +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . */ + +;; SImode moves +;; ??? For now do not allow loading constants into vfp regs. This causes +;; problems because small constants get converted into adds. +(define_insn "*arm_movsi_vfp" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))] + "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT + && ( s_register_operand (operands[0], SImode) + || s_register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: case 1: + return \"mov%?\\t%0, %1\"; + case 2: + return \"mvn%?\\t%0, #%B1\"; + case 3: + return \"movw%?\\t%0, %1\"; + case 4: + return \"ldr%?\\t%0, %1\"; + case 5: + return \"str%?\\t%1, %0\"; + case 6: + return \"fmsr%?\\t%0, %1\\t%@ int\"; + case 7: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 8: + return \"fcpys%?\\t%0, %1\\t%@ int\"; + case 9: case 10: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") + (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] +) + +;; See thumb2.md:thumb2_movsi_insn for an explanation of the split +;; high/low register alternatives for loads and stores here. +;; The l/Py alternative should come after r/I to ensure that the short variant +;; is chosen with length 2 when the instruction is predicated for +;; arm_restrict_it. 
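+;; As an illustrative example of the point above: with -mrestrict-it an
+;; IT block is limited to a single 16-bit instruction, so a predicated
+;; move such as
+;;   it	eq
+;;   moveq	r0, #1		@ 16-bit encoding, l/Py alternative
+;; must come out in its 2-byte form, and ordering the alternatives this
+;; way lets the length attribute reflect that.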
+(define_insn "*thumb2_movsi_vfp" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") + (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] + "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT + && ( s_register_operand (operands[0], SImode) + || s_register_operand (operands[1], SImode))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + return \"mov%?\\t%0, %1\"; + case 3: + return \"mvn%?\\t%0, #%B1\"; + case 4: + return \"movw%?\\t%0, %1\"; + case 5: + case 6: + return \"ldr%?\\t%0, %1\"; + case 7: + case 8: + return \"str%?\\t%1, %0\"; + case 9: + return \"fmsr%?\\t%0, %1\\t%@ int\"; + case 10: + return \"fmrs%?\\t%0, %1\\t%@ int\"; + case 11: + return \"fcpys%?\\t%0, %1\\t%@ int\"; + case 12: case 13: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") + (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] +) + + +;; DImode moves + +(define_insn "*movdi_vfp" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv") + (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8 + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)) + && !(TARGET_NEON && CONST_INT_P (operands[1]) + && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))" + "* + switch (which_alternative) + { + case 0: + case 1: + case 2: + case 3: + return \"#\"; + case 4: + case 5: + case 6: + return output_move_double (operands, true, NULL); + case 7: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 8: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 9: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 10: case 11: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored") + (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8) + (eq_attr "alternative" "2") (const_int 12) + (eq_attr "alternative" "3") (const_int 16) + (eq_attr "alternative" "9") + (if_then_else + (match_test "TARGET_VFP_SINGLE") + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "arm_pool_range" "*,*,*,*,1020,4096,*,*,*,*,1020,*") + (set_attr "thumb2_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*") + (set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")] +) + +(define_insn "*movdi_vfp_cortexa8" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv") + (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune == cortexa8 + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode)) + && !(TARGET_NEON && CONST_INT_P (operands[1]) + && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))" + "* + switch (which_alternative) + { + case 0: 
+ case 1: + case 2: + case 3: + return \"#\"; + case 4: + case 5: + case 6: + return output_move_double (operands, true, NULL); + case 7: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 8: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 9: + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 10: case 11: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "type" "multiple,multiple,multiple,multiple,load2,load2,store2,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored") + (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8) + (eq_attr "alternative" "2") (const_int 12) + (eq_attr "alternative" "3") (const_int 16) + (eq_attr "alternative" "4,5,6") + (symbol_ref + "arm_count_output_move_double_insns (operands) \ + * 4")] + (const_int 4))) + (set_attr "predicable" "yes") + (set_attr "arm_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*") + (set_attr "thumb2_pool_range" "*,*,*,*,1018,4094,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*") + (set (attr "ce_count") + (symbol_ref "get_attr_length (insn) / 4")) + (set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")] + ) + +;; HFmode moves +(define_insn "*movhf_vfp_neon" + [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* S register from memory */ + return \"vld1.16\\t{%z0}, %A1\"; + case 1: /* memory from S register */ + return \"vst1.16\\t{%z1}, %A0\"; + case 2: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 3: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 4: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 5: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 6: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 7: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 8: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "neon_load1_1reg,neon_store1_1reg,\ + load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple") + (set_attr "length" "4,4,4,4,4,4,4,4,8")] +) + +;; FP16 without element load/store instructions. 
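+;; Both HFmode move patterns materialize a __fp16 constant through a core
+;; register by splitting the 16-bit image of the value.  As a worked
+;; example (register and value chosen for illustration only): the
+;; constant 3.140625 has the half-precision bit pattern 0x4248, giving
+;;   movw	r0, #16968		@ Thumb-2
+;; or, without movw,
+;;   mov	r0, #16896		@ 0x4200
+;;   orr	r0, r0, #72		@ 0x0048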
+(define_insn "*movhf_vfp" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 2: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 3: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 4: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 5: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 6: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,fmov,mov_reg,f_mcr,f_mrc,multiple") + (set_attr "length" "4,4,4,4,4,4,8")] +) + + +;; SFmode moves +;; Disparage the w<->r cases because reloading an invalid address is +;; preferable to loading the value via integer registers. + +(define_insn "*movsf_vfp" + [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r") + (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( s_register_operand (operands[0], SFmode) + || s_register_operand (operands[1], SFmode))" + "* + switch (which_alternative) + { + case 0: + return \"fmsr%?\\t%0, %1\"; + case 1: + return \"fmrs%?\\t%0, %1\"; + case 2: + return \"fconsts%?\\t%0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: + return \"ldr%?\\t%0, %1\\t%@ float\"; + case 6: + return \"str%?\\t%1, %0\\t%@ float\"; + case 7: + return \"fcpys%?\\t%0, %1\"; + case 8: + return \"mov%?\\t%0, %1\\t%@ float\"; + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "type" + "f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg") + (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")] +) + +(define_insn "*thumb2_movsf_vfp" + [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t ,Uv,r ,m,t,r") + (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP + && ( s_register_operand (operands[0], SFmode) + || s_register_operand (operands[1], SFmode))" + "* + switch (which_alternative) + { + case 0: + return \"fmsr%?\\t%0, %1\"; + case 1: + return \"fmrs%?\\t%0, %1\"; + case 2: + return \"fconsts%?\\t%0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: + return \"ldr%?\\t%0, %1\\t%@ float\"; + case 6: + return \"str%?\\t%1, %0\\t%@ float\"; + case 7: + return \"fcpys%?\\t%0, %1\"; + case 8: + return \"mov%?\\t%0, %1\\t%@ float\"; + default: + gcc_unreachable (); + } + " + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" + 
"f_mcr,f_mrc,fconsts,f_loads,f_stores,load1,store1,fmov,mov_reg") + (set_attr "pool_range" "*,*,*,1018,*,4090,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] +) + +;; DFmode moves + +(define_insn "*movdf_vfp" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r") + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + case 0: + return \"fmdrr%?\\t%P0, %Q1, %R1\"; + case 1: + return \"fmrrd%?\\t%Q0, %R0, %P1\"; + case 2: + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: case 6: + return output_move_double (operands, true, NULL); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; + else + return \"fcpyd%?\\t%P0, %P1\"; + case 8: + return \"#\"; + default: + gcc_unreachable (); + } + } + " + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\ + load2,store2,ffarithd,multiple") + (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (match_test "TARGET_VFP_SINGLE") + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "predicable" "yes") + (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")] +) + +(define_insn "*thumb2_movdf_vfp" + [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r") + (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP + && ( register_operand (operands[0], DFmode) + || register_operand (operands[1], DFmode))" + "* + { + switch (which_alternative) + { + case 0: + return \"fmdrr%?\\t%P0, %Q1, %R1\"; + case 1: + return \"fmrrd%?\\t%Q0, %R0, %P1\"; + case 2: + gcc_assert (TARGET_VFP_DOUBLE); + return \"fconstd%?\\t%P0, #%G1\"; + case 3: case 4: + return output_move_vfp (operands); + case 5: case 6: case 8: + return output_move_double (operands, true, NULL); + case 7: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\"; + else + return \"fcpyd%?\\t%P0, %P1\"; + default: + abort (); + } + } + " + [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\ + f_stored,load2,store2,ffarithd,multiple") + (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) + (eq_attr "alternative" "7") + (if_then_else + (match_test "TARGET_VFP_SINGLE") + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*") + (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] +) + + +;; Conditional move patterns + +(define_insn "*movsfcc_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r") + (if_then_else:SF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") + (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcpys%D3\\t%0, %2 + fcpys%d3\\t%0, %1 + fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 + fmsr%D3\\t%0, %2 + fmsr%d3\\t%0, %1 + fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 + fmrs%D3\\t%0, %2 + fmrs%d3\\t%0, %1 + fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,4,4,8,4,4,8") + (set_attr "type" 
"fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] +) + +(define_insn "*thumb2_movsfcc_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r") + (if_then_else:SF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") + (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it" + "@ + it\\t%D3\;fcpys%D3\\t%0, %2 + it\\t%d3\;fcpys%d3\\t%0, %1 + ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1 + it\\t%D3\;fmsr%D3\\t%0, %2 + it\\t%d3\;fmsr%d3\\t%0, %1 + ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1 + it\\t%D3\;fmrs%D3\\t%0, %2 + it\\t%d3\;fmrs%d3\\t%0, %1 + ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1" + [(set_attr "conds" "use") + (set_attr "length" "6,6,10,6,6,10,6,6,10") + (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")] +) + +(define_insn "*movdfcc_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r") + (if_then_else:DF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") + (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcpyd%D3\\t%P0, %P2 + fcpyd%d3\\t%P0, %P1 + fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 + fmdrr%D3\\t%P0, %Q2, %R2 + fmdrr%d3\\t%P0, %Q1, %R1 + fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 + fmrrd%D3\\t%Q0, %R0, %P2 + fmrrd%d3\\t%Q0, %R0, %P1 + fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" + [(set_attr "conds" "use") + (set_attr "length" "4,4,8,4,4,8,4,4,8") + (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")] +) + +(define_insn "*thumb2_movdfcc_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r") + (if_then_else:DF + (match_operator 3 "arm_comparison_operator" + [(match_operand 4 "cc_register" "") (const_int 0)]) + (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w") + (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))] + "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it" + "@ + it\\t%D3\;fcpyd%D3\\t%P0, %P2 + it\\t%d3\;fcpyd%d3\\t%P0, %P1 + ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1 + it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2 + it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1 + ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1 + it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2 + it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1 + ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1" + [(set_attr "conds" "use") + (set_attr "length" "6,6,10,6,6,10,6,6,10") + (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")] +) + + +;; Sign manipulation functions + +(define_insn "*abssf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (abs:SF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fabss%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffariths")] +) + +(define_insn "*absdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (abs:DF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fabsd%?\\t%P0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" 
"ffarithd")] +) + +(define_insn "*negsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t,?r") + (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fnegs%?\\t%0, %1 + eor%?\\t%0, %1, #-2147483648" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffariths")] +) + +(define_insn_and_split "*negdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w,?r,?r") + (neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fnegd%?\\t%P0, %P1 + # + #" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed + && arm_general_register_operand (operands[0], DFmode)" + [(set (match_dup 0) (match_dup 1))] + " + if (REGNO (operands[0]) == REGNO (operands[1])) + { + operands[0] = gen_highpart (SImode, operands[0]); + operands[1] = gen_rtx_XOR (SImode, operands[0], GEN_INT (0x80000000)); + } + else + { + rtx in_hi, in_lo, out_hi, out_lo; + + in_hi = gen_rtx_XOR (SImode, gen_highpart (SImode, operands[1]), + GEN_INT (0x80000000)); + in_lo = gen_lowpart (SImode, operands[1]); + out_hi = gen_highpart (SImode, operands[0]); + out_lo = gen_lowpart (SImode, operands[0]); + + if (REGNO (in_lo) == REGNO (out_hi)) + { + emit_insn (gen_rtx_SET (SImode, out_lo, in_lo)); + operands[0] = out_hi; + operands[1] = in_hi; + } + else + { + emit_insn (gen_rtx_SET (SImode, out_hi, in_hi)); + operands[0] = out_lo; + operands[1] = in_lo; + } + } + " + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "length" "4,4,8") + (set_attr "type" "ffarithd")] +) + + +;; Arithmetic insns + +(define_insn "*addsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (plus:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fadds%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fadds")] +) + +(define_insn "*adddf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (plus:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "faddd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "faddd")] +) + + +(define_insn "*subsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsubs%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fadds")] +) + +(define_insn "*subdf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsubd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "faddd")] +) + + +;; Division insns + +(define_insn "*divsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (div:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fdivs%?\\t%0, %1, %2" + [(set_attr "predicable" 
"yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fdivs")] +) + +(define_insn "*divdf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (div:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fdivd%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fdivd")] +) + + +;; Multiplication insns + +(define_insn "*mulsf3_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (mult:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmuls%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuls")] +) + +(define_insn "*muldf3_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (mult:DF (match_operand:DF 1 "s_register_operand" "w") + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmuld%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuld")] +) + +(define_insn "*mulsf3negsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t")) + (match_operand:SF 2 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmuls%?\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuls")] +) + +(define_insn "*muldf3negdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w")) + (match_operand:DF 2 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmuld%?\\t%P0, %P1, %P2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmuld")] +) + + +;; Multiply-accumulate insns + +;; 0 = 1 * 2 + 0 +(define_insn "*mulsf3addsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3adddf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (plus:DF (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmacd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacd")] +) + +;; 0 = 1 * 2 - 0 +(define_insn "*mulsf3subsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmscs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3subdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (mult:DF 
(match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fmscd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacd")] +) + +;; 0 = -(1 * 2) + 0 +(define_insn "*mulsf3negsfaddsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (match_operand:SF 1 "s_register_operand" "0") + (mult:SF (match_operand:SF 2 "s_register_operand" "t") + (match_operand:SF 3 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmacs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacs")] +) + +(define_insn "*fmuldf3negdfadddf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (match_operand:DF 1 "s_register_operand" "0") + (mult:DF (match_operand:DF 2 "s_register_operand" "w") + (match_operand:DF 3 "s_register_operand" "w"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmacd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacd")] +) + + +;; 0 = -(1 * 2) - 0 +(define_insn "*mulsf3negsfsubsf_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (minus:SF (mult:SF + (neg:SF (match_operand:SF 2 "s_register_operand" "t")) + (match_operand:SF 3 "s_register_operand" "t")) + (match_operand:SF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fnmscs%?\\t%0, %2, %3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacs")] +) + +(define_insn "*muldf3negdfsubdf_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (minus:DF (mult:DF + (neg:DF (match_operand:DF 2 "s_register_operand" "w")) + (match_operand:DF 3 "s_register_operand" "w")) + (match_operand:DF 1 "s_register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fnmscd%?\\t%P0, %P2, %P3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fmacd")] +) + +;; Fused-multiply-accumulate + +(define_insn "fma4" + [(set (match_operand:SDF 0 "register_operand" "=") + (fma:SDF (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" "") + (match_operand:SDF 3 "register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" + "vfma%?.\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffma")] +) + +(define_insn "*fmsub4" + [(set (match_operand:SDF 0 "register_operand" "=") + (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" + "")) + (match_operand:SDF 2 "register_operand" "") + (match_operand:SDF 3 "register_operand" "0")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" + "vfms%?.\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffma")] +) + +(define_insn "*fnmsub4" + [(set (match_operand:SDF 0 "register_operand" "=") + (fma:SDF (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" "") + (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" + "vfnms%?.\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffma")] +) + +(define_insn "*fnmadd4" + [(set (match_operand:SDF 0 
"register_operand" "=") + (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" + "")) + (match_operand:SDF 2 "register_operand" "") + (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA" + "vfnma%?.\\t%0, %1, %2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "ffma")] +) + + +;; Conversion routines + +(define_insn "*extendsfdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fcvtds%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "*truncdfsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fcvtsd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "extendhfsf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "vcvtb%?.f32.f16\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "truncsfhf2" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" + "vcvtb%?.f16.f32\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvt")] +) + +(define_insn "*truncsisf2_vfp" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "ftosizs%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvtf2i")] +) + +(define_insn "*truncsidf2_vfp" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "ftosizd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvtf2i")] +) + + +(define_insn "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "ftouizs%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvtf2i")] +) + +(define_insn "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "s_register_operand" "=t") + (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "ftouizd%?\\t%0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvtf2i")] +) + + +(define_insn "*floatsisf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float:SF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsitos%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + +(define_insn "*floatsidf2_vfp" + [(set 
(match_operand:DF 0 "s_register_operand" "=w") + (float:DF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsitod%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fuitos%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + +(define_insn "floatunssidf2" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fuitod%?\\t%P0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + + +;; Sqrt insns. + +(define_insn "*sqrtsf2_vfp" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fsqrts%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fsqrts")] +) + +(define_insn "*sqrtdf2_vfp" + [(set (match_operand:DF 0 "s_register_operand" "=w") + (sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "fsqrtd%?\\t%P0, %P1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fsqrtd")] +) + + +;; Patterns to split/copy vfp condition flags. + +(define_insn "*movcc_vfp" + [(set (reg CC_REGNUM) + (reg VFPCC_REGNUM))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "fmstat%?" 
+ [(set_attr "conds" "set") + (set_attr "type" "f_flag")] +) + +(define_insn_and_split "*cmpsf_split_vfp" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:SF 0 "s_register_operand" "t") + (match_operand:SF 1 "vfp_compare_operand" "tG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_dup 0) + (match_dup 1))) + (set (reg:CCFP CC_REGNUM) + (reg:CCFP VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpsf_trap_split_vfp" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t") + (match_operand:SF 1 "vfp_compare_operand" "tG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_dup 0) + (match_dup 1))) + (set (reg:CCFPE CC_REGNUM) + (reg:CCFPE VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpdf_split_vfp" + [(set (reg:CCFP CC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "w") + (match_operand:DF 1 "vfp_compare_operand" "wG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_dup 0) + (match_dup 1))) + (set (reg:CCFP CC_REGNUM) + (reg:CCFP VFPCC_REGNUM))] + "" +) + +(define_insn_and_split "*cmpdf_trap_split_vfp" + [(set (reg:CCFPE CC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w") + (match_operand:DF 1 "vfp_compare_operand" "wG")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "#" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_dup 0) + (match_dup 1))) + (set (reg:CCFPE CC_REGNUM) + (reg:CCFPE VFPCC_REGNUM))] + "" +) + + +;; Comparison patterns + +(define_insn "*cmpsf_vfp" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t") + (match_operand:SF 1 "vfp_compare_operand" "t,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcmps%?\\t%0, %1 + fcmpzs%?\\t%0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fcmps")] +) + +(define_insn "*cmpsf_trap_vfp" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t,t") + (match_operand:SF 1 "vfp_compare_operand" "t,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "@ + fcmpes%?\\t%0, %1 + fcmpezs%?\\t%0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fcmps")] +) + +(define_insn "*cmpdf_vfp" + [(set (reg:CCFP VFPCC_REGNUM) + (compare:CCFP (match_operand:DF 0 "s_register_operand" "w,w") + (match_operand:DF 1 "vfp_compare_operand" "w,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcmpd%?\\t%P0, %P1 + fcmpzd%?\\t%P0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fcmpd")] +) + +(define_insn "*cmpdf_trap_vfp" + [(set (reg:CCFPE VFPCC_REGNUM) + (compare:CCFPE (match_operand:DF 0 "s_register_operand" "w,w") + (match_operand:DF 1 "vfp_compare_operand" "w,G")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" + "@ + fcmped%?\\t%P0, %P1 + fcmpezd%?\\t%P0" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "fcmpd")] +) + +;; Fixed point to floating point conversions. 
+(define_code_iterator FCVT [unsigned_float float]) +(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) + +(define_insn "*combine_vcvt_f32_" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0")) + (match_operand 2 + "const_double_vcvt_power_of_two_reciprocal" "Dt")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" + "vcvt%?.f32.\\t%0, %1, %v2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f")] +) + +;; Not the ideal way of implementing this. Ideally we would be able to split +;; this into a move to a DP register and then a vcvt.f64.i32 +(define_insn "*combine_vcvt_f64_" + [(set (match_operand:DF 0 "s_register_operand" "=x,x,w") + (mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r")) + (match_operand 2 + "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math + && !TARGET_VFP_SINGLE" + "@ + vmov%?.f32\\t%0, %1\;vcvt%?.f64.\\t%P0, %P0, %v2 + vmov%?.f32\\t%0, %1\;vcvt%?.f64.\\t%P0, %P0, %v2 + vmov%?.f64\\t%P0, %1, %1\;vcvt%?.f64.\\t%P0, %P0, %v2" + [(set_attr "predicable" "yes") + (set_attr "ce_count" "2") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_cvti2f") + (set_attr "length" "8")] +) + +(define_insn "*combine_vcvtf2i" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (fix:SI (fix:SF (mult:SF (match_operand:SF 1 "s_register_operand" "t") + (match_operand 2 + "const_double_vcvt_power_of_two" "Dp")))))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math" + "vcvt%?.s32.f32\\t%1, %1, %v2\;vmov%?\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "ce_count" "2") + (set_attr "type" "f_cvtf2i") + (set_attr "length" "8")] + ) + +;; Store multiple insn used in function prologue. +(define_insn "*push_multi_vfp" + [(match_parallel 2 "multi_register_push" + [(set (match_operand:BLK 0 "memory_operand" "=m") + (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")] + UNSPEC_PUSH_MULT))])] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" + "* return vfp_output_fstmd (operands);" + [(set_attr "type" "f_stored")] +) + +;; VRINT round to integral instructions. +;; Invoked for the patterns: btruncsf2, btruncdf2, ceilsf2, ceildf2, +;; roundsf2, rounddf2, floorsf2, floordf2, nearbyintsf2, nearbyintdf2, +;; rintsf2, rintdf2. +(define_insn "2" + [(set (match_operand:SDF 0 "register_operand" "=") + (unspec:SDF [(match_operand:SDF 1 + "register_operand" "")] + VRINT))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "vrint%?.\\t%0, %1" + [(set_attr "predicable" "") + (set_attr "predicable_short_it" "no") + (set_attr "type" "f_rint") + (set_attr "conds" "")] +) + +;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. +;; The 'smax' and 'smin' RTL standard pattern names do not specify which +;; operand will be returned when both operands are zero (i.e. they may not +;; honour signed zeroes), or when either operand is NaN. Therefore GCC +;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring +;; NaNs. 
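+
+;; Illustration (an assumption added for exposition, not part of the
+;; original port): with an ARMv8 FPU and flags that allow MIN_EXPR/
+;; MAX_EXPR here (e.g. -ffast-math, or -ffinite-math-only together with
+;; -fno-signed-zeros), a plain C ternary such as
+;;
+;;   double dmax (double a, double b) { return a > b ? a : b; }
+;;
+;; may be recognised as MAX_EXPR and matched by the smax pattern
+;; immediately below, yielding a single vmaxnm.f64; without those flags
+;; the compare-and-branch form is kept so that NaN and signed-zero
+;; semantics stay IEEE-correct.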
+ +(define_insn "smax3" + [(set (match_operand:SDF 0 "register_operand" "=") + (smax:SDF (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "vmaxnm.\\t%0, %1, %2" + [(set_attr "type" "f_minmax") + (set_attr "conds" "unconditional")] +) + +(define_insn "smin3" + [(set (match_operand:SDF 0 "register_operand" "=") + (smin:SDF (match_operand:SDF 1 "register_operand" "") + (match_operand:SDF 2 "register_operand" "")))] + "TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 " + "vminnm.\\t%0, %1, %2" + [(set_attr "type" "f_minmax") + (set_attr "conds" "unconditional")] +) + +;; Unimplemented insns: +;; fldm* +;; fstm* +;; fmdhr et al (VFPv1) +;; Support for xD (single precision only) variants. +;; fmrrs, fmsrr diff --git a/gcc-4.9/gcc/config/arm/vfp11.md b/gcc-4.9/gcc/config/arm/vfp11.md new file mode 100644 index 000000000..2dbb20100 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vfp11.md @@ -0,0 +1,93 @@ +;; ARM VFP11 pipeline description +;; Copyright (C) 2003-2014 Free Software Foundation, Inc. +;; Written by CodeSourcery. +;; +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "vfp11") + +;; There are 3 pipelines in the VFP11 unit. +;; +;; - A 8-stage FMAC pipeline (7 execute + writeback) with forward from +;; fourth stage for simple operations. +;; +;; - A 5-stage DS pipeline (4 execute + writeback) for divide/sqrt insns. +;; These insns also uses first execute stage of FMAC pipeline. +;; +;; - A 4-stage LS pipeline (execute + 2 memory + writeback) with forward from +;; second memory stage for loads. + +;; We do not model Write-After-Read hazards. +;; We do not do write scheduling with the arm core, so it is only necessary +;; to model the first stage of each pipeline +;; ??? Need to model LS pipeline properly for load/store multiple? +;; We do not model fmstat properly. This could be done by modeling pipelines +;; properly and defining an absence set between a dummy fmstat unit and all +;; other vfp units. 
+ +(define_cpu_unit "fmac" "vfp11") + +(define_cpu_unit "ds" "vfp11") + +(define_cpu_unit "vfp_ls" "vfp11") + +(define_cpu_unit "fmstat" "vfp11") + +(exclusion_set "fmac,ds" "fmstat") + +(define_insn_reservation "vfp_ffarith" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd")) + "fmac") + +(define_insn_reservation "vfp_farith" 8 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,f_cvtf2i,f_cvti2f,\ + fmuls,fmacs,ffmas")) + "fmac") + +(define_insn_reservation "vfp_fmul" 9 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fmuld,fmacd,ffmad")) + "fmac*2") + +(define_insn_reservation "vfp_fdivs" 19 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fdivs, fsqrts")) + "ds*15") + +(define_insn_reservation "vfp_fdivd" 33 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "fdivd, fsqrtd")) + "fmac+ds*29") + +;; Moves to/from arm regs also use the load/store pipeline. +(define_insn_reservation "vfp_fload" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_loads,f_loadd,f_mcr,f_mcrr")) + "vfp_ls") + +(define_insn_reservation "vfp_fstore" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_stores,f_stored,f_mrc,f_mrrc")) + "vfp_ls") + +(define_insn_reservation "vfp_to_cpsr" 4 + (and (eq_attr "generic_vfp" "yes") + (eq_attr "type" "f_flag")) + "fmstat,vfp_ls*3") + diff --git a/gcc-4.9/gcc/config/arm/vxworks.h b/gcc-4.9/gcc/config/arm/vxworks.h new file mode 100644 index 000000000..8bef16bc4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vxworks.h @@ -0,0 +1,109 @@ +/* Definitions of target machine for GCC, + for ARM with targeting the VXWorks run time environment. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + + Contributed by: Mike Stump + Brought up to date by CodeSourcery, LLC. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + if (TARGET_BIG_END) \ + builtin_define ("ARMEB"); \ + else \ + builtin_define ("ARMEL"); \ + \ + if (arm_arch_xscale) \ + builtin_define ("CPU=XSCALE"); \ + else if (arm_arch5) \ + builtin_define ("CPU=ARMARCH5"); \ + else if (arm_arch4) \ + { \ + if (thumb_code) \ + builtin_define ("CPU=ARMARCH4_T"); \ + else \ + builtin_define ("CPU=ARMARCH4"); \ + } \ + VXWORKS_OS_CPP_BUILTINS (); \ + } while (0) + +#undef SUBTARGET_OVERRIDE_OPTIONS +#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS + +/* Subsume the arm/elf.h definition, and add RTP hooks. 
*/ +#undef SUBTARGET_CPP_SPEC +#define SUBTARGET_CPP_SPEC "-D__ELF__" VXWORKS_ADDITIONAL_CPP_SPEC + +#undef CC1_SPEC +#define CC1_SPEC \ +"%{tstrongarm:-mlittle-endian -mcpu=strongarm ; \ + t4: -mlittle-endian -march=armv4 ; \ + t4be: -mbig-endian -march=armv4 ; \ + t4t: -mthumb -mthumb-interwork -mlittle-endian -march=armv4t ; \ + t4tbe: -mthumb -mthumb-interwork -mbig-endian -march=armv4t ; \ + t5: -mlittle-endian -march=armv5 ; \ + t5be: -mbig-endian -march=armv5 ; \ + t5t: -mthumb -mthumb-interwork -mlittle-endian -march=armv5 ; \ + t5tbe: -mthumb -mthumb-interwork -mbig-endian -march=armv5 ; \ + txscale: -mlittle-endian -mcpu=xscale ; \ + txscalebe: -mbig-endian -mcpu=xscale ; \ + : -march=armv4}" + +/* Pass -EB for big-endian targets. */ +#define VXWORKS_ENDIAN_SPEC \ + "%{mbig-endian|t4be|t4tbe|t5be|t5tbe|txscalebe:-EB}" + +#undef SUBTARGET_EXTRA_ASM_SPEC +#define SUBTARGET_EXTRA_ASM_SPEC VXWORKS_ENDIAN_SPEC + +#undef LINK_SPEC +#define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_ENDIAN_SPEC + +#undef LIB_SPEC +#define LIB_SPEC VXWORKS_LIB_SPEC + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC + +/* There is no default multilib. */ +#undef MULTILIB_DEFAULTS + +#define FPUTYPE_DEFAULT "vfp" + +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER + +/* We want to be compatible with a version of "2.96" at one point in + the past before this macro was changed. */ +#undef DEFAULT_STRUCTURE_SIZE_BOUNDARY +#define DEFAULT_STRUCTURE_SIZE_BOUNDARY 8 + +/* The kernel loader does not allow relocations to overflow, so we + cannot allow arbitrary relocation addends in kernel modules or RTP + executables. Also, the dynamic loader uses the resolved relocation + value to distinguish references to the text and data segments, so we + cannot allow arbitrary offsets for shared libraries either. */ +#undef ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P +#define ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P 1 + +#undef TARGET_DEFAULT_WORD_RELOCATIONS +#define TARGET_DEFAULT_WORD_RELOCATIONS 1 diff --git a/gcc-4.9/gcc/config/arm/vxworks.opt b/gcc-4.9/gcc/config/arm/vxworks.opt new file mode 100644 index 000000000..ae83422f8 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/vxworks.opt @@ -0,0 +1,59 @@ +; ARM VxWorks options. + +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. + +t4 +Driver + +t4be +Driver + +t4t +Driver + +t4tbe +Driver + +t5 +Driver + +t5be +Driver + +t5t +Driver + +t5tbe +Driver + +tstrongarm +Driver + +txscale +Driver + +txscalebe +Driver + +; This comment is to ensure we retain the blank line above. 
diff --git a/gcc-4.9/gcc/config/arm/x-arm b/gcc-4.9/gcc/config/arm/x-arm new file mode 100644 index 000000000..51cff1ed4 --- /dev/null +++ b/gcc-4.9/gcc/config/arm/x-arm @@ -0,0 +1,3 @@ +driver-arm.o: $(srcdir)/config/arm/driver-arm.c \ + $(CONFIG_H) $(SYSTEM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< diff --git a/gcc-4.9/gcc/config/avr/avr-arch.h b/gcc-4.9/gcc/config/avr/avr-arch.h new file mode 100644 index 000000000..6357e997c --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-arch.h @@ -0,0 +1,156 @@ +/* Definitions of types that are used to store AVR architecture and + device information. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by Georg-Johann Lay (avr@gjlay.de) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +/* This enum supplies indices into the avr_arch_types[] table below. */ + +enum avr_arch +{ + ARCH_UNKNOWN, + ARCH_AVR1, + ARCH_AVR2, + ARCH_AVR25, + ARCH_AVR3, + ARCH_AVR31, + ARCH_AVR35, + ARCH_AVR4, + ARCH_AVR5, + ARCH_AVR51, + ARCH_AVR6, + ARCH_AVRXMEGA2, + ARCH_AVRXMEGA4, + ARCH_AVRXMEGA5, + ARCH_AVRXMEGA6, + ARCH_AVRXMEGA7 +}; + + +/* Architecture-specific properties. */ + +typedef struct +{ + /* Assembler only. */ + int asm_only; + + /* Core have 'MUL*' instructions. */ + int have_mul; + + /* Core have 'CALL' and 'JMP' instructions. */ + int have_jmp_call; + + /* Core have 'MOVW' and 'LPM Rx,Z' instructions. */ + int have_movw_lpmx; + + /* Core have 'ELPM' instructions. */ + int have_elpm; + + /* Core have 'ELPM Rx,Z' instructions. */ + int have_elpmx; + + /* Core have 'EICALL' and 'EIJMP' instructions. */ + int have_eijmp_eicall; + + /* This is an XMEGA core. */ + int xmega_p; + + /* This core has the RAMPD special function register + and thus also the RAMPX, RAMPY and RAMPZ registers. */ + int have_rampd; + + /* Default start of data section address for architecture. */ + int default_data_section_start; + + /* Offset between SFR address and RAM address: + SFR-address = RAM-address - sfr_offset */ + int sfr_offset; + + /* Architecture id to built-in define __AVR_ARCH__ (NULL -> no macro) */ + const char *const macro; + + /* Architecture name. */ + const char *const arch_name; +} avr_arch_t; + + +/* Device-specific properties. */ + +typedef struct +{ + /* Device name. */ + const char *const name; + + /* Index in avr_arch_types[]. */ + enum avr_arch arch; + + /* Must lie outside user's namespace. NULL == no macro. */ + const char *const macro; + + /* Stack pointer have 8 bits width. */ + int short_sp; + + /* Some AVR devices have a core erratum when skipping a 2-word instruction. + Skip instructions are: SBRC, SBRS, SBIC, SBIS, CPSE. + Problems will occur with return address is IRQ executes during the + skip sequence. 
+ + A support ticket from Atmel returned the following information: + + Subject: (ATTicket:644469) On AVR skip-bug core Erratum + From: avr@atmel.com Date: 2011-07-27 + (Please keep the subject when replying to this mail) + + This errata exists only in AT90S8515 and ATmega103 devices. + + For information please refer the following respective errata links + http://www.atmel.com/dyn/resources/prod_documents/doc2494.pdf + http://www.atmel.com/dyn/resources/prod_documents/doc1436.pdf */ + + /* Core Erratum: Must not skip 2-word instruction. */ + int errata_skip; + + /* Start of data section. */ + int data_section_start; + + /* Number of 64k segments in the flash. */ + int n_flash; + + /* Name of device library. */ + const char *const library_name; +} avr_mcu_t; + +/* Map architecture to its texinfo string. */ + +typedef struct +{ + /* Architecture ID. */ + enum avr_arch arch; + + /* textinfo source to describe the archtiecture. */ + const char *texinfo; +} avr_arch_info_t; + +/* Preprocessor macros to define depending on MCU type. */ + +extern const avr_arch_t avr_arch_types[]; +extern const avr_arch_t *avr_current_arch; + +extern const avr_mcu_t avr_mcu_types[]; +extern const avr_mcu_t *avr_current_device; diff --git a/gcc-4.9/gcc/config/avr/avr-c.c b/gcc-4.9/gcc/config/avr/avr-c.c new file mode 100644 index 000000000..101d28092 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-c.c @@ -0,0 +1,402 @@ +/* Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Anatoly Sokolov (aesok@post.ru) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Not included in avr.c since this requires C front end. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "cpplib.h" +#include "tree.h" +#include "stor-layout.h" +#include "target.h" +#include "c-family/c-common.h" +#include "langhooks.h" + + +/* IDs for all the AVR builtins. */ + +enum avr_builtin_id + { +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, CODE, LIBNAME) \ + AVR_BUILTIN_ ## NAME, +#include "builtins.def" +#undef DEF_BUILTIN + + AVR_BUILTIN_COUNT + }; + + +/* Implement `TARGET_RESOLVE_OVERLOADED_PLUGIN'. 
*/ + +static tree +avr_resolve_overloaded_builtin (unsigned int iloc, tree fndecl, void *vargs) +{ + tree type0, type1, fold = NULL_TREE; + enum avr_builtin_id id = AVR_BUILTIN_COUNT; + location_t loc = (location_t) iloc; + vec &args = * (vec*) vargs; + + switch (DECL_FUNCTION_CODE (fndecl)) + { + default: + break; + + case AVR_BUILTIN_ABSFX: + if (args.length() != 1) + { + error_at (loc, "%qs expects 1 argument but %d given", + "absfx", (int) args.length()); + + fold = error_mark_node; + break; + } + + type0 = TREE_TYPE (args[0]); + + if (!FIXED_POINT_TYPE_P (type0)) + { + error_at (loc, "%qs expects a fixed-point value as argument", + "absfx"); + + fold = error_mark_node; + } + + switch (TYPE_MODE (type0)) + { + case QQmode: id = AVR_BUILTIN_ABSHR; break; + case HQmode: id = AVR_BUILTIN_ABSR; break; + case SQmode: id = AVR_BUILTIN_ABSLR; break; + case DQmode: id = AVR_BUILTIN_ABSLLR; break; + + case HAmode: id = AVR_BUILTIN_ABSHK; break; + case SAmode: id = AVR_BUILTIN_ABSK; break; + case DAmode: id = AVR_BUILTIN_ABSLK; break; + case TAmode: id = AVR_BUILTIN_ABSLLK; break; + + case UQQmode: + case UHQmode: + case USQmode: + case UDQmode: + case UHAmode: + case USAmode: + case UDAmode: + case UTAmode: + warning_at (loc, 0, "using %qs with unsigned type has no effect", + "absfx"); + return args[0]; + + default: + error_at (loc, "no matching fixed-point overload found for %qs", + "absfx"); + + fold = error_mark_node; + break; + } + + fold = targetm.builtin_decl (id, true); + + if (fold != error_mark_node) + fold = build_function_call_vec (loc, vNULL, fold, &args, NULL); + + break; // absfx + + case AVR_BUILTIN_ROUNDFX: + if (args.length() != 2) + { + error_at (loc, "%qs expects 2 arguments but %d given", + "roundfx", (int) args.length()); + + fold = error_mark_node; + break; + } + + type0 = TREE_TYPE (args[0]); + type1 = TREE_TYPE (args[1]); + + if (!FIXED_POINT_TYPE_P (type0)) + { + error_at (loc, "%qs expects a fixed-point value as first argument", + "roundfx"); + + fold = error_mark_node; + } + + if (!INTEGRAL_TYPE_P (type1)) + { + error_at (loc, "%qs expects an integer value as second argument", + "roundfx"); + + fold = error_mark_node; + } + + switch (TYPE_MODE (type0)) + { + case QQmode: id = AVR_BUILTIN_ROUNDHR; break; + case HQmode: id = AVR_BUILTIN_ROUNDR; break; + case SQmode: id = AVR_BUILTIN_ROUNDLR; break; + case DQmode: id = AVR_BUILTIN_ROUNDLLR; break; + + case UQQmode: id = AVR_BUILTIN_ROUNDUHR; break; + case UHQmode: id = AVR_BUILTIN_ROUNDUR; break; + case USQmode: id = AVR_BUILTIN_ROUNDULR; break; + case UDQmode: id = AVR_BUILTIN_ROUNDULLR; break; + + case HAmode: id = AVR_BUILTIN_ROUNDHK; break; + case SAmode: id = AVR_BUILTIN_ROUNDK; break; + case DAmode: id = AVR_BUILTIN_ROUNDLK; break; + case TAmode: id = AVR_BUILTIN_ROUNDLLK; break; + + case UHAmode: id = AVR_BUILTIN_ROUNDUHK; break; + case USAmode: id = AVR_BUILTIN_ROUNDUK; break; + case UDAmode: id = AVR_BUILTIN_ROUNDULK; break; + case UTAmode: id = AVR_BUILTIN_ROUNDULLK; break; + + default: + error_at (loc, "no matching fixed-point overload found for %qs", + "roundfx"); + + fold = error_mark_node; + break; + } + + fold = targetm.builtin_decl (id, true); + + if (fold != error_mark_node) + fold = build_function_call_vec (loc, vNULL, fold, &args, NULL); + + break; // roundfx + + case AVR_BUILTIN_COUNTLSFX: + if (args.length() != 1) + { + error_at (loc, "%qs expects 1 argument but %d given", + "countlsfx", (int) args.length()); + + fold = error_mark_node; + break; + } + + type0 = TREE_TYPE (args[0]); + + if 
(!FIXED_POINT_TYPE_P (type0)) + { + error_at (loc, "%qs expects a fixed-point value as first argument", + "countlsfx"); + + fold = error_mark_node; + } + + switch (TYPE_MODE (type0)) + { + case QQmode: id = AVR_BUILTIN_COUNTLSHR; break; + case HQmode: id = AVR_BUILTIN_COUNTLSR; break; + case SQmode: id = AVR_BUILTIN_COUNTLSLR; break; + case DQmode: id = AVR_BUILTIN_COUNTLSLLR; break; + + case UQQmode: id = AVR_BUILTIN_COUNTLSUHR; break; + case UHQmode: id = AVR_BUILTIN_COUNTLSUR; break; + case USQmode: id = AVR_BUILTIN_COUNTLSULR; break; + case UDQmode: id = AVR_BUILTIN_COUNTLSULLR; break; + + case HAmode: id = AVR_BUILTIN_COUNTLSHK; break; + case SAmode: id = AVR_BUILTIN_COUNTLSK; break; + case DAmode: id = AVR_BUILTIN_COUNTLSLK; break; + case TAmode: id = AVR_BUILTIN_COUNTLSLLK; break; + + case UHAmode: id = AVR_BUILTIN_COUNTLSUHK; break; + case USAmode: id = AVR_BUILTIN_COUNTLSUK; break; + case UDAmode: id = AVR_BUILTIN_COUNTLSULK; break; + case UTAmode: id = AVR_BUILTIN_COUNTLSULLK; break; + + default: + error_at (loc, "no matching fixed-point overload found for %qs", + "countlsfx"); + + fold = error_mark_node; + break; + } + + fold = targetm.builtin_decl (id, true); + + if (fold != error_mark_node) + fold = build_function_call_vec (loc, vNULL, fold, &args, NULL); + + break; // countlsfx + } + + return fold; +} + + +/* Implement `REGISTER_TARGET_PRAGMAS'. */ + +void +avr_register_target_pragmas (void) +{ + int i; + + gcc_assert (ADDR_SPACE_GENERIC == ADDR_SPACE_RAM); + + /* Register address spaces. The order must be the same as in the respective + enum from avr.h (or designated initializers must be used in avr.c). */ + + for (i = 0; i < ADDR_SPACE_COUNT; i++) + { + gcc_assert (i == avr_addrspace[i].id); + + if (!ADDR_SPACE_GENERIC_P (i)) + c_register_addr_space (avr_addrspace[i].name, avr_addrspace[i].id); + } + + targetm.resolve_overloaded_builtin = avr_resolve_overloaded_builtin; +} + + +/* Transform LO into uppercase and write the result to UP. + You must provide enough space for UP. Return UP. */ + +static char* +avr_toupper (char *up, const char *lo) +{ + char *up0 = up; + + for (; *lo; lo++, up++) + *up = TOUPPER (*lo); + + *up = '\0'; + + return up0; +} + +/* Worker function for TARGET_CPU_CPP_BUILTINS. 
*/ + +void +avr_cpu_cpp_builtins (struct cpp_reader *pfile) +{ + int i; + + builtin_define_std ("AVR"); + + if (avr_current_arch->macro) + cpp_define_formatted (pfile, "__AVR_ARCH__=%s", avr_current_arch->macro); + if (avr_current_device->macro) + cpp_define (pfile, avr_current_device->macro); + if (AVR_HAVE_RAMPD) cpp_define (pfile, "__AVR_HAVE_RAMPD__"); + if (AVR_HAVE_RAMPX) cpp_define (pfile, "__AVR_HAVE_RAMPX__"); + if (AVR_HAVE_RAMPY) cpp_define (pfile, "__AVR_HAVE_RAMPY__"); + if (AVR_HAVE_RAMPZ) cpp_define (pfile, "__AVR_HAVE_RAMPZ__"); + if (AVR_HAVE_ELPM) cpp_define (pfile, "__AVR_HAVE_ELPM__"); + if (AVR_HAVE_ELPMX) cpp_define (pfile, "__AVR_HAVE_ELPMX__"); + if (AVR_HAVE_MOVW) cpp_define (pfile, "__AVR_HAVE_MOVW__"); + if (AVR_HAVE_LPMX) cpp_define (pfile, "__AVR_HAVE_LPMX__"); + + if (avr_current_arch->asm_only) + cpp_define (pfile, "__AVR_ASM_ONLY__"); + if (AVR_HAVE_MUL) + { + cpp_define (pfile, "__AVR_ENHANCED__"); + cpp_define (pfile, "__AVR_HAVE_MUL__"); + } + if (avr_current_arch->have_jmp_call) + { + cpp_define (pfile, "__AVR_MEGA__"); + cpp_define (pfile, "__AVR_HAVE_JMP_CALL__"); + } + if (AVR_XMEGA) + cpp_define (pfile, "__AVR_XMEGA__"); + if (avr_current_arch->have_eijmp_eicall) + { + cpp_define (pfile, "__AVR_HAVE_EIJMP_EICALL__"); + cpp_define (pfile, "__AVR_3_BYTE_PC__"); + } + else + { + cpp_define (pfile, "__AVR_2_BYTE_PC__"); + } + + if (AVR_HAVE_8BIT_SP) + cpp_define (pfile, "__AVR_HAVE_8BIT_SP__"); + else + cpp_define (pfile, "__AVR_HAVE_16BIT_SP__"); + + if (avr_sp8) + cpp_define (pfile, "__AVR_SP8__"); + + if (AVR_HAVE_SPH) + cpp_define (pfile, "__AVR_HAVE_SPH__"); + + if (TARGET_NO_INTERRUPTS) + cpp_define (pfile, "__NO_INTERRUPTS__"); + + if (avr_current_device->errata_skip) + { + cpp_define (pfile, "__AVR_ERRATA_SKIP__"); + + if (avr_current_arch->have_jmp_call) + cpp_define (pfile, "__AVR_ERRATA_SKIP_JMP_CALL__"); + } + + cpp_define_formatted (pfile, "__AVR_SFR_OFFSET__=0x%x", + avr_current_arch->sfr_offset); + +#ifdef WITH_AVRLIBC + cpp_define (pfile, "__WITH_AVRLIBC__"); +#endif /* WITH_AVRLIBC */ + + /* Define builtin macros so that the user can easily query whether + non-generic address spaces (and which) are supported or not. + This is only supported for C. For C++, a language extension is needed + (as mentioned in ISO/IEC DTR 18037; Annex F.2) which is not + implemented in GCC up to now. */ + + if (!strcmp (lang_hooks.name, "GNU C")) + { + for (i = 0; i < ADDR_SPACE_COUNT; i++) + if (!ADDR_SPACE_GENERIC_P (i) + /* Only supply __FLASH macro if the address space is reasonable + for this target. The address space qualifier itself is still + supported, but using it will throw an error. */ + && avr_addrspace[i].segment < avr_current_device->n_flash) + { + const char *name = avr_addrspace[i].name; + char *Name = (char*) alloca (1 + strlen (name)); + + cpp_define (pfile, avr_toupper (Name, name)); + } + } + + /* Define builtin macros so that the user can easily query whether or + not a specific builtin is available. */ + +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, CODE, LIBNAME) \ + cpp_define (pfile, "__BUILTIN_AVR_" #NAME); +#include "builtins.def" +#undef DEF_BUILTIN + + /* Builtin macros for the __int24 and __uint24 type. */ + + cpp_define_formatted (pfile, "__INT24_MAX__=8388607%s", + INT_TYPE_SIZE == 8 ? "LL" : "L"); + cpp_define (pfile, "__INT24_MIN__=(-__INT24_MAX__-1)"); + cpp_define_formatted (pfile, "__UINT24_MAX__=16777215%s", + INT_TYPE_SIZE == 8 ? 
"ULL" : "UL"); +} diff --git a/gcc-4.9/gcc/config/avr/avr-devices.c b/gcc-4.9/gcc/config/avr/avr-devices.c new file mode 100644 index 000000000..177f1961f --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-devices.c @@ -0,0 +1,114 @@ +/* Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Anatoly Sokolov (aesok@post.ru) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef IN_GEN_AVR_MMCU_TEXI +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#endif /* IN_GEN_AVR_MMCU_TEXI */ + +/* List of all known AVR MCU architectures. + Order as of enum avr_arch from avr.h. */ + +const avr_arch_t +avr_arch_types[] = +{ + /* unknown device specified */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, 32, NULL, "avr2" }, + /* + A M J LM E E E X R d S S O A + S U M PO L L I M A a t F ff r + M L P MV P P J E M t a R s c + XW M M M G P a r e h + X P A D t t ID */ + { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, 32, "1", "avr1" }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0060, 32, "2", "avr2" }, + { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0x0060, 32, "25", "avr25" }, + { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0x0060, 32, "3", "avr3" }, + { 0, 0, 1, 0, 1, 0, 0, 0, 0, 0x0060, 32, "31", "avr31" }, + { 0, 0, 1, 1, 0, 0, 0, 0, 0, 0x0060, 32, "35", "avr35" }, + { 0, 1, 0, 1, 0, 0, 0, 0, 0, 0x0060, 32, "4", "avr4" }, + { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0x0060, 32, "5", "avr5" }, + { 0, 1, 1, 1, 1, 1, 0, 0, 0, 0x0060, 32, "51", "avr51" }, + { 0, 1, 1, 1, 1, 1, 1, 0, 0, 0x0060, 32, "6", "avr6" }, + + { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0x2000, 0, "102", "avrxmega2" }, + { 0, 1, 1, 1, 1, 1, 0, 1, 0, 0x2000, 0, "104", "avrxmega4" }, + { 0, 1, 1, 1, 1, 1, 0, 1, 1, 0x2000, 0, "105", "avrxmega5" }, + { 0, 1, 1, 1, 1, 1, 1, 1, 0, 0x2000, 0, "106", "avrxmega6" }, + { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0x2000, 0, "107", "avrxmega7" } +}; + +const avr_arch_info_t +avr_texinfo[] = +{ + { ARCH_AVR1, + "This ISA is implemented by the minimal AVR core and supported " + "for assembler only." }, + { ARCH_AVR2, + "``Classic'' devices with up to 8@tie{}KiB of program memory." }, + { ARCH_AVR25, + "``Classic'' devices with up to 8@tie{}KiB of program memory and with " + "the @code{MOVW} instruction." }, + { ARCH_AVR3, + "``Classic'' devices with 16@tie{}KiB up to 64@tie{}KiB of " + " program memory." }, + { ARCH_AVR31, + "``Classic'' devices with 128@tie{}KiB of program memory." }, + { ARCH_AVR35, + "``Classic'' devices with 16@tie{}KiB up to 64@tie{}KiB of " + "program memory and with the @code{MOVW} instruction." }, + { ARCH_AVR4, + "``Enhanced'' devices with up to 8@tie{}KiB of program memory." }, + { ARCH_AVR5, + "``Enhanced'' devices with 16@tie{}KiB up to 64@tie{}KiB of " + "program memory." }, + { ARCH_AVR51, + "``Enhanced'' devices with 128@tie{}KiB of program memory." }, + { ARCH_AVR6, + "``Enhanced'' devices with 3-byte PC, i.e.@: with more than 128@tie{}KiB " + "of program memory." 
}, + { ARCH_AVRXMEGA2, + "``XMEGA'' devices with more than 8@tie{}KiB and up to 64@tie{}KiB " + "of program memory." }, + { ARCH_AVRXMEGA4, + "``XMEGA'' devices with more than 64@tie{}KiB and up to 128@tie{}KiB " + "of program memory." }, + { ARCH_AVRXMEGA5, + "``XMEGA'' devices with more than 64@tie{}KiB and up to 128@tie{}KiB " + "of program memory and more than 64@tie{}KiB of RAM." }, + { ARCH_AVRXMEGA6, + "``XMEGA'' devices with more than 128@tie{}KiB of program memory." }, + { ARCH_AVRXMEGA7, + "``XMEGA'' devices with more than 128@tie{}KiB of program memory " + "and more than 64@tie{}KiB of RAM." } +}; + +const avr_mcu_t +avr_mcu_types[] = +{ +#define AVR_MCU(NAME, ARCH, MACRO, SP8, ERR_SKIP, DATA_SEC, N_FLASH, LIBNAME)\ + { NAME, ARCH, MACRO, SP8, ERR_SKIP, DATA_SEC, N_FLASH, LIBNAME }, +#include "avr-mcus.def" +#undef AVR_MCU + /* End of list. */ + { NULL, ARCH_UNKNOWN, NULL, 0, 0, 0, 0, NULL } +}; + diff --git a/gcc-4.9/gcc/config/avr/avr-dimode.md b/gcc-4.9/gcc/config/avr/avr-dimode.md new file mode 100644 index 000000000..639810518 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-dimode.md @@ -0,0 +1,479 @@ +;; Machine description for GNU compiler, +;; for Atmel AVR micro controllers. +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. +;; Contributed by Georg Lay (avr@gjlay.de) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The purpose of this file is to provide a light-weight DImode +;; implementation for AVR. The trouble with DImode is that tree -> RTL +;; lowering leads to really unpleasant code for operations that don't +;; work byte-wise like NEG, PLUS, MINUS, etc. Defining optabs entries for +;; them won't help because the optab machinery assumes these operations +;; are cheap and does not check if a libgcc implementation is available. +;; +;; The DImode insns are all straight forward -- except movdi. The approach +;; of this implementation is to provide DImode insns without the burden of +;; introducing movdi. +;; +;; The caveat is that if there are insns for some mode, there must also be a +;; respective move insn that describes reloads. Therefore, this +;; implementation uses an accumulator-based model with two hard-coded, +;; accumulator-like registers +;; +;; A[] = reg:DI 18 +;; B[] = reg:DI 10 +;; +;; so that no DImode insn contains pseudos or needs reloading. 
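+
+;; Illustration (added here as an assumption for exposition, not part of
+;; the original comment): for a C function such as
+;;
+;;   long long add64 (long long a, long long b) { return a + b; }
+;;
+;; the "adddi3" expander below copies "a" into accumulator A (r18..r25),
+;; copies "b" into accumulator B (r10..r17), emits "%~call __adddi3",
+;; and finally copies accumulator A back into the pseudo holding the
+;; result, so no DImode pseudo is ever live across the insn and no
+;; DImode reload is needed.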
+ +(define_constants + [(ACC_A 18) + (ACC_B 10)]) + +;; Supported modes that are 8 bytes wide +(define_mode_iterator ALL8 [DI DQ UDQ DA UDA TA UTA]) + +(define_mode_iterator ALL8U [UDQ UDA UTA]) +(define_mode_iterator ALL8S [ DQ DA TA]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Addition +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; "adddi3" +;; "adddq3" "addudq3" +;; "addda3" "adduda3" +;; "addta3" "adduta3" +(define_expand "add3" + [(parallel [(match_operand:ALL8 0 "general_operand" "") + (match_operand:ALL8 1 "general_operand" "") + (match_operand:ALL8 2 "general_operand" "")])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (DImode == mode + && s8_operand (operands[2], VOIDmode)) + { + emit_move_insn (gen_rtx_REG (QImode, REG_X), operands[2]); + emit_insn (gen_adddi3_const8_insn ()); + } + else if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_add3_const_insn (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_add3_insn ()); + } + + emit_move_insn (operands[0], acc_a); + DONE; + }) + +;; "adddi3_insn" +;; "adddq3_insn" "addudq3_insn" +;; "addda3_insn" "adduda3_insn" +;; "addta3_insn" "adduta3_insn" +(define_insn "add3_insn" + [(set (reg:ALL8 ACC_A) + (plus:ALL8 (reg:ALL8 ACC_A) + (reg:ALL8 ACC_B)))] + "avr_have_dimode" + "%~call __adddi3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +(define_insn "adddi3_const8_insn" + [(set (reg:DI ACC_A) + (plus:DI (reg:DI ACC_A) + (sign_extend:DI (reg:QI REG_X))))] + "avr_have_dimode" + "%~call __adddi3_s8" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +;; "adddi3_const_insn" +;; "adddq3_const_insn" "addudq3_const_insn" +;; "addda3_const_insn" "adduda3_const_insn" +;; "addta3_const_insn" "adduta3_const_insn" +(define_insn "add3_const_insn" + [(set (reg:ALL8 ACC_A) + (plus:ALL8 (reg:ALL8 ACC_A) + (match_operand:ALL8 0 "const_operand" "n Ynn")))] + "avr_have_dimode + && !s8_operand (operands[0], VOIDmode)" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "clobber")]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Subtraction +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; "subdi3" +;; "subdq3" "subudq3" +;; "subda3" "subuda3" +;; "subta3" "subuta3" +(define_expand "sub3" + [(parallel [(match_operand:ALL8 0 "general_operand" "") + (match_operand:ALL8 1 "general_operand" "") + (match_operand:ALL8 2 "general_operand" "")])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_sub3_const_insn (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_sub3_insn ()); + } + + emit_move_insn (operands[0], acc_a); + DONE; + }) + +;; "subdi3_insn" +;; "subdq3_insn" "subudq3_insn" +;; "subda3_insn" "subuda3_insn" +;; "subta3_insn" "subuta3_insn" +(define_insn "sub3_insn" + [(set (reg:ALL8 ACC_A) + (minus:ALL8 (reg:ALL8 ACC_A) + (reg:ALL8 ACC_B)))] + "avr_have_dimode" + "%~call __subdi3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "set_czn")]) + +;; "subdi3_const_insn" +;; "subdq3_const_insn" "subudq3_const_insn" +;; "subda3_const_insn" "subuda3_const_insn" +;; "subta3_const_insn" "subuta3_const_insn" 
+(define_insn "sub3_const_insn" + [(set (reg:ALL8 ACC_A) + (minus:ALL8 (reg:ALL8 ACC_A) + (match_operand:ALL8 0 "const_operand" "n Ynn")))] + "avr_have_dimode" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Signed Saturating Addition and Subtraction +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "3" + [(set (match_operand:ALL8S 0 "general_operand" "") + (ss_addsub:ALL8S (match_operand:ALL8S 1 "general_operand" "") + (match_operand:ALL8S 2 "general_operand" "")))] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_3_const_insn (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_3_insn ()); + } + + emit_move_insn (operands[0], acc_a); + DONE; + }) + +(define_insn "3_insn" + [(set (reg:ALL8S ACC_A) + (ss_addsub:ALL8S (reg:ALL8S ACC_A) + (reg:ALL8S ACC_B)))] + "avr_have_dimode" + "%~call __3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +(define_insn "3_const_insn" + [(set (reg:ALL8S ACC_A) + (ss_addsub:ALL8S (reg:ALL8S ACC_A) + (match_operand:ALL8S 0 "const_operand" "n Ynn")))] + "avr_have_dimode" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Unsigned Saturating Addition and Subtraction +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "3" + [(set (match_operand:ALL8U 0 "general_operand" "") + (us_addsub:ALL8U (match_operand:ALL8U 1 "general_operand" "") + (match_operand:ALL8U 2 "general_operand" "")))] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_3_const_insn (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_3_insn ()); + } + + emit_move_insn (operands[0], acc_a); + DONE; + }) + +(define_insn "3_insn" + [(set (reg:ALL8U ACC_A) + (us_addsub:ALL8U (reg:ALL8U ACC_A) + (reg:ALL8U ACC_B)))] + "avr_have_dimode" + "%~call __3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +(define_insn "3_const_insn" + [(set (reg:ALL8U ACC_A) + (us_addsub:ALL8U (reg:ALL8U ACC_A) + (match_operand:ALL8U 0 "const_operand" "n Ynn")))] + "avr_have_dimode" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Negation +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "negdi2" + [(parallel [(match_operand:DI 0 "general_operand" "") + (match_operand:DI 1 "general_operand" "")])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (DImode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + emit_insn (gen_negdi2_insn ()); + emit_move_insn (operands[0], acc_a); + DONE; + }) + +(define_insn "negdi2_insn" + [(set (reg:DI ACC_A) + (neg:DI (reg:DI ACC_A)))] + "avr_have_dimode" + "%~call __negdi2" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Comparison 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_expand "conditional_jump" + [(set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 1 "" "")) + (pc)))] + "avr_have_dimode") + +;; "cbranchdi4" +;; "cbranchdq4" "cbranchudq4" +;; "cbranchda4" "cbranchuda4" +;; "cbranchta4" "cbranchuta4" +(define_expand "cbranch4" + [(parallel [(match_operand:ALL8 1 "register_operand" "") + (match_operand:ALL8 2 "nonmemory_operand" "") + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" ""))])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + + if (s8_operand (operands[2], VOIDmode)) + { + emit_move_insn (gen_rtx_REG (QImode, REG_X), operands[2]); + emit_insn (gen_compare_const8_di2 ()); + } + else if (const_operand (operands[2], GET_MODE (operands[2]))) + { + emit_insn (gen_compare_const_2 (operands[2])); + } + else + { + emit_move_insn (gen_rtx_REG (mode, ACC_B), operands[2]); + emit_insn (gen_compare_2 ()); + } + + emit_jump_insn (gen_conditional_jump (operands[0], operands[3])); + DONE; + }) + +;; "compare_di2" +;; "compare_dq2" "compare_udq2" +;; "compare_da2" "compare_uda2" +;; "compare_ta2" "compare_uta2" +(define_insn "compare_2" + [(set (cc0) + (compare (reg:ALL8 ACC_A) + (reg:ALL8 ACC_B)))] + "avr_have_dimode" + "%~call __cmpdi2" + [(set_attr "adjust_len" "call") + (set_attr "cc" "compare")]) + +(define_insn "compare_const8_di2" + [(set (cc0) + (compare (reg:DI ACC_A) + (sign_extend:DI (reg:QI REG_X))))] + "avr_have_dimode" + "%~call __cmpdi2_s8" + [(set_attr "adjust_len" "call") + (set_attr "cc" "compare")]) + +;; "compare_const_di2" +;; "compare_const_dq2" "compare_const_udq2" +;; "compare_const_da2" "compare_const_uda2" +;; "compare_const_ta2" "compare_const_uta2" +(define_insn "compare_const_2" + [(set (cc0) + (compare (reg:ALL8 ACC_A) + (match_operand:ALL8 0 "const_operand" "n Ynn"))) + (clobber (match_scratch:QI 1 "=&d"))] + "avr_have_dimode + && !s8_operand (operands[0], VOIDmode)" + { + return avr_out_compare64 (insn, operands, NULL); + } + [(set_attr "adjust_len" "compare64") + (set_attr "cc" "compare")]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Shifts and Rotate +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_code_iterator di_shifts + [ashift ashiftrt lshiftrt rotate]) + +;; Shift functions from libgcc are called without defining these insns, +;; but with them we can describe their reduced register footprint. 
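The shift patterns that follow hand the 64-bit operand to libgcc in the ACC_A register group (r18 upward, per the constants at the top of this file) with the shift count in r16. A minimal C sketch of code that would reach these expanders when 64-bit support (avr_have_dimode) is enabled; the function name is only illustrative:

    unsigned long long
    shift_left (unsigned long long x, unsigned char n)
    {
      /* Goes through the "ashldi3" expander below: x is moved into the
         ACC_A registers, n into r16, and the matching insn then emits
         "%~call __ashldi3".  */
      return x << n;
    }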
+ +;; "ashldi3" "ashrdi3" "lshrdi3" "rotldi3" +;; "ashldq3" "ashrdq3" "lshrdq3" "rotldq3" +;; "ashlda3" "ashrda3" "lshrda3" "rotlda3" +;; "ashlta3" "ashrta3" "lshrta3" "rotlta3" +;; "ashludq3" "ashrudq3" "lshrudq3" "rotludq3" +;; "ashluda3" "ashruda3" "lshruda3" "rotluda3" +;; "ashluta3" "ashruta3" "lshruta3" "rotluta3" +(define_expand "3" + [(parallel [(match_operand:ALL8 0 "general_operand" "") + (di_shifts:ALL8 (match_operand:ALL8 1 "general_operand" "") + (match_operand:QI 2 "general_operand" ""))])] + "avr_have_dimode" + { + rtx acc_a = gen_rtx_REG (mode, ACC_A); + + emit_move_insn (acc_a, operands[1]); + emit_move_insn (gen_rtx_REG (QImode, 16), operands[2]); + emit_insn (gen_3_insn ()); + emit_move_insn (operands[0], acc_a); + DONE; + }) + +;; "ashldi3_insn" "ashrdi3_insn" "lshrdi3_insn" "rotldi3_insn" +;; "ashldq3_insn" "ashrdq3_insn" "lshrdq3_insn" "rotldq3_insn" +;; "ashlda3_insn" "ashrda3_insn" "lshrda3_insn" "rotlda3_insn" +;; "ashlta3_insn" "ashrta3_insn" "lshrta3_insn" "rotlta3_insn" +;; "ashludq3_insn" "ashrudq3_insn" "lshrudq3_insn" "rotludq3_insn" +;; "ashluda3_insn" "ashruda3_insn" "lshruda3_insn" "rotluda3_insn" +;; "ashluta3_insn" "ashruta3_insn" "lshruta3_insn" "rotluta3_insn" +(define_insn "3_insn" + [(set (reg:ALL8 ACC_A) + (di_shifts:ALL8 (reg:ALL8 ACC_A) + (reg:QI 16)))] + "avr_have_dimode" + "%~call __di3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) + +;; "umulsidi3" +;; "mulsidi3" +(define_expand "mulsidi3" + [(parallel [(match_operand:DI 0 "register_operand" "") + (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "") + ;; Just to mention the iterator + (clobber (any_extend:SI (match_dup 1)))])] + "avr_have_dimode" + { + emit_move_insn (gen_rtx_REG (SImode, 22), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 18), operands[2]); + emit_insn (gen_mulsidi3_insn()); + // Use emit_move_insn and not open-coded expand because of missing movdi + emit_move_insn (operands[0], gen_rtx_REG (DImode, ACC_A)); + DONE; + }) + +;; "umulsidi3_insn" +;; "mulsidi3_insn" +(define_insn "mulsidi3_insn" + [(set (reg:DI ACC_A) + (mult:DI (any_extend:DI (reg:SI 18)) + (any_extend:DI (reg:SI 22)))) + (clobber (reg:HI REG_X)) + (clobber (reg:HI REG_Z))] + "avr_have_dimode" + "%~call __mulsidi3" + [(set_attr "adjust_len" "call") + (set_attr "cc" "clobber")]) diff --git a/gcc-4.9/gcc/config/avr/avr-fixed.md b/gcc-4.9/gcc/config/avr/avr-fixed.md new file mode 100644 index 000000000..1652415b1 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-fixed.md @@ -0,0 +1,497 @@ +;; This file contains instructions that support fixed-point operations +;; for Atmel AVR micro controllers. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; +;; Contributed by Sean D'Epagnier (sean@depagnier.com) +;; Georg-Johann Lay (avr@gjlay.de) + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_mode_iterator ALL1Q [QQ UQQ]) +(define_mode_iterator ALL2Q [HQ UHQ]) +(define_mode_iterator ALL2A [HA UHA]) +(define_mode_iterator ALL4A [SA USA]) +(define_mode_iterator ALL2QA [HQ UHQ HA UHA]) +(define_mode_iterator ALL4QA [SQ USQ SA USA]) +(define_mode_iterator ALL124QA [ QQ HQ HA SA SQ + UQQ UHQ UHA USA USQ]) + +(define_mode_iterator ALL2S [HQ HA]) +(define_mode_iterator ALL4S [SA SQ]) +(define_mode_iterator ALL24S [ HQ HA SA SQ]) +(define_mode_iterator ALL124S [ QQ HQ HA SA SQ]) +(define_mode_iterator ALL124U [UQQ UHQ UHA USA USQ]) + +;;; Conversions + +(define_mode_iterator FIXED_A + [QQ UQQ + HQ UHQ HA UHA + SQ USQ SA USA + DQ UDQ DA UDA + TA UTA + QI HI SI DI]) + +;; Same so that be can build cross products + +(define_mode_iterator FIXED_B + [QQ UQQ + HQ UHQ HA UHA + SQ USQ SA USA + DQ UDQ DA UDA + TA UTA + QI HI SI DI]) + +(define_insn "fract2" + [(set (match_operand:FIXED_A 0 "register_operand" "=r") + (fract_convert:FIXED_A + (match_operand:FIXED_B 1 "register_operand" "r")))] + "mode != mode" + { + return avr_out_fract (insn, operands, true, NULL); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "sfract")]) + +(define_insn "fractuns2" + [(set (match_operand:FIXED_A 0 "register_operand" "=r") + (unsigned_fract_convert:FIXED_A + (match_operand:FIXED_B 1 "register_operand" "r")))] + "mode != mode" + { + return avr_out_fract (insn, operands, false, NULL); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "ufract")]) + +;****************************************************************************** +;** Saturated Addition and Subtraction +;****************************************************************************** + +;; Fixme: It would be nice if we could expand the 32-bit versions to a +;; transparent libgcc call if $2 is a REG. Problem is that it is +;; not possible to describe that addition is commutative. +;; And defining register classes/constraintrs for the involved hard +;; registers and let IRA do the work, yields inacceptable bloated code. +;; Thus, we have to live with the up to 11 instructions that are output +;; for these 32-bit saturated operations. + +;; "ssaddqq3" "ssaddhq3" "ssaddha3" "ssaddsq3" "ssaddsa3" +;; "sssubqq3" "sssubhq3" "sssubha3" "sssubsq3" "sssubsa3" +(define_insn "3" + [(set (match_operand:ALL124S 0 "register_operand" "=??d,d") + (ss_addsub:ALL124S (match_operand:ALL124S 1 "register_operand" "0,0") + (match_operand:ALL124S 2 "nonmemory_operand" "r,Ynn")))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "plus")]) + +;; "usadduqq3" "usadduhq3" "usadduha3" "usaddusq3" "usaddusa3" +;; "ussubuqq3" "ussubuhq3" "ussubuha3" "ussubusq3" "ussubusa3" +(define_insn "3" + [(set (match_operand:ALL124U 0 "register_operand" "=??r,d") + (us_addsub:ALL124U (match_operand:ALL124U 1 "register_operand" "0,0") + (match_operand:ALL124U 2 "nonmemory_operand" "r,Ynn")))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "plus")]) + +;****************************************************************************** +;** Saturated Negation and Absolute Value +;****************************************************************************** + +;; Fixme: This will always result in 0. Dunno why simplify-rtx.c says +;; "unknown" on how to optimize this. 
libgcc call would be in order, +;; but the performance is *PLAIN* *HORROR* because the optimizers don't +;; manage to optimize out MEMCPY that's sprincled all over fixed-bit.c */ + +(define_expand "usneg2" + [(parallel [(match_operand:ALL124U 0 "register_operand" "") + (match_operand:ALL124U 1 "nonmemory_operand" "")])] + "" + { + emit_move_insn (operands[0], CONST0_RTX (mode)); + DONE; + }) + +(define_insn "ssnegqq2" + [(set (match_operand:QQ 0 "register_operand" "=r") + (ss_neg:QQ (match_operand:QQ 1 "register_operand" "0")))] + "" + "neg %0\;brvc 0f\;dec %0\;0:" + [(set_attr "cc" "clobber") + (set_attr "length" "3")]) + +(define_insn "ssabsqq2" + [(set (match_operand:QQ 0 "register_operand" "=r") + (ss_abs:QQ (match_operand:QQ 1 "register_operand" "0")))] + "" + "sbrc %0,7\;neg %0\;sbrc %0,7\;dec %0" + [(set_attr "cc" "clobber") + (set_attr "length" "4")]) + +;; "ssneghq2" "ssnegha2" "ssnegsq2" "ssnegsa2" +;; "ssabshq2" "ssabsha2" "ssabssq2" "ssabssa2" +(define_expand "2" + [(set (match_dup 2) + (match_operand:ALL24S 1 "register_operand" "")) + (set (match_dup 2) + (ss_abs_neg:ALL24S (match_dup 2))) + (set (match_operand:ALL24S 0 "register_operand" "") + (match_dup 2))] + "" + { + operands[2] = gen_rtx_REG (mode, 26 - GET_MODE_SIZE (mode)); + }) + +;; "*ssneghq2" "*ssnegha2" +;; "*ssabshq2" "*ssabsha2" +(define_insn "*2" + [(set (reg:ALL2S 24) + (ss_abs_neg:ALL2S (reg:ALL2S 24)))] + "" + "%~call ___2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*ssnegsq2" "*ssnegsa2" +;; "*ssabssq2" "*ssabssa2" +(define_insn "*2" + [(set (reg:ALL4S 22) + (ss_abs_neg:ALL4S (reg:ALL4S 22)))] + "" + "%~call ___4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;****************************************************************************** +; mul + +;; "mulqq3" "muluqq3" +(define_expand "mul3" + [(parallel [(match_operand:ALL1Q 0 "register_operand" "") + (match_operand:ALL1Q 1 "register_operand" "") + (match_operand:ALL1Q 2 "register_operand" "")])] + "" + { + emit_insn (AVR_HAVE_MUL + ? 
gen_mul3_enh (operands[0], operands[1], operands[2]) + : gen_mul3_nomul (operands[0], operands[1], operands[2])); + DONE; + }) + +(define_insn "mulqq3_enh" + [(set (match_operand:QQ 0 "register_operand" "=r") + (mult:QQ (match_operand:QQ 1 "register_operand" "a") + (match_operand:QQ 2 "register_operand" "a")))] + "AVR_HAVE_MUL" + "fmuls %1,%2\;dec r1\;brvs 0f\;inc r1\;0:\;mov %0,r1\;clr __zero_reg__" + [(set_attr "length" "6") + (set_attr "cc" "clobber")]) + +(define_insn "muluqq3_enh" + [(set (match_operand:UQQ 0 "register_operand" "=r") + (mult:UQQ (match_operand:UQQ 1 "register_operand" "r") + (match_operand:UQQ 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%2\;mov %0,r1\;clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_expand "mulqq3_nomul" + [(set (reg:QQ 24) + (match_operand:QQ 1 "register_operand" "")) + (set (reg:QQ 25) + (match_operand:QQ 2 "register_operand" "")) + ;; "*mulqq3.call" + (parallel [(set (reg:QQ 23) + (mult:QQ (reg:QQ 24) + (reg:QQ 25))) + (clobber (reg:QI 22)) + (clobber (reg:HI 24))]) + (set (match_operand:QQ 0 "register_operand" "") + (reg:QQ 23))] + "!AVR_HAVE_MUL") + +(define_expand "muluqq3_nomul" + [(set (reg:UQQ 22) + (match_operand:UQQ 1 "register_operand" "")) + (set (reg:UQQ 24) + (match_operand:UQQ 2 "register_operand" "")) + ;; "*umulqihi3.call" + (parallel [(set (reg:HI 24) + (mult:HI (zero_extend:HI (reg:QI 22)) + (zero_extend:HI (reg:QI 24)))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22))]) + (set (match_operand:UQQ 0 "register_operand" "") + (reg:UQQ 25))] + "!AVR_HAVE_MUL") + +(define_insn "*mulqq3.call" + [(set (reg:QQ 23) + (mult:QQ (reg:QQ 24) + (reg:QQ 25))) + (clobber (reg:QI 22)) + (clobber (reg:HI 24))] + "!AVR_HAVE_MUL" + "%~call __mulqq3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; "mulhq3" "muluhq3" +;; "mulha3" "muluha3" +(define_expand "mul3" + [(set (reg:ALL2QA 18) + (match_operand:ALL2QA 1 "register_operand" "")) + (set (reg:ALL2QA 26) + (match_operand:ALL2QA 2 "register_operand" "")) + ;; "*mulhq3.call.enh" + (parallel [(set (reg:ALL2QA 24) + (mult:ALL2QA (reg:ALL2QA 18) + (reg:ALL2QA 26))) + (clobber (reg:HI 22))]) + (set (match_operand:ALL2QA 0 "register_operand" "") + (reg:ALL2QA 24))] + "AVR_HAVE_MUL") + +;; "*mulhq3.call" "*muluhq3.call" +;; "*mulha3.call" "*muluha3.call" +(define_insn "*mul3.call" + [(set (reg:ALL2QA 24) + (mult:ALL2QA (reg:ALL2QA 18) + (reg:ALL2QA 26))) + (clobber (reg:HI 22))] + "AVR_HAVE_MUL" + "%~call __mul3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; On the enhanced core, don't clobber either input and use a separate output + +;; "mulsa3" "mulusa3" +(define_expand "mul3" + [(set (reg:ALL4A 16) + (match_operand:ALL4A 1 "register_operand" "")) + (set (reg:ALL4A 20) + (match_operand:ALL4A 2 "register_operand" "")) + (set (reg:ALL4A 24) + (mult:ALL4A (reg:ALL4A 16) + (reg:ALL4A 20))) + (set (match_operand:ALL4A 0 "register_operand" "") + (reg:ALL4A 24))] + "AVR_HAVE_MUL") + +;; "*mulsa3.call" "*mulusa3.call" +(define_insn "*mul3.call" + [(set (reg:ALL4A 24) + (mult:ALL4A (reg:ALL4A 16) + (reg:ALL4A 20)))] + "AVR_HAVE_MUL" + "%~call __mul3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +; / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +; div + +(define_code_iterator usdiv [udiv div]) + +;; "divqq3" "udivuqq3" +(define_expand "3" + [(set (reg:ALL1Q 25) + (match_operand:ALL1Q 1 "register_operand" "")) + (set (reg:ALL1Q 22) + (match_operand:ALL1Q 2 "register_operand" "")) + 
(parallel [(set (reg:ALL1Q 24) + (usdiv:ALL1Q (reg:ALL1Q 25) + (reg:ALL1Q 22))) + (clobber (reg:QI 25))]) + (set (match_operand:ALL1Q 0 "register_operand" "") + (reg:ALL1Q 24))]) + +;; "*divqq3.call" "*udivuqq3.call" +(define_insn "*3.call" + [(set (reg:ALL1Q 24) + (usdiv:ALL1Q (reg:ALL1Q 25) + (reg:ALL1Q 22))) + (clobber (reg:QI 25))] + "" + "%~call __3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "divhq3" "udivuhq3" +;; "divha3" "udivuha3" +(define_expand "3" + [(set (reg:ALL2QA 26) + (match_operand:ALL2QA 1 "register_operand" "")) + (set (reg:ALL2QA 22) + (match_operand:ALL2QA 2 "register_operand" "")) + (parallel [(set (reg:ALL2QA 24) + (usdiv:ALL2QA (reg:ALL2QA 26) + (reg:ALL2QA 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))]) + (set (match_operand:ALL2QA 0 "register_operand" "") + (reg:ALL2QA 24))]) + +;; "*divhq3.call" "*udivuhq3.call" +;; "*divha3.call" "*udivuha3.call" +(define_insn "*3.call" + [(set (reg:ALL2QA 24) + (usdiv:ALL2QA (reg:ALL2QA 26) + (reg:ALL2QA 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))] + "" + "%~call __3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Note the first parameter gets passed in already offset by 2 bytes + +;; "divsa3" "udivusa3" +(define_expand "3" + [(set (reg:ALL4A 24) + (match_operand:ALL4A 1 "register_operand" "")) + (set (reg:ALL4A 18) + (match_operand:ALL4A 2 "register_operand" "")) + (parallel [(set (reg:ALL4A 22) + (usdiv:ALL4A (reg:ALL4A 24) + (reg:ALL4A 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_operand:ALL4A 0 "register_operand" "") + (reg:ALL4A 22))]) + +;; "*divsa3.call" "*udivusa3.call" +(define_insn "*3.call" + [(set (reg:ALL4A 22) + (usdiv:ALL4A (reg:ALL4A 24) + (reg:ALL4A 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "" + "%~call __3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;****************************************************************************** +;** Rounding +;****************************************************************************** + +;; "roundqq3" "rounduqq3" +;; "roundhq3" "rounduhq3" "roundha3" "rounduha3" +;; "roundsq3" "roundusq3" "roundsa3" "roundusa3" +(define_expand "round3" + [(set (match_dup 4) + (match_operand:ALL124QA 1 "register_operand" "")) + (set (reg:QI 24) + (match_dup 5)) + (parallel [(set (match_dup 3) + (unspec:ALL124QA [(match_dup 4) + (reg:QI 24)] UNSPEC_ROUND)) + (clobber (match_dup 4))]) + (set (match_operand:ALL124QA 0 "register_operand" "") + (match_dup 3)) + (use (match_operand:HI 2 "nonmemory_operand" ""))] + "" + { + if (CONST_INT_P (operands[2]) + && !(optimize_size + && 4 == GET_MODE_SIZE (mode))) + { + emit_insn (gen_round3_const (operands[0], operands[1], operands[2])); + DONE; + } + + // Input and output of the libgcc function + const unsigned int regno_in[] = { -1, 22, 22, -1, 18 }; + const unsigned int regno_out[] = { -1, 24, 24, -1, 22 }; + + operands[3] = gen_rtx_REG (mode, regno_out[(size_t) GET_MODE_SIZE (mode)]); + operands[4] = gen_rtx_REG (mode, regno_in[(size_t) GET_MODE_SIZE (mode)]); + operands[5] = simplify_gen_subreg (QImode, force_reg (HImode, operands[2]), HImode, 0); + // $2 is no more needed, but is referenced for expand. + operands[2] = const0_rtx; + }) + +;; Expand rounding with known rounding points inline so that the addend / mask +;; will be consumed by operation with immediate operands and there is no +;; need for a shift with variable offset. 
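With a known rounding point the addend and the mask are plain compile-time constants. A sketch of the equivalent arithmetic on the integer container of the value (illustrative only; it ignores the overflow and saturation handling the real output routine has to provide):

    #include <stdint.h>

    /* Round a value with FBIT fractional bits to RP fractional bits,
       round-to-nearest, on its 16-bit container (e.g. HQ: FBIT == 15).  */
    static inline int16_t
    round_container (int16_t x, unsigned fbit, unsigned rp)
    {
      int16_t addend = (int16_t) (1u << (fbit - rp - 1));      /* 0x0400 for FBIT=15, RP=4 */
      int16_t mask   = (int16_t) ~((1u << (fbit - rp)) - 1u);  /* 0xF800 for FBIT=15, RP=4 */
      return (int16_t) ((x + addend) & mask);
    }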
+ +;; "roundqq3_const" "rounduqq3_const" +;; "roundhq3_const" "rounduhq3_const" "roundha3_const" "rounduha3_const" +;; "roundsq3_const" "roundusq3_const" "roundsa3_const" "roundusa3_const" +(define_insn "round3_const" + [(set (match_operand:ALL124QA 0 "register_operand" "=d") + (unspec:ALL124QA [(match_operand:ALL124QA 1 "register_operand" "0") + (match_operand:HI 2 "const_int_operand" "n") + (const_int 0)] + UNSPEC_ROUND))] + "" + { + return avr_out_round (insn, operands); + } + [(set_attr "cc" "clobber") + (set_attr "adjust_len" "round")]) + + +;; "*roundqq3.libgcc" "*rounduqq3.libgcc" +(define_insn "*round3.libgcc" + [(set (reg:ALL1Q 24) + (unspec:ALL1Q [(reg:ALL1Q 22) + (reg:QI 24)] UNSPEC_ROUND)) + (clobber (reg:ALL1Q 22))] + "" + "%~call __round3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*roundhq3.libgcc" "*rounduhq3.libgcc" +;; "*roundha3.libgcc" "*rounduha3.libgcc" +(define_insn "*round3.libgcc" + [(set (reg:ALL2QA 24) + (unspec:ALL2QA [(reg:ALL2QA 22) + (reg:QI 24)] UNSPEC_ROUND)) + (clobber (reg:ALL2QA 22))] + "" + "%~call __round3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*roundsq3.libgcc" "*roundusq3.libgcc" +;; "*roundsa3.libgcc" "*roundusa3.libgcc" +(define_insn "*round3.libgcc" + [(set (reg:ALL4QA 22) + (unspec:ALL4QA [(reg:ALL4QA 18) + (reg:QI 24)] UNSPEC_ROUND)) + (clobber (reg:ALL4QA 18))] + "" + "%~call __round3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) diff --git a/gcc-4.9/gcc/config/avr/avr-log.c b/gcc-4.9/gcc/config/avr/avr-log.c new file mode 100644 index 000000000..8e27cec6d --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-log.c @@ -0,0 +1,351 @@ +/* Subroutines for log output for Atmel AVR back end. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by Georg-Johann Lay (avr@gjlay.de) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "print-tree.h" +#include "output.h" +#include "input.h" +#include "function.h" +#include "tm_p.h" +#include "tree-pass.h" /* for current_pass */ + +/* This file supplies some functions for AVR back-end developers + with a printf-like interface. The functions are called through + macros avr_edump or avr_fdump from avr-protos.h: + + avr_edump (const char *fmt, ...); + + avr_fdump (FILE *stream, const char *fmt, ...); + + avr_edump (fmt, ...) is a shortcut for avr_fdump (stderr, fmt, ...) + + == known %-codes == + + b: bool + r: rtx + t: tree + T: tree (brief) + C: enum rtx_code + m: enum machine_mode + R: enum reg_class + L: insn list + H: location_t + + == no arguments == + + A: call abort() + f: current_function_name() + F: caller (via __FUNCTION__) + P: Pass name and number + ?: Print caller, current function and pass info + !: Ditto, but only print if in a pass with static pass number, + else return. 
+ + == same as printf == + + %: % + c: char + s: string + d: int (decimal) + x: int (hex) +*/ + +/* Set according to -mlog= option. */ +avr_log_t avr_log; + +/* The caller as of __FUNCTION__ */ +static const char *avr_log_caller = "?"; + +/* The worker function implementing the %-codes */ +static void avr_log_vadump (FILE*, const char*, va_list); + +/* As we have no variadic macros, avr_edump maps to a call to + avr_log_set_caller_e which saves __FUNCTION__ to avr_log_caller and + returns a function pointer to avr_log_fdump_e. avr_log_fdump_e + gets the printf-like arguments and calls avr_log_vadump, the + worker function. avr_fdump works the same way. */ + +/* Provide avr_log_fdump_e/f so that avr_log_set_caller_e/_f can return + their address. */ + +static int +avr_log_fdump_e (const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + avr_log_vadump (stderr, fmt, ap); + va_end (ap); + + return 1; +} + +static int +avr_log_fdump_f (FILE *stream, const char *fmt, ...) +{ + va_list ap; + + va_start (ap, fmt); + if (stream) + avr_log_vadump (stream, fmt, ap); + va_end (ap); + + return 1; +} + +/* Macros avr_edump/avr_fdump map to calls of the following two functions, + respectively. You don't need to call them directly. */ + +int (* +avr_log_set_caller_e (const char *caller) + )(const char*, ...) +{ + avr_log_caller = caller; + + return avr_log_fdump_e; +} + +int (* +avr_log_set_caller_f (const char *caller) + )(FILE*, const char*, ...) +{ + avr_log_caller = caller; + + return avr_log_fdump_f; +} + + +/* Worker function implementing the %-codes and forwarding to + respective print/dump function. */ + +static void +avr_log_vadump (FILE *file, const char *fmt, va_list ap) +{ + char bs[3] = {'\\', '?', '\0'}; + + while (*fmt) + { + switch (*fmt++) + { + default: + fputc (*(fmt-1), file); + break; + + case '\\': + bs[1] = *fmt++; + fputs (bs, file); + break; + + case '%': + switch (*fmt++) + { + case '%': + fputc ('%', file); + break; + + case 't': + { + tree t = va_arg (ap, tree); + if (NULL_TREE == t) + fprintf (file, ""); + else + { + if (stderr == file) + debug_tree (t); + else + { + print_node (file, "", t, 0); + putc ('\n', file); + } + } + break; + } + + case 'T': + print_node_brief (file, "", va_arg (ap, tree), 3); + break; + + case 'd': + fprintf (file, "%d", va_arg (ap, int)); + break; + + case 'x': + fprintf (file, "%x", va_arg (ap, int)); + break; + + case 'b': + fprintf (file, "%s", va_arg (ap, int) ? "true" : "false"); + break; + + case 'c': + fputc (va_arg (ap, int), file); + break; + + case 'r': + print_inline_rtx (file, va_arg (ap, rtx), 0); + break; + + case 'L': + { + rtx insn = va_arg (ap, rtx); + + while (insn) + { + print_inline_rtx (file, insn, 0); + fprintf (file, "\n"); + insn = NEXT_INSN (insn); + } + break; + } + + case 'f': + if (cfun && cfun->decl) + fputs (current_function_name(), file); + break; + + case 's': + { + const char *str = va_arg (ap, char*); + fputs (str ? 
str : "(null)", file); + } + break; + + case 'm': + fputs (GET_MODE_NAME ((enum machine_mode) va_arg (ap, int)), + file); + break; + + case 'C': + fputs (rtx_name[va_arg (ap, int)], file); + break; + + case 'R': + fputs (reg_class_names[va_arg (ap, int)], file); + break; + + case 'F': + fputs (avr_log_caller, file); + break; + + case 'H': + { + location_t loc = va_arg (ap, location_t); + + if (BUILTINS_LOCATION == loc) + fprintf (file, ""); + else if (UNKNOWN_LOCATION == loc) + fprintf (file, ""); + else + fprintf (file, "%s:%d", + LOCATION_FILE (loc), LOCATION_LINE (loc)); + + break; + } + + case '!': + if (!current_pass) + return; + /* FALLTHRU */ + + case '?': + avr_log_fdump_f (file, "%F[%f:%P]"); + break; + + case 'P': + if (current_pass) + fprintf (file, "%s(%d)", + current_pass->name, + current_pass->static_pass_number); + else + fprintf (file, "pass=?"); + + break; + + case 'A': + fflush (file); + abort(); + + default: + /* Unknown %-code: Stop printing */ + + fprintf (file, "??? %%%c ???%s\n", *(fmt-1), fmt); + fmt = ""; + + break; + } + break; /* % */ + } + } + + fflush (file); +} + + +/* Called from avr.c:avr_option_override(). + Parse argument of -mlog= and set respective fields in avr_log. */ + +void +avr_log_set_avr_log (void) +{ + bool all = TARGET_ALL_DEBUG != 0; + + if (all || avr_log_details) + { + /* Adding , at beginning and end of string makes searching easier. */ + + char *str = (char*) alloca (3 + strlen (avr_log_details)); + bool info; + + str[0] = ','; + strcat (stpcpy (str+1, avr_log_details), ","); + + all |= NULL != strstr (str, ",all,"); + info = NULL != strstr (str, ",?,"); + + if (info) + fprintf (stderr, "\n-mlog="); + +#define SET_DUMP_DETAIL(S) \ + do { \ + avr_log.S = (all || NULL != strstr (str, "," #S ",")); \ + if (info) \ + fprintf (stderr, #S ","); \ + } while (0) + + SET_DUMP_DETAIL (address_cost); + SET_DUMP_DETAIL (builtin); + SET_DUMP_DETAIL (constraints); + SET_DUMP_DETAIL (legitimate_address_p); + SET_DUMP_DETAIL (legitimize_address); + SET_DUMP_DETAIL (legitimize_reload_address); + SET_DUMP_DETAIL (progmem); + SET_DUMP_DETAIL (rtx_costs); + +#undef SET_DUMP_DETAIL + + if (info) + fprintf (stderr, "?\n\n"); + } +} diff --git a/gcc-4.9/gcc/config/avr/avr-mcus.def b/gcc-4.9/gcc/config/avr/avr-mcus.def new file mode 100644 index 000000000..d068f5e80 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-mcus.def @@ -0,0 +1,323 @@ +/* AVR MCUs. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* List of all known AVR MCU types. If updated, cd to $(builddir)/gcc and run + + $ make avr-mcus + + This will regenerate / update the following source files: + + - $(srcdir)/config/avr/t-multilib + - $(srcdir)/config/avr/avr-tables.opt + - $(srcdir)/doc/avr-mmcu.texi + + After that, rebuild everything and check-in the new sources to the repo. + The device list below has to be kept in sync with AVR-LibC. 
+ + + Before including this file, define a macro: + + AVR_MCU (NAME, ARCH, MACRO, SHORT_SP, ERRATA_SKIP, DATA_SEC, N_FLASH, + LIBRARY_NAME) + + where the arguments are the fields of avr_mcu_t: + + NAME Accept -mmcu= + + ARCH Specifies the multilib variant together with SHORT_SP + + MACRO If NULL, this is a core and not a device. If non-NULL, + supply respective built-in macro. + + SHORT_SP The device / multilib has an 8-bit stack pointer (no SPH). + + ERRATA_SKIP Apply work-around for the "skip 32-bit instruction" + silicon bug: Don't skip 32-bit instrctions. + + DATA_SEC First address of SRAM, used in -Tdata= by the driver. + + N_FLASH Number of 64 KiB flash segments, rounded up. + + LIBRARY_NAME Used by the driver to linke startup code from avr-libc + as of crt.o + + "avr2" must be first for the "0" default to work as intended. */ + +/* Classic, <= 8K. */ +AVR_MCU ("avr2", ARCH_AVR2, NULL, 0, 1, 0x0060, 6, "s8515") +AVR_MCU ("at90s2313", ARCH_AVR2, "__AVR_AT90S2313__", 1, 0, 0x0060, 1, "s2313") +AVR_MCU ("at90s2323", ARCH_AVR2, "__AVR_AT90S2323__", 1, 0, 0x0060, 1, "s2323") +AVR_MCU ("at90s2333", ARCH_AVR2, "__AVR_AT90S2333__", 1, 0, 0x0060, 1, "s2333") +AVR_MCU ("at90s2343", ARCH_AVR2, "__AVR_AT90S2343__", 1, 0, 0x0060, 1, "s2343") +AVR_MCU ("attiny22", ARCH_AVR2, "__AVR_ATtiny22__", 1, 0, 0x0060, 1, "tn22") +AVR_MCU ("attiny26", ARCH_AVR2, "__AVR_ATtiny26__", 1, 0, 0x0060, 1, "tn26") +AVR_MCU ("at90s4414", ARCH_AVR2, "__AVR_AT90S4414__", 0, 0, 0x0060, 1, "s4414") +AVR_MCU ("at90s4433", ARCH_AVR2, "__AVR_AT90S4433__", 1, 0, 0x0060, 1, "s4433") +AVR_MCU ("at90s4434", ARCH_AVR2, "__AVR_AT90S4434__", 0, 0, 0x0060, 1, "s4434") +AVR_MCU ("at90s8515", ARCH_AVR2, "__AVR_AT90S8515__", 0, 1, 0x0060, 1, "s8515") +AVR_MCU ("at90c8534", ARCH_AVR2, "__AVR_AT90C8534__", 0, 0, 0x0060, 1, "c8534") +AVR_MCU ("at90s8535", ARCH_AVR2, "__AVR_AT90S8535__", 0, 0, 0x0060, 1, "s8535") +/* Classic + MOVW, <= 8K. 
*/ +AVR_MCU ("avr25", ARCH_AVR25, NULL, 0, 0, 0x0060, 1, "tn85") +AVR_MCU ("ata6289", ARCH_AVR25, "__AVR_ATA6289__", 0, 0, 0x0100, 1, "a6289") +AVR_MCU ("ata5272", ARCH_AVR25, "__AVR_ATA5272__", 0, 0, 0x0100, 1, "a5272") +AVR_MCU ("attiny13", ARCH_AVR25, "__AVR_ATtiny13__", 1, 0, 0x0060, 1, "tn13") +AVR_MCU ("attiny13a", ARCH_AVR25, "__AVR_ATtiny13A__", 1, 0, 0x0060, 1, "tn13a") +AVR_MCU ("attiny2313", ARCH_AVR25, "__AVR_ATtiny2313__", 1, 0, 0x0060, 1, "tn2313") +AVR_MCU ("attiny2313a", ARCH_AVR25, "__AVR_ATtiny2313A__", 1, 0, 0x0060, 1, "tn2313a") +AVR_MCU ("attiny24", ARCH_AVR25, "__AVR_ATtiny24__", 1, 0, 0x0060, 1, "tn24") +AVR_MCU ("attiny24a", ARCH_AVR25, "__AVR_ATtiny24A__", 1, 0, 0x0060, 1, "tn24a") +AVR_MCU ("attiny4313", ARCH_AVR25, "__AVR_ATtiny4313__", 0, 0, 0x0060, 1, "tn4313") +AVR_MCU ("attiny44", ARCH_AVR25, "__AVR_ATtiny44__", 0, 0, 0x0060, 1, "tn44") +AVR_MCU ("attiny44a", ARCH_AVR25, "__AVR_ATtiny44A__", 0, 0, 0x0060, 1, "tn44a") +AVR_MCU ("attiny84", ARCH_AVR25, "__AVR_ATtiny84__", 0, 0, 0x0060, 1, "tn84") +AVR_MCU ("attiny84a", ARCH_AVR25, "__AVR_ATtiny84A__", 0, 0, 0x0060, 1, "tn84") +AVR_MCU ("attiny25", ARCH_AVR25, "__AVR_ATtiny25__", 1, 0, 0x0060, 1, "tn25") +AVR_MCU ("attiny45", ARCH_AVR25, "__AVR_ATtiny45__", 0, 0, 0x0060, 1, "tn45") +AVR_MCU ("attiny85", ARCH_AVR25, "__AVR_ATtiny85__", 0, 0, 0x0060, 1, "tn85") +AVR_MCU ("attiny261", ARCH_AVR25, "__AVR_ATtiny261__", 1, 0, 0x0060, 1, "tn261") +AVR_MCU ("attiny261a", ARCH_AVR25, "__AVR_ATtiny261A__", 1, 0, 0x0060, 1, "tn261a") +AVR_MCU ("attiny461", ARCH_AVR25, "__AVR_ATtiny461__", 0, 0, 0x0060, 1, "tn461") +AVR_MCU ("attiny461a", ARCH_AVR25, "__AVR_ATtiny461A__", 0, 0, 0x0060, 1, "tn461a") +AVR_MCU ("attiny861", ARCH_AVR25, "__AVR_ATtiny861__", 0, 0, 0x0060, 1, "tn861") +AVR_MCU ("attiny861a", ARCH_AVR25, "__AVR_ATtiny861A__", 0, 0, 0x0060, 1, "tn861a") +AVR_MCU ("attiny43u", ARCH_AVR25, "__AVR_ATtiny43U__", 0, 0, 0x0060, 1, "tn43u") +AVR_MCU ("attiny87", ARCH_AVR25, "__AVR_ATtiny87__", 0, 0, 0x0100, 1, "tn87") +AVR_MCU ("attiny48", ARCH_AVR25, "__AVR_ATtiny48__", 0, 0, 0x0100, 1, "tn48") +AVR_MCU ("attiny88", ARCH_AVR25, "__AVR_ATtiny88__", 0, 0, 0x0100, 1, "tn88") +AVR_MCU ("at86rf401", ARCH_AVR25, "__AVR_AT86RF401__", 0, 0, 0x0060, 1, "86401") +/* Classic, > 8K, <= 64K. */ +AVR_MCU ("avr3", ARCH_AVR3, NULL, 0, 0, 0x0060, 1, "43355") +AVR_MCU ("at43usb355", ARCH_AVR3, "__AVR_AT43USB355__", 0, 0, 0x0060, 1, "43355") +AVR_MCU ("at76c711", ARCH_AVR3, "__AVR_AT76C711__", 0, 0, 0x0060, 1, "76711") +/* Classic, == 128K. */ +AVR_MCU ("avr31", ARCH_AVR31, NULL, 0, 1, 0x0060, 2, "m103") +AVR_MCU ("atmega103", ARCH_AVR31, "__AVR_ATmega103__", 0, 1, 0x0060, 2, "m103") +AVR_MCU ("at43usb320", ARCH_AVR31, "__AVR_AT43USB320__", 0, 0, 0x0060, 2, "43320") +/* Classic + MOVW + JMP/CALL. */ +AVR_MCU ("avr35", ARCH_AVR35, NULL, 0, 0, 0x0100, 1, "usb162") +AVR_MCU ("ata5505", ARCH_AVR35, "__AVR_ATA5505__", 0, 0, 0x0100, 1, "a5505") +AVR_MCU ("at90usb82", ARCH_AVR35, "__AVR_AT90USB82__", 0, 0, 0x0100, 1, "usb82") +AVR_MCU ("at90usb162", ARCH_AVR35, "__AVR_AT90USB162__", 0, 0, 0x0100, 1, "usb162") +AVR_MCU ("atmega8u2", ARCH_AVR35, "__AVR_ATmega8U2__", 0, 0, 0x0100, 1, "m8u2") +AVR_MCU ("atmega16u2", ARCH_AVR35, "__AVR_ATmega16U2__", 0, 0, 0x0100, 1, "m16u2") +AVR_MCU ("atmega32u2", ARCH_AVR35, "__AVR_ATmega32U2__", 0, 0, 0x0100, 1, "m32u2") +AVR_MCU ("attiny167", ARCH_AVR35, "__AVR_ATtiny167__", 0, 0, 0x0100, 1, "tn167") +AVR_MCU ("attiny1634", ARCH_AVR35, "__AVR_ATtiny1634__", 0, 0, 0x0100, 1, "tn1634") +/* Enhanced, <= 8K. 
*/ +AVR_MCU ("avr4", ARCH_AVR4, NULL, 0, 0, 0x0060, 1, "m8") +AVR_MCU ("ata6285", ARCH_AVR4, "__AVR_ATA6285__", 0, 0, 0x0100, 1, "a6285") +AVR_MCU ("ata6286", ARCH_AVR4, "__AVR_ATA6286__", 0, 0, 0x0100, 1, "a6286") +AVR_MCU ("atmega8", ARCH_AVR4, "__AVR_ATmega8__", 0, 0, 0x0060, 1, "m8") +AVR_MCU ("atmega8a", ARCH_AVR4, "__AVR_ATmega8A__", 0, 0, 0x0060, 1, "m8a") +AVR_MCU ("atmega48", ARCH_AVR4, "__AVR_ATmega48__", 0, 0, 0x0100, 1, "m48") +AVR_MCU ("atmega48a", ARCH_AVR4, "__AVR_ATmega48A__", 0, 0, 0x0100, 1, "m48a") +AVR_MCU ("atmega48p", ARCH_AVR4, "__AVR_ATmega48P__", 0, 0, 0x0100, 1, "m48p") +AVR_MCU ("atmega48pa", ARCH_AVR4, "__AVR_ATmega48PA__", 0, 0, 0x0100, 1, "m48pa") +AVR_MCU ("atmega88", ARCH_AVR4, "__AVR_ATmega88__", 0, 0, 0x0100, 1, "m88") +AVR_MCU ("atmega88a", ARCH_AVR4, "__AVR_ATmega88A__", 0, 0, 0x0100, 1, "m88a") +AVR_MCU ("atmega88p", ARCH_AVR4, "__AVR_ATmega88P__", 0, 0, 0x0100, 1, "m88p") +AVR_MCU ("atmega88pa", ARCH_AVR4, "__AVR_ATmega88PA__", 0, 0, 0x0100, 1, "m88pa") +AVR_MCU ("atmega8515", ARCH_AVR4, "__AVR_ATmega8515__", 0, 0, 0x0060, 1, "m8515") +AVR_MCU ("atmega8535", ARCH_AVR4, "__AVR_ATmega8535__", 0, 0, 0x0060, 1, "m8535") +AVR_MCU ("atmega8hva", ARCH_AVR4, "__AVR_ATmega8HVA__", 0, 0, 0x0100, 1, "m8hva") +AVR_MCU ("at90pwm1", ARCH_AVR4, "__AVR_AT90PWM1__", 0, 0, 0x0100, 1, "90pwm1") +AVR_MCU ("at90pwm2", ARCH_AVR4, "__AVR_AT90PWM2__", 0, 0, 0x0100, 1, "90pwm2") +AVR_MCU ("at90pwm2b", ARCH_AVR4, "__AVR_AT90PWM2B__", 0, 0, 0x0100, 1, "90pwm2b") +AVR_MCU ("at90pwm3", ARCH_AVR4, "__AVR_AT90PWM3__", 0, 0, 0x0100, 1, "90pwm3") +AVR_MCU ("at90pwm3b", ARCH_AVR4, "__AVR_AT90PWM3B__", 0, 0, 0x0100, 1, "90pwm3b") +AVR_MCU ("at90pwm81", ARCH_AVR4, "__AVR_AT90PWM81__", 0, 0, 0x0100, 1, "90pwm81") +/* Enhanced, > 8K, <= 64K. */ +AVR_MCU ("avr5", ARCH_AVR5, NULL, 0, 0, 0x0060, 1, "m16") +AVR_MCU ("ata5790", ARCH_AVR5, "__AVR_ATA5790__", 0, 0, 0x0100, 1, "a5790") +AVR_MCU ("ata5790n", ARCH_AVR5, "__AVR_ATA5790N__", 0, 0, 0x0100, 1, "a5790n") +AVR_MCU ("ata5795", ARCH_AVR5, "__AVR_ATA5795__", 0, 0, 0x0100, 1, "a5795") +AVR_MCU ("atmega16", ARCH_AVR5, "__AVR_ATmega16__", 0, 0, 0x0060, 1, "m16") +AVR_MCU ("atmega16a", ARCH_AVR5, "__AVR_ATmega16A__", 0, 0, 0x0060, 1, "m16a") +AVR_MCU ("atmega161", ARCH_AVR5, "__AVR_ATmega161__", 0, 0, 0x0060, 1, "m161") +AVR_MCU ("atmega162", ARCH_AVR5, "__AVR_ATmega162__", 0, 0, 0x0100, 1, "m162") +AVR_MCU ("atmega163", ARCH_AVR5, "__AVR_ATmega163__", 0, 0, 0x0060, 1, "m163") +AVR_MCU ("atmega164a", ARCH_AVR5, "__AVR_ATmega164A__", 0, 0, 0x0100, 1, "m164a") +AVR_MCU ("atmega164p", ARCH_AVR5, "__AVR_ATmega164P__", 0, 0, 0x0100, 1, "m164p") +AVR_MCU ("atmega164pa", ARCH_AVR5, "__AVR_ATmega164PA__", 0, 0, 0x0100, 1, "m164pa") +AVR_MCU ("atmega165", ARCH_AVR5, "__AVR_ATmega165__", 0, 0, 0x0100, 1, "m165") +AVR_MCU ("atmega165a", ARCH_AVR5, "__AVR_ATmega165A__", 0, 0, 0x0100, 1, "m165a") +AVR_MCU ("atmega165p", ARCH_AVR5, "__AVR_ATmega165P__", 0, 0, 0x0100, 1, "m165p") +AVR_MCU ("atmega165pa", ARCH_AVR5, "__AVR_ATmega165PA__", 0, 0, 0x0100, 1, "m165pa") +AVR_MCU ("atmega168", ARCH_AVR5, "__AVR_ATmega168__", 0, 0, 0x0100, 1, "m168") +AVR_MCU ("atmega168a", ARCH_AVR5, "__AVR_ATmega168A__", 0, 0, 0x0100, 1, "m168a") +AVR_MCU ("atmega168p", ARCH_AVR5, "__AVR_ATmega168P__", 0, 0, 0x0100, 1, "m168p") +AVR_MCU ("atmega168pa", ARCH_AVR5, "__AVR_ATmega168PA__", 0, 0, 0x0100, 1, "m168pa") +AVR_MCU ("atmega169", ARCH_AVR5, "__AVR_ATmega169__", 0, 0, 0x0100, 1, "m169") +AVR_MCU ("atmega169a", ARCH_AVR5, "__AVR_ATmega169A__", 0, 0, 0x0100, 1, "m169a") +AVR_MCU 
("atmega169p", ARCH_AVR5, "__AVR_ATmega169P__", 0, 0, 0x0100, 1, "m169p") +AVR_MCU ("atmega169pa", ARCH_AVR5, "__AVR_ATmega169PA__", 0, 0, 0x0100, 1, "m169pa") +AVR_MCU ("atmega16hvb", ARCH_AVR5, "__AVR_ATmega16HVB__", 0, 0, 0x0100, 1, "m16hvb") +AVR_MCU ("atmega16hvbrevb", ARCH_AVR5, "__AVR_ATmega16HVBREVB__", 0, 0, 0x0100, 1, "m16hvbrevb") +AVR_MCU ("atmega16m1", ARCH_AVR5, "__AVR_ATmega16M1__", 0, 0, 0x0100, 1, "m16m1") +AVR_MCU ("atmega16u4", ARCH_AVR5, "__AVR_ATmega16U4__", 0, 0, 0x0100, 1, "m16u4") +AVR_MCU ("atmega26hvg", ARCH_AVR5, "__AVR_ATmega26HVG__", 0, 0, 0x0100, 1, "m26hvg") +AVR_MCU ("atmega32a", ARCH_AVR5, "__AVR_ATmega32A__", 0, 0, 0x0060, 1, "m32a") +AVR_MCU ("atmega32", ARCH_AVR5, "__AVR_ATmega32__", 0, 0, 0x0060, 1, "m32") +AVR_MCU ("atmega323", ARCH_AVR5, "__AVR_ATmega323__", 0, 0, 0x0060, 1, "m323") +AVR_MCU ("atmega324a", ARCH_AVR5, "__AVR_ATmega324A__", 0, 0, 0x0100, 1, "m324a") +AVR_MCU ("atmega324p", ARCH_AVR5, "__AVR_ATmega324P__", 0, 0, 0x0100, 1, "m324p") +AVR_MCU ("atmega324pa", ARCH_AVR5, "__AVR_ATmega324PA__", 0, 0, 0x0100, 1, "m324pa") +AVR_MCU ("atmega325", ARCH_AVR5, "__AVR_ATmega325__", 0, 0, 0x0100, 1, "m325") +AVR_MCU ("atmega325a", ARCH_AVR5, "__AVR_ATmega325A__", 0, 0, 0x0100, 1, "m325a") +AVR_MCU ("atmega325p", ARCH_AVR5, "__AVR_ATmega325P__", 0, 0, 0x0100, 1, "m325p") +AVR_MCU ("atmega3250", ARCH_AVR5, "__AVR_ATmega3250__", 0, 0, 0x0100, 1, "m3250") +AVR_MCU ("atmega3250a", ARCH_AVR5, "__AVR_ATmega3250A__", 0, 0, 0x0100, 1, "m3250a") +AVR_MCU ("atmega3250p", ARCH_AVR5, "__AVR_ATmega3250P__", 0, 0, 0x0100, 1, "m3250p") +AVR_MCU ("atmega3250pa", ARCH_AVR5, "__AVR_ATmega3250PA__", 0, 0, 0x0100, 1, "m3250pa") +AVR_MCU ("atmega328", ARCH_AVR5, "__AVR_ATmega328__", 0, 0, 0x0100, 1, "m328") +AVR_MCU ("atmega328p", ARCH_AVR5, "__AVR_ATmega328P__", 0, 0, 0x0100, 1, "m328p") +AVR_MCU ("atmega329", ARCH_AVR5, "__AVR_ATmega329__", 0, 0, 0x0100, 1, "m329") +AVR_MCU ("atmega329a", ARCH_AVR5, "__AVR_ATmega329A__", 0, 0, 0x0100, 1, "m329a") +AVR_MCU ("atmega329p", ARCH_AVR5, "__AVR_ATmega329P__", 0, 0, 0x0100, 1, "m329p") +AVR_MCU ("atmega329pa", ARCH_AVR5, "__AVR_ATmega329PA__", 0, 0, 0x0100, 1, "m329pa") +AVR_MCU ("atmega3290", ARCH_AVR5, "__AVR_ATmega3290__", 0, 0, 0x0100, 1, "m3290") +AVR_MCU ("atmega3290a", ARCH_AVR5, "__AVR_ATmega3290A__", 0, 0, 0x0100, 1, "m3290a") +AVR_MCU ("atmega3290p", ARCH_AVR5, "__AVR_ATmega3290P__", 0, 0, 0x0100, 1, "m3290p") +AVR_MCU ("atmega3290pa", ARCH_AVR5, "__AVR_ATmega3290PA__", 0, 0, 0x0100, 1, "m3290pa") +AVR_MCU ("atmega32c1", ARCH_AVR5, "__AVR_ATmega32C1__", 0, 0, 0x0100, 1, "m32c1") +AVR_MCU ("atmega32m1", ARCH_AVR5, "__AVR_ATmega32M1__", 0, 0, 0x0100, 1, "m32m1") +AVR_MCU ("atmega32u4", ARCH_AVR5, "__AVR_ATmega32U4__", 0, 0, 0x0100, 1, "m32u4") +AVR_MCU ("atmega32u6", ARCH_AVR5, "__AVR_ATmega32U6__", 0, 0, 0x0100, 1, "m32u6") +AVR_MCU ("atmega406", ARCH_AVR5, "__AVR_ATmega406__", 0, 0, 0x0100, 1, "m406") +AVR_MCU ("atmega64", ARCH_AVR5, "__AVR_ATmega64__", 0, 0, 0x0100, 1, "m64") +AVR_MCU ("atmega64a", ARCH_AVR5, "__AVR_ATmega64A__", 0, 0, 0x0100, 1, "m64a") +AVR_MCU ("atmega640", ARCH_AVR5, "__AVR_ATmega640__", 0, 0, 0x0200, 1, "m640") +AVR_MCU ("atmega644", ARCH_AVR5, "__AVR_ATmega644__", 0, 0, 0x0100, 1, "m644") +AVR_MCU ("atmega644a", ARCH_AVR5, "__AVR_ATmega644A__", 0, 0, 0x0100, 1, "m644a") +AVR_MCU ("atmega644p", ARCH_AVR5, "__AVR_ATmega644P__", 0, 0, 0x0100, 1, "m644p") +AVR_MCU ("atmega644pa", ARCH_AVR5, "__AVR_ATmega644PA__", 0, 0, 0x0100, 1, "m644pa") +AVR_MCU ("atmega645", ARCH_AVR5, "__AVR_ATmega645__", 0, 
0, 0x0100, 1, "m645") +AVR_MCU ("atmega645a", ARCH_AVR5, "__AVR_ATmega645A__", 0, 0, 0x0100, 1, "m645a") +AVR_MCU ("atmega645p", ARCH_AVR5, "__AVR_ATmega645P__", 0, 0, 0x0100, 1, "m645p") +AVR_MCU ("atmega6450", ARCH_AVR5, "__AVR_ATmega6450__", 0, 0, 0x0100, 1, "m6450") +AVR_MCU ("atmega6450a", ARCH_AVR5, "__AVR_ATmega6450A__", 0, 0, 0x0100, 1, "m6450a") +AVR_MCU ("atmega6450p", ARCH_AVR5, "__AVR_ATmega6450P__", 0, 0, 0x0100, 1, "m6450p") +AVR_MCU ("atmega649", ARCH_AVR5, "__AVR_ATmega649__", 0, 0, 0x0100, 1, "m649") +AVR_MCU ("atmega649a", ARCH_AVR5, "__AVR_ATmega649A__", 0, 0, 0x0100, 1, "m649a") +AVR_MCU ("atmega649p", ARCH_AVR5, "__AVR_ATmega649P__", 0, 0, 0x0100, 1, "m649p") +AVR_MCU ("atmega6490", ARCH_AVR5, "__AVR_ATmega6490__", 0, 0, 0x0100, 1, "m6490") +AVR_MCU ("atmega16hva", ARCH_AVR5, "__AVR_ATmega16HVA__", 0, 0, 0x0100, 1, "m16hva") +AVR_MCU ("atmega16hva2", ARCH_AVR5, "__AVR_ATmega16HVA2__", 0, 0, 0x0100, 1, "m16hva2") +AVR_MCU ("atmega32hvb", ARCH_AVR5, "__AVR_ATmega32HVB__", 0, 0, 0x0100, 1, "m32hvb") +AVR_MCU ("atmega6490a", ARCH_AVR5, "__AVR_ATmega6490A__", 0, 0, 0x0100, 1, "m6490a") +AVR_MCU ("atmega6490p", ARCH_AVR5, "__AVR_ATmega6490P__", 0, 0, 0x0100, 1, "m6490p") +AVR_MCU ("atmega64c1", ARCH_AVR5, "__AVR_ATmega64C1__", 0, 0, 0x0100, 1, "m64c1") +AVR_MCU ("atmega64m1", ARCH_AVR5, "__AVR_ATmega64M1__", 0, 0, 0x0100, 1, "m64m1") +AVR_MCU ("atmega64hve", ARCH_AVR5, "__AVR_ATmega64HVE__", 0, 0, 0x0100, 1, "m64hve") +AVR_MCU ("atmega64rfa2", ARCH_AVR5, "__AVR_ATmega64RFA2__", 0, 0, 0x0200, 1, "m64rfa2") +AVR_MCU ("atmega64rfr2", ARCH_AVR5, "__AVR_ATmega64RFR2__", 0, 0, 0x0200, 1, "m64rfr2") +AVR_MCU ("atmega32hvbrevb", ARCH_AVR5, "__AVR_ATmega32HVBREVB__", 0, 0, 0x0100, 1, "m32hvbrevb") +AVR_MCU ("atmega48hvf", ARCH_AVR5, "__AVR_ATmega48HVF__", 0, 0, 0x0100, 1, "m48hvf") +AVR_MCU ("at90can32", ARCH_AVR5, "__AVR_AT90CAN32__", 0, 0, 0x0100, 1, "can32") +AVR_MCU ("at90can64", ARCH_AVR5, "__AVR_AT90CAN64__", 0, 0, 0x0100, 1, "can64") +AVR_MCU ("at90pwm161", ARCH_AVR5, "__AVR_AT90PWM161__", 0, 0, 0x0100, 1, "90pwm161") +AVR_MCU ("at90pwm216", ARCH_AVR5, "__AVR_AT90PWM216__", 0, 0, 0x0100, 1, "90pwm216") +AVR_MCU ("at90pwm316", ARCH_AVR5, "__AVR_AT90PWM316__", 0, 0, 0x0100, 1, "90pwm316") +AVR_MCU ("at90scr100", ARCH_AVR5, "__AVR_AT90SCR100__", 0, 0, 0x0100, 1, "90scr100") +AVR_MCU ("at90usb646", ARCH_AVR5, "__AVR_AT90USB646__", 0, 0, 0x0100, 1, "usb646") +AVR_MCU ("at90usb647", ARCH_AVR5, "__AVR_AT90USB647__", 0, 0, 0x0100, 1, "usb647") +AVR_MCU ("at94k", ARCH_AVR5, "__AVR_AT94K__", 0, 0, 0x0060, 1, "at94k") +AVR_MCU ("m3000", ARCH_AVR5, "__AVR_M3000__", 0, 0, 0x1000, 1, "m3000") +/* Enhanced, == 128K. 
*/ +AVR_MCU ("avr51", ARCH_AVR51, NULL, 0, 0, 0x0100, 2, "m128") +AVR_MCU ("atmega128", ARCH_AVR51, "__AVR_ATmega128__", 0, 0, 0x0100, 2, "m128") +AVR_MCU ("atmega128a", ARCH_AVR51, "__AVR_ATmega128A__", 0, 0, 0x0100, 2, "m128a") +AVR_MCU ("atmega1280", ARCH_AVR51, "__AVR_ATmega1280__", 0, 0, 0x0200, 2, "m1280") +AVR_MCU ("atmega1281", ARCH_AVR51, "__AVR_ATmega1281__", 0, 0, 0x0200, 2, "m1281") +AVR_MCU ("atmega1284", ARCH_AVR51, "__AVR_ATmega1284__", 0, 0, 0x0100, 2, "m1284") +AVR_MCU ("atmega1284p", ARCH_AVR51, "__AVR_ATmega1284P__", 0, 0, 0x0100, 2, "m1284p") +AVR_MCU ("atmega128rfa1", ARCH_AVR51, "__AVR_ATmega128RFA1__", 0, 0, 0x0200, 2, "m128rfa1") +AVR_MCU ("at90can128", ARCH_AVR51, "__AVR_AT90CAN128__", 0, 0, 0x0100, 2, "can128") +AVR_MCU ("at90usb1286", ARCH_AVR51, "__AVR_AT90USB1286__", 0, 0, 0x0100, 2, "usb1286") +AVR_MCU ("at90usb1287", ARCH_AVR51, "__AVR_AT90USB1287__", 0, 0, 0x0100, 2, "usb1287") +/* 3-Byte PC. */ +AVR_MCU ("avr6", ARCH_AVR6, NULL, 0, 0, 0x0200, 4, "m2561") +AVR_MCU ("atmega2560", ARCH_AVR6, "__AVR_ATmega2560__", 0, 0, 0x0200, 4, "m2560") +AVR_MCU ("atmega2561", ARCH_AVR6, "__AVR_ATmega2561__", 0, 0, 0x0200, 4, "m2561") +/* Xmega, 16K <= Flash < 64K, RAM <= 64K */ +AVR_MCU ("avrxmega2", ARCH_AVRXMEGA2, NULL, 0, 0, 0x2000, 1, "x32a4") +AVR_MCU ("atxmega16a4", ARCH_AVRXMEGA2, "__AVR_ATxmega16A4__", 0, 0, 0x2000, 1, "x16a4") +AVR_MCU ("atxmega16d4", ARCH_AVRXMEGA2, "__AVR_ATxmega16D4__", 0, 0, 0x2000, 1, "x16d4") +AVR_MCU ("atxmega32a4", ARCH_AVRXMEGA2, "__AVR_ATxmega32A4__", 0, 0, 0x2000, 1, "x32a4") +AVR_MCU ("atxmega32d4", ARCH_AVRXMEGA2, "__AVR_ATxmega32D4__", 0, 0, 0x2000, 1, "x32d4") +AVR_MCU ("atxmega32x1", ARCH_AVRXMEGA2, "__AVR_ATxmega32X1__", 0, 0, 0x2000, 1, "x32x1") +AVR_MCU ("atmxt112sl", ARCH_AVRXMEGA2, "__AVR_ATMXT112SL__", 0, 0, 0x2000, 1, "mxt112sl") +AVR_MCU ("atmxt224", ARCH_AVRXMEGA2, "__AVR_ATMXT224__", 0, 0, 0x2000, 1, "mxt224") +AVR_MCU ("atmxt224e", ARCH_AVRXMEGA2, "__AVR_ATMXT224E__", 0, 0, 0x2000, 1, "mxt224e") +AVR_MCU ("atmxt336s", ARCH_AVRXMEGA2, "__AVR_ATMXT336S__", 0, 0, 0x2000, 1, "mxt336s") +AVR_MCU ("atxmega16a4u", ARCH_AVRXMEGA2, "__AVR_ATxmega16A4U__", 0, 0, 0x2000, 1, "x16a4u") +AVR_MCU ("atxmega16c4", ARCH_AVRXMEGA2, "__AVR_ATxmega16C4__", 0, 0, 0x2000, 1, "x16c4") +AVR_MCU ("atxmega32a4u", ARCH_AVRXMEGA2, "__AVR_ATxmega32A4U__", 0, 0, 0x2000, 1, "x32a4u") +AVR_MCU ("atxmega32c4", ARCH_AVRXMEGA2, "__AVR_ATxmega32C4__", 0, 0, 0x2000, 1, "x32c4") +AVR_MCU ("atxmega32e5", ARCH_AVRXMEGA2, "__AVR_ATxmega32E5__", 0, 0, 0x2000, 1, "x32e5") +/* Xmega, 64K < Flash <= 128K, RAM <= 64K */ +AVR_MCU ("avrxmega4", ARCH_AVRXMEGA4, NULL, 0, 0, 0x2000, 2, "x64a4") +AVR_MCU ("atxmega64a3", ARCH_AVRXMEGA4, "__AVR_ATxmega64A3__", 0, 0, 0x2000, 2, "x64a3") +AVR_MCU ("atxmega64d3", ARCH_AVRXMEGA4, "__AVR_ATxmega64D3__", 0, 0, 0x2000, 2, "x64d3") +AVR_MCU ("atxmega64a3u", ARCH_AVRXMEGA4, "__AVR_ATxmega64A3U__", 0, 0, 0x2000, 2, "x64a3u") +AVR_MCU ("atxmega64a4u", ARCH_AVRXMEGA4, "__AVR_ATxmega64A4U__", 0, 0, 0x2000, 2, "x64a4u") +AVR_MCU ("atxmega64b1", ARCH_AVRXMEGA4, "__AVR_ATxmega64B1__", 0, 0, 0x2000, 2, "x64b1") +AVR_MCU ("atxmega64b3", ARCH_AVRXMEGA4, "__AVR_ATxmega64B3__", 0, 0, 0x2000, 2, "x64b3") +AVR_MCU ("atxmega64c3", ARCH_AVRXMEGA4, "__AVR_ATxmega64C3__", 0, 0, 0x2000, 2, "x64c3") +AVR_MCU ("atxmega64d4", ARCH_AVRXMEGA4, "__AVR_ATxmega64D4__", 0, 0, 0x2000, 2, "x64d4") +/* Xmega, 64K < Flash <= 128K, RAM > 64K */ +AVR_MCU ("avrxmega5", ARCH_AVRXMEGA5, NULL, 0, 0, 0x2000, 2, "x64a1") +AVR_MCU ("atxmega64a1", ARCH_AVRXMEGA5, 
"__AVR_ATxmega64A1__", 0, 0, 0x2000, 2, "x64a1") +AVR_MCU ("atxmega64a1u", ARCH_AVRXMEGA5, "__AVR_ATxmega64A1U__", 0, 0, 0x2000, 2, "x64a1u") +/* Xmega, 128K < Flash, RAM <= 64K */ +AVR_MCU ("avrxmega6", ARCH_AVRXMEGA6, NULL, 0, 0, 0x2000, 6, "x128a3") +AVR_MCU ("atxmega128a3", ARCH_AVRXMEGA6, "__AVR_ATxmega128A3__", 0, 0, 0x2000, 3, "x128a3") +AVR_MCU ("atxmega128d3", ARCH_AVRXMEGA6, "__AVR_ATxmega128D3__", 0, 0, 0x2000, 3, "x128d3") +AVR_MCU ("atxmega192a3", ARCH_AVRXMEGA6, "__AVR_ATxmega192A3__", 0, 0, 0x2000, 4, "x192a3") +AVR_MCU ("atxmega192d3", ARCH_AVRXMEGA6, "__AVR_ATxmega192D3__", 0, 0, 0x2000, 4, "x192d3") +AVR_MCU ("atxmega256a3", ARCH_AVRXMEGA6, "__AVR_ATxmega256A3__", 0, 0, 0x2000, 5, "x256a3") +AVR_MCU ("atxmega256a3b", ARCH_AVRXMEGA6, "__AVR_ATxmega256A3B__", 0, 0, 0x2000, 5, "x256a3b") +AVR_MCU ("atxmega256a3bu", ARCH_AVRXMEGA6, "__AVR_ATxmega256A3BU__", 0, 0, 0x2000, 5, "x256a3bu") +AVR_MCU ("atxmega256d3", ARCH_AVRXMEGA6, "__AVR_ATxmega256D3__", 0, 0, 0x2000, 5, "x256d3") +AVR_MCU ("atxmega128a3u", ARCH_AVRXMEGA6, "__AVR_ATxmega128A3U__", 0, 0, 0x2000, 3, "x128a3u") +AVR_MCU ("atxmega128b1", ARCH_AVRXMEGA6, "__AVR_ATxmega128B1__", 0, 0, 0x2000, 3, "x128b1") +AVR_MCU ("atxmega128b3", ARCH_AVRXMEGA6, "__AVR_ATxmega128B3__", 0, 0, 0x2000, 3, "x128b3") +AVR_MCU ("atxmega128c3", ARCH_AVRXMEGA6, "__AVR_ATxmega128C3__", 0, 0, 0x2000, 3, "x128c3") +AVR_MCU ("atxmega128d4", ARCH_AVRXMEGA6, "__AVR_ATxmega128D4__", 0, 0, 0x2000, 3, "x128d4") +AVR_MCU ("atmxt540s", ARCH_AVRXMEGA6, "__AVR_ATMXT540S__", 0, 0, 0x2000, 2, "mxt540s") +AVR_MCU ("atmxt540sreva", ARCH_AVRXMEGA6, "__AVR_ATMXT540SREVA__", 0, 0, 0x2000, 2, "mxt540sreva") +AVR_MCU ("atxmega192a3u", ARCH_AVRXMEGA6, "__AVR_ATxmega192A3U__", 0, 0, 0x2000, 4, "x192a3u") +AVR_MCU ("atxmega192c3", ARCH_AVRXMEGA6, "__AVR_ATxmega192C3__", 0, 0, 0x2000, 4, "x192c3") +AVR_MCU ("atxmega256a3u", ARCH_AVRXMEGA6, "__AVR_ATxmega256A3U__", 0, 0, 0x2000, 5, "x256a3u") +AVR_MCU ("atxmega256c3", ARCH_AVRXMEGA6, "__AVR_ATxmega256C3__", 0, 0, 0x2000, 5, "x256c3") +AVR_MCU ("atxmega384c3", ARCH_AVRXMEGA6, "__AVR_ATxmega384C3__", 0, 0, 0x2000, 6, "x384c3") +AVR_MCU ("atxmega384d3", ARCH_AVRXMEGA6, "__AVR_ATxmega384D3__", 0, 0, 0x2000, 6, "x384d3") +/* Xmega, 128K < Flash, RAM > 64K RAM. */ +AVR_MCU ("avrxmega7", ARCH_AVRXMEGA7, NULL, 0, 0, 0x2000, 3, "x128a1") +AVR_MCU ("atxmega128a1", ARCH_AVRXMEGA7, "__AVR_ATxmega128A1__", 0, 0, 0x2000, 3, "x128a1") +AVR_MCU ("atxmega128a1u", ARCH_AVRXMEGA7, "__AVR_ATxmega128A1U__", 0, 0, 0x2000, 3, "x128a1u") +AVR_MCU ("atxmega128a4u", ARCH_AVRXMEGA7, "__AVR_ATxmega128A4U__", 0, 0, 0x2000, 3, "x128a4u") +/* Assembler only. */ +AVR_MCU ("avr1", ARCH_AVR1, NULL, 0, 0, 0x0060, 1, "s1200") +AVR_MCU ("at90s1200", ARCH_AVR1, "__AVR_AT90S1200__", 0, 0, 0x0060, 1, "s1200") +AVR_MCU ("attiny11", ARCH_AVR1, "__AVR_ATtiny11__", 0, 0, 0x0060, 1, "tn11") +AVR_MCU ("attiny12", ARCH_AVR1, "__AVR_ATtiny12__", 0, 0, 0x0060, 1, "tn12") +AVR_MCU ("attiny15", ARCH_AVR1, "__AVR_ATtiny15__", 0, 0, 0x0060, 1, "tn15") +AVR_MCU ("attiny28", ARCH_AVR1, "__AVR_ATtiny28__", 0, 0, 0x0060, 1, "tn28") diff --git a/gcc-4.9/gcc/config/avr/avr-modes.def b/gcc-4.9/gcc/config/avr/avr-modes.def new file mode 100644 index 000000000..7d380b068 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-modes.def @@ -0,0 +1,33 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +FRACTIONAL_INT_MODE (PSI, 24, 3); + +/* Make TA and UTA 64 bits wide. + 128 bit wide modes would be insane on a 8-bit machine. + This needs special treatment in avr.c and avr-lib.h. */ + +ADJUST_BYTESIZE (TA, 8); +ADJUST_ALIGNMENT (TA, 1); +ADJUST_IBIT (TA, 16); +ADJUST_FBIT (TA, 47); + +ADJUST_BYTESIZE (UTA, 8); +ADJUST_ALIGNMENT (UTA, 1); +ADJUST_IBIT (UTA, 16); +ADJUST_FBIT (UTA, 48); diff --git a/gcc-4.9/gcc/config/avr/avr-protos.h b/gcc-4.9/gcc/config/avr/avr-protos.h new file mode 100644 index 000000000..c5ce78429 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-protos.h @@ -0,0 +1,164 @@ +/* Prototypes for exported functions defined in avr.c + + Copyright (C) 2000-2014 Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + + +extern int avr_function_arg_regno_p (int r); +extern void avr_cpu_cpp_builtins (struct cpp_reader * pfile); +extern enum reg_class avr_regno_reg_class (int r); +extern void asm_globalize_label (FILE *file, const char *name); +extern void avr_adjust_reg_alloc_order (void); +extern int avr_initial_elimination_offset (int from, int to); +extern int avr_simple_epilogue (void); +extern int avr_hard_regno_rename_ok (unsigned int, unsigned int); +extern rtx avr_return_addr_rtx (int count, rtx tem); +extern void avr_register_target_pragmas (void); +extern void avr_init_expanders (void); + +#ifdef TREE_CODE +extern void avr_asm_output_aligned_decl_common (FILE*, const_tree, const char*, unsigned HOST_WIDE_INT, unsigned int, bool); +extern void asm_output_external (FILE *file, tree decl, char *name); +extern int avr_progmem_p (tree decl, tree attributes); + +#ifdef RTX_CODE /* inside TREE_CODE */ +extern void avr_init_cumulative_args (CUMULATIVE_ARGS*, tree, rtx, tree); +#endif /* RTX_CODE inside TREE_CODE */ + +#endif /* TREE_CODE */ + +#ifdef RTX_CODE +extern int avr_hard_regno_call_part_clobbered (unsigned, enum machine_mode); +extern const char *output_movqi (rtx insn, rtx operands[], int *l); +extern const char *output_movhi (rtx insn, rtx operands[], int *l); +extern const char *output_movsisf (rtx insn, rtx operands[], int *l); +extern const char *avr_out_tstsi (rtx, rtx*, int*); +extern const char *avr_out_tsthi (rtx, rtx*, int*); +extern const char *avr_out_tstpsi (rtx, rtx*, int*); +extern const char *avr_out_compare (rtx, rtx*, int*); +extern const char *avr_out_compare64 (rtx, rtx*, int*); +extern const char *ret_cond_branch (rtx x, int len, int reverse); +extern const char *avr_out_movpsi (rtx, rtx*, int*); + +extern const char *ashlqi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashlhi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashlsi3_out (rtx insn, rtx operands[], int *len); + +extern const char *ashrqi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashrhi3_out (rtx insn, rtx operands[], int *len); +extern const char *ashrsi3_out (rtx insn, rtx operands[], int *len); + +extern const char *lshrqi3_out (rtx insn, rtx operands[], int *len); +extern const char *lshrhi3_out (rtx insn, rtx operands[], int *len); +extern const char *lshrsi3_out (rtx insn, rtx operands[], int *len); + +extern const char *avr_out_ashlpsi3 (rtx, rtx*, int*); +extern const char *avr_out_ashrpsi3 (rtx, rtx*, int*); +extern const char *avr_out_lshrpsi3 (rtx, rtx*, int*); + +extern bool avr_rotate_bytes (rtx operands[]); + +extern const char* avr_out_fract (rtx, rtx[], bool, int*); +extern rtx avr_to_int_mode (rtx); + +extern void avr_expand_prologue (void); +extern void avr_expand_epilogue (bool); +extern bool avr_emit_movmemhi (rtx*); +extern int avr_epilogue_uses (int regno); +extern int avr_starting_frame_offset (void); + +extern void avr_output_addr_vec_elt (FILE *stream, int value); +extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]); +extern const char* avr_out_bitop (rtx, rtx*, int*); +extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true); +extern const char* avr_out_round (rtx, rtx*, int* =NULL); +extern const char* avr_out_addto_sp (rtx*, int*); +extern const char* avr_out_xload (rtx, rtx*, int*); +extern const char* avr_out_movmem (rtx, rtx*, int*); +extern const char* avr_out_insert_bits (rtx*, int*); +extern bool avr_popcount_each_byte (rtx, int, int); +extern bool avr_has_nibble_0xf (rtx); + +extern 
int extra_constraint_Q (rtx x); +extern int avr_adjust_insn_length (rtx insn, int len); +extern const char* output_reload_inhi (rtx*, rtx, int*); +extern const char* output_reload_insisf (rtx*, rtx, int*); +extern const char* avr_out_reload_inpsi (rtx*, rtx, int*); +extern const char* avr_out_lpm (rtx, rtx*, int*); +extern void avr_notice_update_cc (rtx body, rtx insn); +extern int reg_unused_after (rtx insn, rtx reg); +extern int _reg_unused_after (rtx insn, rtx reg); +extern int avr_jump_mode (rtx x, rtx insn); +extern int test_hard_reg_class (enum reg_class rclass, rtx x); +extern int jump_over_one_insn_p (rtx insn, rtx dest); + +extern int avr_hard_regno_mode_ok (int regno, enum machine_mode mode); +extern void avr_final_prescan_insn (rtx insn, rtx *operand, int num_operands); +extern int avr_simplify_comparison_p (enum machine_mode mode, + RTX_CODE op, rtx x); +extern RTX_CODE avr_normalize_condition (RTX_CODE condition); +extern void out_shift_with_cnt (const char *templ, rtx insn, + rtx operands[], int *len, int t_len); +extern enum reg_class avr_mode_code_base_reg_class (enum machine_mode, addr_space_t, RTX_CODE, RTX_CODE); +extern bool avr_regno_mode_code_ok_for_base_p (int, enum machine_mode, addr_space_t, RTX_CODE, RTX_CODE); +extern rtx avr_incoming_return_addr_rtx (void); +extern rtx avr_legitimize_reload_address (rtx*, enum machine_mode, int, int, int, int, rtx (*)(rtx,int)); +extern bool avr_mem_flash_p (rtx); +extern bool avr_mem_memx_p (rtx); +extern bool avr_load_libgcc_p (rtx); +extern bool avr_xload_libgcc_p (enum machine_mode); + +extern rtx lpm_reg_rtx; +extern rtx lpm_addr_reg_rtx; +extern rtx tmp_reg_rtx; +extern rtx zero_reg_rtx; +extern rtx all_regs_rtx[32]; +extern rtx rampz_rtx; + +#endif /* RTX_CODE */ + +#ifdef REAL_VALUE_TYPE +extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n); +#endif + +extern bool avr_have_dimode; + +/* From avr-log.c */ + +#define avr_edump (avr_log_set_caller_e (__FUNCTION__)) +#define avr_fdump (avr_log_set_caller_f (__FUNCTION__)) + +extern int (*avr_log_set_caller_e (const char*))(const char*, ...); +extern int (*avr_log_set_caller_f (const char*))(FILE*, const char*, ...); + +extern void avr_log_set_avr_log (void); + +typedef struct +{ + unsigned address_cost :1; + unsigned builtin :1; + unsigned constraints :1; + unsigned legitimate_address_p :1; + unsigned legitimize_address :1; + unsigned legitimize_reload_address :1; + unsigned progmem :1; + unsigned rtx_costs :1; +} avr_log_t; + +extern avr_log_t avr_log; diff --git a/gcc-4.9/gcc/config/avr/avr-stdint.h b/gcc-4.9/gcc/config/avr/avr-stdint.h new file mode 100644 index 000000000..3ecc26895 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-stdint.h @@ -0,0 +1,66 @@ +/* Definitions for types on systems using newlib. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +/* + The intention of this file is to supply definitions that work with + avr-gcc's -mint8 that sets int to an 8-bit type. + + This file is intended to yield the same results as newlib-stdint.h, + but there are some differences to newlib-stdint.h: + + - AVR is an 8-bit architecture that cannot access 16-bit values + atomically, this SIG_ATOMIC_TYPE is "char". + + - For the same reason, [u]int_fast8_t is defined as 8-bit type. + +*/ + +#define SIG_ATOMIC_TYPE "char" + +#define INT8_TYPE "signed char" +#define INT16_TYPE (INT_TYPE_SIZE == 16 ? "int" : "long int") +#define INT32_TYPE (INT_TYPE_SIZE == 16 ? "long int" : "long long int") +#define INT64_TYPE (INT_TYPE_SIZE == 16 ? "long long int" : 0) +#define UINT8_TYPE "unsigned char" +#define UINT16_TYPE (INT_TYPE_SIZE == 16 ? "unsigned int" : "long unsigned int") +#define UINT32_TYPE (INT_TYPE_SIZE == 16 ? "long unsigned int" : "long long unsigned int") +#define UINT64_TYPE (INT_TYPE_SIZE == 16 ? "long long unsigned int" : 0) + +#define INT_LEAST8_TYPE INT8_TYPE +#define INT_LEAST16_TYPE INT16_TYPE +#define INT_LEAST32_TYPE INT32_TYPE +#define INT_LEAST64_TYPE INT64_TYPE +#define UINT_LEAST8_TYPE UINT8_TYPE +#define UINT_LEAST16_TYPE UINT16_TYPE +#define UINT_LEAST32_TYPE UINT32_TYPE +#define UINT_LEAST64_TYPE UINT64_TYPE + +#define INT_FAST8_TYPE INT8_TYPE +#define INT_FAST16_TYPE (INT_TYPE_SIZE == 16 ? "int" : INT16_TYPE) +#define INT_FAST32_TYPE INT32_TYPE +#define INT_FAST64_TYPE INT64_TYPE +#define UINT_FAST8_TYPE UINT8_TYPE +#define UINT_FAST16_TYPE (INT_TYPE_SIZE == 16 ? "unsigned int" : UINT16_TYPE) +#define UINT_FAST32_TYPE UINT32_TYPE +#define UINT_FAST64_TYPE UINT64_TYPE + +#define INTPTR_TYPE PTRDIFF_TYPE +#ifndef UINTPTR_TYPE +#define UINTPTR_TYPE SIZE_TYPE +#endif diff --git a/gcc-4.9/gcc/config/avr/avr-tables.opt b/gcc-4.9/gcc/config/avr/avr-tables.opt new file mode 100644 index 000000000..b5c6d8290 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr-tables.opt @@ -0,0 +1,766 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from avr-mcus.def. + +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . 
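For orientation only (this sketch is not part of the checked-in file): how the INT_TYPE_SIZE conditionals in avr-stdint.h above resolve, comparing the default 16-bit int with the 8-bit int selected by -mint8. The helper names below are invented for the illustration.

  /* Illustrative sketch: mirrors the INT16_TYPE / INT32_TYPE selection in
     avr-stdint.h; int_type_size stands in for the target's INT_TYPE_SIZE. */
  #include <stdio.h>

  static const char *
  int16_type (int int_type_size)
  {
    return int_type_size == 16 ? "int" : "long int";
  }

  static const char *
  int32_type (int int_type_size)
  {
    return int_type_size == 16 ? "long int" : "long long int";
  }

  int
  main (void)
  {
    printf ("default (16-bit int): int16_t = %s, int32_t = %s\n",
            int16_type (16), int32_type (16));
    printf ("-mint8  (8-bit int) : int16_t = %s, int32_t = %s\n",
            int16_type (8), int32_type (8));
    return 0;
  }

Note that with -mint8 there is no 64-bit integer type at all, which is why INT64_TYPE and UINT64_TYPE resolve to 0 in that configuration.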
+ +Enum +Name(avr_mcu) Type(int) +Known MCU names: + +EnumValue +Enum(avr_mcu) String(avr2) Value(0) + +EnumValue +Enum(avr_mcu) String(at90s2313) Value(1) + +EnumValue +Enum(avr_mcu) String(at90s2323) Value(2) + +EnumValue +Enum(avr_mcu) String(at90s2333) Value(3) + +EnumValue +Enum(avr_mcu) String(at90s2343) Value(4) + +EnumValue +Enum(avr_mcu) String(attiny22) Value(5) + +EnumValue +Enum(avr_mcu) String(attiny26) Value(6) + +EnumValue +Enum(avr_mcu) String(at90s4414) Value(7) + +EnumValue +Enum(avr_mcu) String(at90s4433) Value(8) + +EnumValue +Enum(avr_mcu) String(at90s4434) Value(9) + +EnumValue +Enum(avr_mcu) String(at90s8515) Value(10) + +EnumValue +Enum(avr_mcu) String(at90c8534) Value(11) + +EnumValue +Enum(avr_mcu) String(at90s8535) Value(12) + +EnumValue +Enum(avr_mcu) String(avr25) Value(13) + +EnumValue +Enum(avr_mcu) String(ata6289) Value(14) + +EnumValue +Enum(avr_mcu) String(ata5272) Value(15) + +EnumValue +Enum(avr_mcu) String(attiny13) Value(16) + +EnumValue +Enum(avr_mcu) String(attiny13a) Value(17) + +EnumValue +Enum(avr_mcu) String(attiny2313) Value(18) + +EnumValue +Enum(avr_mcu) String(attiny2313a) Value(19) + +EnumValue +Enum(avr_mcu) String(attiny24) Value(20) + +EnumValue +Enum(avr_mcu) String(attiny24a) Value(21) + +EnumValue +Enum(avr_mcu) String(attiny4313) Value(22) + +EnumValue +Enum(avr_mcu) String(attiny44) Value(23) + +EnumValue +Enum(avr_mcu) String(attiny44a) Value(24) + +EnumValue +Enum(avr_mcu) String(attiny84) Value(25) + +EnumValue +Enum(avr_mcu) String(attiny84a) Value(26) + +EnumValue +Enum(avr_mcu) String(attiny25) Value(27) + +EnumValue +Enum(avr_mcu) String(attiny45) Value(28) + +EnumValue +Enum(avr_mcu) String(attiny85) Value(29) + +EnumValue +Enum(avr_mcu) String(attiny261) Value(30) + +EnumValue +Enum(avr_mcu) String(attiny261a) Value(31) + +EnumValue +Enum(avr_mcu) String(attiny461) Value(32) + +EnumValue +Enum(avr_mcu) String(attiny461a) Value(33) + +EnumValue +Enum(avr_mcu) String(attiny861) Value(34) + +EnumValue +Enum(avr_mcu) String(attiny861a) Value(35) + +EnumValue +Enum(avr_mcu) String(attiny43u) Value(36) + +EnumValue +Enum(avr_mcu) String(attiny87) Value(37) + +EnumValue +Enum(avr_mcu) String(attiny48) Value(38) + +EnumValue +Enum(avr_mcu) String(attiny88) Value(39) + +EnumValue +Enum(avr_mcu) String(at86rf401) Value(40) + +EnumValue +Enum(avr_mcu) String(avr3) Value(41) + +EnumValue +Enum(avr_mcu) String(at43usb355) Value(42) + +EnumValue +Enum(avr_mcu) String(at76c711) Value(43) + +EnumValue +Enum(avr_mcu) String(avr31) Value(44) + +EnumValue +Enum(avr_mcu) String(atmega103) Value(45) + +EnumValue +Enum(avr_mcu) String(at43usb320) Value(46) + +EnumValue +Enum(avr_mcu) String(avr35) Value(47) + +EnumValue +Enum(avr_mcu) String(ata5505) Value(48) + +EnumValue +Enum(avr_mcu) String(at90usb82) Value(49) + +EnumValue +Enum(avr_mcu) String(at90usb162) Value(50) + +EnumValue +Enum(avr_mcu) String(atmega8u2) Value(51) + +EnumValue +Enum(avr_mcu) String(atmega16u2) Value(52) + +EnumValue +Enum(avr_mcu) String(atmega32u2) Value(53) + +EnumValue +Enum(avr_mcu) String(attiny167) Value(54) + +EnumValue +Enum(avr_mcu) String(attiny1634) Value(55) + +EnumValue +Enum(avr_mcu) String(avr4) Value(56) + +EnumValue +Enum(avr_mcu) String(ata6285) Value(57) + +EnumValue +Enum(avr_mcu) String(ata6286) Value(58) + +EnumValue +Enum(avr_mcu) String(atmega8) Value(59) + +EnumValue +Enum(avr_mcu) String(atmega8a) Value(60) + +EnumValue +Enum(avr_mcu) String(atmega48) Value(61) + +EnumValue +Enum(avr_mcu) String(atmega48a) Value(62) + +EnumValue 
+Enum(avr_mcu) String(atmega48p) Value(63) + +EnumValue +Enum(avr_mcu) String(atmega48pa) Value(64) + +EnumValue +Enum(avr_mcu) String(atmega88) Value(65) + +EnumValue +Enum(avr_mcu) String(atmega88a) Value(66) + +EnumValue +Enum(avr_mcu) String(atmega88p) Value(67) + +EnumValue +Enum(avr_mcu) String(atmega88pa) Value(68) + +EnumValue +Enum(avr_mcu) String(atmega8515) Value(69) + +EnumValue +Enum(avr_mcu) String(atmega8535) Value(70) + +EnumValue +Enum(avr_mcu) String(atmega8hva) Value(71) + +EnumValue +Enum(avr_mcu) String(at90pwm1) Value(72) + +EnumValue +Enum(avr_mcu) String(at90pwm2) Value(73) + +EnumValue +Enum(avr_mcu) String(at90pwm2b) Value(74) + +EnumValue +Enum(avr_mcu) String(at90pwm3) Value(75) + +EnumValue +Enum(avr_mcu) String(at90pwm3b) Value(76) + +EnumValue +Enum(avr_mcu) String(at90pwm81) Value(77) + +EnumValue +Enum(avr_mcu) String(avr5) Value(78) + +EnumValue +Enum(avr_mcu) String(ata5790) Value(79) + +EnumValue +Enum(avr_mcu) String(ata5790n) Value(80) + +EnumValue +Enum(avr_mcu) String(ata5795) Value(81) + +EnumValue +Enum(avr_mcu) String(atmega16) Value(82) + +EnumValue +Enum(avr_mcu) String(atmega16a) Value(83) + +EnumValue +Enum(avr_mcu) String(atmega161) Value(84) + +EnumValue +Enum(avr_mcu) String(atmega162) Value(85) + +EnumValue +Enum(avr_mcu) String(atmega163) Value(86) + +EnumValue +Enum(avr_mcu) String(atmega164a) Value(87) + +EnumValue +Enum(avr_mcu) String(atmega164p) Value(88) + +EnumValue +Enum(avr_mcu) String(atmega164pa) Value(89) + +EnumValue +Enum(avr_mcu) String(atmega165) Value(90) + +EnumValue +Enum(avr_mcu) String(atmega165a) Value(91) + +EnumValue +Enum(avr_mcu) String(atmega165p) Value(92) + +EnumValue +Enum(avr_mcu) String(atmega165pa) Value(93) + +EnumValue +Enum(avr_mcu) String(atmega168) Value(94) + +EnumValue +Enum(avr_mcu) String(atmega168a) Value(95) + +EnumValue +Enum(avr_mcu) String(atmega168p) Value(96) + +EnumValue +Enum(avr_mcu) String(atmega168pa) Value(97) + +EnumValue +Enum(avr_mcu) String(atmega169) Value(98) + +EnumValue +Enum(avr_mcu) String(atmega169a) Value(99) + +EnumValue +Enum(avr_mcu) String(atmega169p) Value(100) + +EnumValue +Enum(avr_mcu) String(atmega169pa) Value(101) + +EnumValue +Enum(avr_mcu) String(atmega16hvb) Value(102) + +EnumValue +Enum(avr_mcu) String(atmega16hvbrevb) Value(103) + +EnumValue +Enum(avr_mcu) String(atmega16m1) Value(104) + +EnumValue +Enum(avr_mcu) String(atmega16u4) Value(105) + +EnumValue +Enum(avr_mcu) String(atmega26hvg) Value(106) + +EnumValue +Enum(avr_mcu) String(atmega32a) Value(107) + +EnumValue +Enum(avr_mcu) String(atmega32) Value(108) + +EnumValue +Enum(avr_mcu) String(atmega323) Value(109) + +EnumValue +Enum(avr_mcu) String(atmega324a) Value(110) + +EnumValue +Enum(avr_mcu) String(atmega324p) Value(111) + +EnumValue +Enum(avr_mcu) String(atmega324pa) Value(112) + +EnumValue +Enum(avr_mcu) String(atmega325) Value(113) + +EnumValue +Enum(avr_mcu) String(atmega325a) Value(114) + +EnumValue +Enum(avr_mcu) String(atmega325p) Value(115) + +EnumValue +Enum(avr_mcu) String(atmega3250) Value(116) + +EnumValue +Enum(avr_mcu) String(atmega3250a) Value(117) + +EnumValue +Enum(avr_mcu) String(atmega3250p) Value(118) + +EnumValue +Enum(avr_mcu) String(atmega3250pa) Value(119) + +EnumValue +Enum(avr_mcu) String(atmega328) Value(120) + +EnumValue +Enum(avr_mcu) String(atmega328p) Value(121) + +EnumValue +Enum(avr_mcu) String(atmega329) Value(122) + +EnumValue +Enum(avr_mcu) String(atmega329a) Value(123) + +EnumValue +Enum(avr_mcu) String(atmega329p) Value(124) + +EnumValue +Enum(avr_mcu) 
String(atmega329pa) Value(125) + +EnumValue +Enum(avr_mcu) String(atmega3290) Value(126) + +EnumValue +Enum(avr_mcu) String(atmega3290a) Value(127) + +EnumValue +Enum(avr_mcu) String(atmega3290p) Value(128) + +EnumValue +Enum(avr_mcu) String(atmega3290pa) Value(129) + +EnumValue +Enum(avr_mcu) String(atmega32c1) Value(130) + +EnumValue +Enum(avr_mcu) String(atmega32m1) Value(131) + +EnumValue +Enum(avr_mcu) String(atmega32u4) Value(132) + +EnumValue +Enum(avr_mcu) String(atmega32u6) Value(133) + +EnumValue +Enum(avr_mcu) String(atmega406) Value(134) + +EnumValue +Enum(avr_mcu) String(atmega64) Value(135) + +EnumValue +Enum(avr_mcu) String(atmega64a) Value(136) + +EnumValue +Enum(avr_mcu) String(atmega640) Value(137) + +EnumValue +Enum(avr_mcu) String(atmega644) Value(138) + +EnumValue +Enum(avr_mcu) String(atmega644a) Value(139) + +EnumValue +Enum(avr_mcu) String(atmega644p) Value(140) + +EnumValue +Enum(avr_mcu) String(atmega644pa) Value(141) + +EnumValue +Enum(avr_mcu) String(atmega645) Value(142) + +EnumValue +Enum(avr_mcu) String(atmega645a) Value(143) + +EnumValue +Enum(avr_mcu) String(atmega645p) Value(144) + +EnumValue +Enum(avr_mcu) String(atmega6450) Value(145) + +EnumValue +Enum(avr_mcu) String(atmega6450a) Value(146) + +EnumValue +Enum(avr_mcu) String(atmega6450p) Value(147) + +EnumValue +Enum(avr_mcu) String(atmega649) Value(148) + +EnumValue +Enum(avr_mcu) String(atmega649a) Value(149) + +EnumValue +Enum(avr_mcu) String(atmega649p) Value(150) + +EnumValue +Enum(avr_mcu) String(atmega6490) Value(151) + +EnumValue +Enum(avr_mcu) String(atmega16hva) Value(152) + +EnumValue +Enum(avr_mcu) String(atmega16hva2) Value(153) + +EnumValue +Enum(avr_mcu) String(atmega32hvb) Value(154) + +EnumValue +Enum(avr_mcu) String(atmega6490a) Value(155) + +EnumValue +Enum(avr_mcu) String(atmega6490p) Value(156) + +EnumValue +Enum(avr_mcu) String(atmega64c1) Value(157) + +EnumValue +Enum(avr_mcu) String(atmega64m1) Value(158) + +EnumValue +Enum(avr_mcu) String(atmega64hve) Value(159) + +EnumValue +Enum(avr_mcu) String(atmega64rfa2) Value(160) + +EnumValue +Enum(avr_mcu) String(atmega64rfr2) Value(161) + +EnumValue +Enum(avr_mcu) String(atmega32hvbrevb) Value(162) + +EnumValue +Enum(avr_mcu) String(atmega48hvf) Value(163) + +EnumValue +Enum(avr_mcu) String(at90can32) Value(164) + +EnumValue +Enum(avr_mcu) String(at90can64) Value(165) + +EnumValue +Enum(avr_mcu) String(at90pwm161) Value(166) + +EnumValue +Enum(avr_mcu) String(at90pwm216) Value(167) + +EnumValue +Enum(avr_mcu) String(at90pwm316) Value(168) + +EnumValue +Enum(avr_mcu) String(at90scr100) Value(169) + +EnumValue +Enum(avr_mcu) String(at90usb646) Value(170) + +EnumValue +Enum(avr_mcu) String(at90usb647) Value(171) + +EnumValue +Enum(avr_mcu) String(at94k) Value(172) + +EnumValue +Enum(avr_mcu) String(m3000) Value(173) + +EnumValue +Enum(avr_mcu) String(avr51) Value(174) + +EnumValue +Enum(avr_mcu) String(atmega128) Value(175) + +EnumValue +Enum(avr_mcu) String(atmega128a) Value(176) + +EnumValue +Enum(avr_mcu) String(atmega1280) Value(177) + +EnumValue +Enum(avr_mcu) String(atmega1281) Value(178) + +EnumValue +Enum(avr_mcu) String(atmega1284) Value(179) + +EnumValue +Enum(avr_mcu) String(atmega1284p) Value(180) + +EnumValue +Enum(avr_mcu) String(atmega128rfa1) Value(181) + +EnumValue +Enum(avr_mcu) String(at90can128) Value(182) + +EnumValue +Enum(avr_mcu) String(at90usb1286) Value(183) + +EnumValue +Enum(avr_mcu) String(at90usb1287) Value(184) + +EnumValue +Enum(avr_mcu) String(avr6) Value(185) + +EnumValue +Enum(avr_mcu) 
String(atmega2560) Value(186) + +EnumValue +Enum(avr_mcu) String(atmega2561) Value(187) + +EnumValue +Enum(avr_mcu) String(avrxmega2) Value(188) + +EnumValue +Enum(avr_mcu) String(atxmega16a4) Value(189) + +EnumValue +Enum(avr_mcu) String(atxmega16d4) Value(190) + +EnumValue +Enum(avr_mcu) String(atxmega32a4) Value(191) + +EnumValue +Enum(avr_mcu) String(atxmega32d4) Value(192) + +EnumValue +Enum(avr_mcu) String(atxmega32x1) Value(193) + +EnumValue +Enum(avr_mcu) String(atmxt112sl) Value(194) + +EnumValue +Enum(avr_mcu) String(atmxt224) Value(195) + +EnumValue +Enum(avr_mcu) String(atmxt224e) Value(196) + +EnumValue +Enum(avr_mcu) String(atmxt336s) Value(197) + +EnumValue +Enum(avr_mcu) String(atxmega16a4u) Value(198) + +EnumValue +Enum(avr_mcu) String(atxmega16c4) Value(199) + +EnumValue +Enum(avr_mcu) String(atxmega32a4u) Value(200) + +EnumValue +Enum(avr_mcu) String(atxmega32c4) Value(201) + +EnumValue +Enum(avr_mcu) String(atxmega32e5) Value(202) + +EnumValue +Enum(avr_mcu) String(avrxmega4) Value(203) + +EnumValue +Enum(avr_mcu) String(atxmega64a3) Value(204) + +EnumValue +Enum(avr_mcu) String(atxmega64d3) Value(205) + +EnumValue +Enum(avr_mcu) String(atxmega64a3u) Value(206) + +EnumValue +Enum(avr_mcu) String(atxmega64a4u) Value(207) + +EnumValue +Enum(avr_mcu) String(atxmega64b1) Value(208) + +EnumValue +Enum(avr_mcu) String(atxmega64b3) Value(209) + +EnumValue +Enum(avr_mcu) String(atxmega64c3) Value(210) + +EnumValue +Enum(avr_mcu) String(atxmega64d4) Value(211) + +EnumValue +Enum(avr_mcu) String(avrxmega5) Value(212) + +EnumValue +Enum(avr_mcu) String(atxmega64a1) Value(213) + +EnumValue +Enum(avr_mcu) String(atxmega64a1u) Value(214) + +EnumValue +Enum(avr_mcu) String(avrxmega6) Value(215) + +EnumValue +Enum(avr_mcu) String(atxmega128a3) Value(216) + +EnumValue +Enum(avr_mcu) String(atxmega128d3) Value(217) + +EnumValue +Enum(avr_mcu) String(atxmega192a3) Value(218) + +EnumValue +Enum(avr_mcu) String(atxmega192d3) Value(219) + +EnumValue +Enum(avr_mcu) String(atxmega256a3) Value(220) + +EnumValue +Enum(avr_mcu) String(atxmega256a3b) Value(221) + +EnumValue +Enum(avr_mcu) String(atxmega256a3bu) Value(222) + +EnumValue +Enum(avr_mcu) String(atxmega256d3) Value(223) + +EnumValue +Enum(avr_mcu) String(atxmega128a3u) Value(224) + +EnumValue +Enum(avr_mcu) String(atxmega128b1) Value(225) + +EnumValue +Enum(avr_mcu) String(atxmega128b3) Value(226) + +EnumValue +Enum(avr_mcu) String(atxmega128c3) Value(227) + +EnumValue +Enum(avr_mcu) String(atxmega128d4) Value(228) + +EnumValue +Enum(avr_mcu) String(atmxt540s) Value(229) + +EnumValue +Enum(avr_mcu) String(atmxt540sreva) Value(230) + +EnumValue +Enum(avr_mcu) String(atxmega192a3u) Value(231) + +EnumValue +Enum(avr_mcu) String(atxmega192c3) Value(232) + +EnumValue +Enum(avr_mcu) String(atxmega256a3u) Value(233) + +EnumValue +Enum(avr_mcu) String(atxmega256c3) Value(234) + +EnumValue +Enum(avr_mcu) String(atxmega384c3) Value(235) + +EnumValue +Enum(avr_mcu) String(atxmega384d3) Value(236) + +EnumValue +Enum(avr_mcu) String(avrxmega7) Value(237) + +EnumValue +Enum(avr_mcu) String(atxmega128a1) Value(238) + +EnumValue +Enum(avr_mcu) String(atxmega128a1u) Value(239) + +EnumValue +Enum(avr_mcu) String(atxmega128a4u) Value(240) + +EnumValue +Enum(avr_mcu) String(avr1) Value(241) + +EnumValue +Enum(avr_mcu) String(at90s1200) Value(242) + +EnumValue +Enum(avr_mcu) String(attiny11) Value(243) + +EnumValue +Enum(avr_mcu) String(attiny12) Value(244) + +EnumValue +Enum(avr_mcu) String(attiny15) Value(245) + +EnumValue +Enum(avr_mcu) String(attiny28) 
Value(246) + diff --git a/gcc-4.9/gcc/config/avr/avr.c b/gcc-4.9/gcc/config/avr/avr.c new file mode 100644 index 000000000..8ca7de0b3 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr.c @@ -0,0 +1,12522 @@ +/* Subroutines for insn-output.c for ATMEL AVR micro controllers + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "insn-codes.h" +#include "flags.h" +#include "reload.h" +#include "tree.h" +#include "print-tree.h" +#include "calls.h" +#include "stor-layout.h" +#include "stringpool.h" +#include "output.h" +#include "expr.h" +#include "c-family/c-common.h" +#include "diagnostic-core.h" +#include "obstack.h" +#include "function.h" +#include "recog.h" +#include "optabs.h" +#include "ggc.h" +#include "langhooks.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" +#include "params.h" +#include "df.h" + +/* Maximal allowed offset for an address in the LD command */ +#define MAX_LD_OFFSET(MODE) (64 - (signed)GET_MODE_SIZE (MODE)) + +/* Return true if STR starts with PREFIX and false, otherwise. */ +#define STR_PREFIX_P(STR,PREFIX) (0 == strncmp (STR, PREFIX, strlen (PREFIX))) + +/* The 4 bits starting at SECTION_MACH_DEP are reserved to store the + address space where data is to be located. + As the only non-generic address spaces are all located in flash, + this can be used to test if data shall go into some .progmem* section. + This must be the rightmost field of machine dependent section flags. */ +#define AVR_SECTION_PROGMEM (0xf * SECTION_MACH_DEP) + +/* Similar 4-bit region for SYMBOL_REF_FLAGS. */ +#define AVR_SYMBOL_FLAG_PROGMEM (0xf * SYMBOL_FLAG_MACH_DEP) + +/* Similar 4-bit region in SYMBOL_REF_FLAGS: + Set address-space AS in SYMBOL_REF_FLAGS of SYM */ +#define AVR_SYMBOL_SET_ADDR_SPACE(SYM,AS) \ + do { \ + SYMBOL_REF_FLAGS (sym) &= ~AVR_SYMBOL_FLAG_PROGMEM; \ + SYMBOL_REF_FLAGS (sym) |= (AS) * SYMBOL_FLAG_MACH_DEP; \ + } while (0) + +/* Read address-space from SYMBOL_REF_FLAGS of SYM */ +#define AVR_SYMBOL_GET_ADDR_SPACE(SYM) \ + ((SYMBOL_REF_FLAGS (sym) & AVR_SYMBOL_FLAG_PROGMEM) \ + / SYMBOL_FLAG_MACH_DEP) + +/* Known address spaces. The order must be the same as in the respective + enum from avr.h (or designated initialized must be used). 
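The AVR_SYMBOL_FLAG_PROGMEM / AVR_SYMBOL_SET_ADDR_SPACE / AVR_SYMBOL_GET_ADDR_SPACE macros in avr.c above store a 4-bit address-space number in the machine-dependent part of SYMBOL_REF_FLAGS by multiplying and dividing with the lowest machine-dependent flag bit. A self-contained sketch of the same idiom follows; the bit position and all names are invented for the example.

  #include <assert.h>
  #include <stdio.h>

  /* Hypothetical stand-ins: FLAG_MACH_DEP is the lowest target-specific bit,
     so multiplying by it is a left shift and dividing by it a right shift. */
  #define FLAG_MACH_DEP  (1u << 8)
  #define FIELD_MASK     (0xfu * FLAG_MACH_DEP)

  static unsigned int
  set_field (unsigned int flags, unsigned int as)
  {
    flags &= ~FIELD_MASK;            /* clear the 4-bit field */
    flags |= as * FLAG_MACH_DEP;     /* store AS at the field's position */
    return flags;
  }

  static unsigned int
  get_field (unsigned int flags)
  {
    return (flags & FIELD_MASK) / FLAG_MACH_DEP;
  }

  int
  main (void)
  {
    unsigned int flags = 0x5;        /* some unrelated low bits */

    flags = set_field (flags, 7);    /* e.g. address-space number 7 */
    assert (get_field (flags) == 7);
    assert ((flags & 0xff) == 0x5);  /* the low bits are untouched */
    printf ("flags = 0x%x\n", flags);
    return 0;
  }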
*/ +const avr_addrspace_t avr_addrspace[ADDR_SPACE_COUNT] = +{ + { ADDR_SPACE_RAM, 0, 2, "", 0, NULL }, + { ADDR_SPACE_FLASH, 1, 2, "__flash", 0, ".progmem.data" }, + { ADDR_SPACE_FLASH1, 1, 2, "__flash1", 1, ".progmem1.data" }, + { ADDR_SPACE_FLASH2, 1, 2, "__flash2", 2, ".progmem2.data" }, + { ADDR_SPACE_FLASH3, 1, 2, "__flash3", 3, ".progmem3.data" }, + { ADDR_SPACE_FLASH4, 1, 2, "__flash4", 4, ".progmem4.data" }, + { ADDR_SPACE_FLASH5, 1, 2, "__flash5", 5, ".progmem5.data" }, + { ADDR_SPACE_MEMX, 1, 3, "__memx", 0, ".progmemx.data" }, +}; + + +/* Holding RAM addresses of some SFRs used by the compiler and that + are unique over all devices in an architecture like 'avr4'. */ + +typedef struct +{ + /* SREG: The processor status */ + int sreg; + + /* RAMPX, RAMPY, RAMPD and CCP of XMEGA */ + int ccp; + int rampd; + int rampx; + int rampy; + + /* RAMPZ: The high byte of 24-bit address used with ELPM */ + int rampz; + + /* SP: The stack pointer and its low and high byte */ + int sp_l; + int sp_h; +} avr_addr_t; + +static avr_addr_t avr_addr; + + +/* Prototypes for local helper functions. */ + +static const char* out_movqi_r_mr (rtx, rtx[], int*); +static const char* out_movhi_r_mr (rtx, rtx[], int*); +static const char* out_movsi_r_mr (rtx, rtx[], int*); +static const char* out_movqi_mr_r (rtx, rtx[], int*); +static const char* out_movhi_mr_r (rtx, rtx[], int*); +static const char* out_movsi_mr_r (rtx, rtx[], int*); + +static int get_sequence_length (rtx insns); +static int sequent_regs_live (void); +static const char *ptrreg_to_str (int); +static const char *cond_string (enum rtx_code); +static int avr_num_arg_regs (enum machine_mode, const_tree); +static int avr_operand_rtx_cost (rtx, enum machine_mode, enum rtx_code, + int, bool); +static void output_reload_in_const (rtx*, rtx, int*, bool); +static struct machine_function * avr_init_machine_status (void); + + +/* Prototypes for hook implementors if needed before their implementation. */ + +static bool avr_rtx_costs (rtx, int, int, int, int*, bool); + + +/* Allocate registers from r25 to r8 for parameters for function calls. */ +#define FIRST_CUM_REG 26 + +/* Implicit target register of LPM instruction (R0) */ +extern GTY(()) rtx lpm_reg_rtx; +rtx lpm_reg_rtx; + +/* (Implicit) address register of LPM instruction (R31:R30 = Z) */ +extern GTY(()) rtx lpm_addr_reg_rtx; +rtx lpm_addr_reg_rtx; + +/* Temporary register RTX (reg:QI TMP_REGNO) */ +extern GTY(()) rtx tmp_reg_rtx; +rtx tmp_reg_rtx; + +/* Zeroed register RTX (reg:QI ZERO_REGNO) */ +extern GTY(()) rtx zero_reg_rtx; +rtx zero_reg_rtx; + +/* RTXs for all general purpose registers as QImode */ +extern GTY(()) rtx all_regs_rtx[32]; +rtx all_regs_rtx[32]; + +/* SREG, the processor status */ +extern GTY(()) rtx sreg_rtx; +rtx sreg_rtx; + +/* RAMP* special function registers */ +extern GTY(()) rtx rampd_rtx; +extern GTY(()) rtx rampx_rtx; +extern GTY(()) rtx rampy_rtx; +extern GTY(()) rtx rampz_rtx; +rtx rampd_rtx; +rtx rampx_rtx; +rtx rampy_rtx; +rtx rampz_rtx; + +/* RTX containing the strings "" and "e", respectively */ +static GTY(()) rtx xstring_empty; +static GTY(()) rtx xstring_e; + +/* Current architecture. */ +const avr_arch_t *avr_current_arch; + +/* Current device. */ +const avr_mcu_t *avr_current_device; + +/* Section to put switch tables in. */ +static GTY(()) section *progmem_swtable_section; + +/* Unnamed sections associated to __attribute__((progmem)) aka. PROGMEM + or to address space __flash* or __memx. 
Only used as singletons inside + avr_asm_select_section, but it must not be local there because of GTY. */ +static GTY(()) section *progmem_section[ADDR_SPACE_COUNT]; + +/* Condition for insns/expanders from avr-dimode.md. */ +bool avr_have_dimode = true; + +/* To track if code will use .bss and/or .data. */ +bool avr_need_clear_bss_p = false; +bool avr_need_copy_data_p = false; + + +/* Transform UP into lowercase and write the result to LO. + You must provide enough space for LO. Return LO. */ + +static char* +avr_tolower (char *lo, const char *up) +{ + char *lo0 = lo; + + for (; *up; up++, lo++) + *lo = TOLOWER (*up); + + *lo = '\0'; + + return lo0; +} + + +/* Custom function to count number of set bits. */ + +static inline int +avr_popcount (unsigned int val) +{ + int pop = 0; + + while (val) + { + val &= val-1; + pop++; + } + + return pop; +} + + +/* Constraint helper function. XVAL is a CONST_INT or a CONST_DOUBLE. + Return true if the least significant N_BYTES bytes of XVAL all have a + popcount in POP_MASK and false, otherwise. POP_MASK represents a subset + of integers which contains an integer N iff bit N of POP_MASK is set. */ + +bool +avr_popcount_each_byte (rtx xval, int n_bytes, int pop_mask) +{ + int i; + + enum machine_mode mode = GET_MODE (xval); + + if (VOIDmode == mode) + mode = SImode; + + for (i = 0; i < n_bytes; i++) + { + rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i); + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + if (0 == (pop_mask & (1 << avr_popcount (val8)))) + return false; + } + + return true; +} + + +/* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get + the bit representation of X by "casting" it to CONST_INT. */ + +rtx +avr_to_int_mode (rtx x) +{ + enum machine_mode mode = GET_MODE (x); + + return VOIDmode == mode + ? x + : simplify_gen_subreg (int_mode_for_mode (mode), x, mode, 0); +} + + +/* Implement `TARGET_OPTION_OVERRIDE'. */ + +static void +avr_option_override (void) +{ + flag_delete_null_pointer_checks = 0; + + /* caller-save.c looks for call-clobbered hard registers that are assigned + to pseudos that cross calls and tries so save-restore them around calls + in order to reduce the number of stack slots needed. + + This might lead to situations where reload is no more able to cope + with the challenge of AVR's very few address registers and fails to + perform the requested spills. */ + + if (avr_strict_X) + flag_caller_saves = 0; + + /* Unwind tables currently require a frame pointer for correctness, + see toplev.c:process_options(). */ + + if ((flag_unwind_tables + || flag_non_call_exceptions + || flag_asynchronous_unwind_tables) + && !ACCUMULATE_OUTGOING_ARGS) + { + flag_omit_frame_pointer = 0; + } + + if (flag_pic == 1) + warning (OPT_fpic, "-fpic is not supported"); + if (flag_pic == 2) + warning (OPT_fPIC, "-fPIC is not supported"); + if (flag_pie == 1) + warning (OPT_fpie, "-fpie is not supported"); + if (flag_pie == 2) + warning (OPT_fPIE, "-fPIE is not supported"); + + avr_current_device = &avr_mcu_types[avr_mcu_index]; + avr_current_arch = &avr_arch_types[avr_current_device->arch]; + + /* RAM addresses of some SFRs common to all devices in respective arch. 
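The constraint helper avr_popcount_each_byte above encodes its set of allowed per-byte popcounts in a bit mask: bit k of pop_mask set means a byte with exactly k one-bits is acceptable, so a mask with bits 0 and 8 set accepts only 0x00 and 0xff bytes. A plain-C sketch of the same test on an ordinary integer; the names here are illustrative, not the compiler's.

  #include <stdbool.h>
  #include <stdio.h>

  static int
  popcount8 (unsigned long v)
  {
    int n = 0;

    for (v &= 0xff; v; v &= v - 1)
      n++;

    return n;
  }

  /* True iff each of the N_BYTES low bytes of VAL has a popcount that is
     a member of the set encoded by POP_MASK (bit k set <=> k allowed). */
  static bool
  popcount_each_byte (unsigned long val, int n_bytes, int pop_mask)
  {
    int i;

    for (i = 0; i < n_bytes; i++)
      if (0 == (pop_mask & (1 << popcount8 (val >> (8 * i)))))
        return false;

    return true;
  }

  int
  main (void)
  {
    int mask_0_or_8 = (1 << 0) | (1 << 8);   /* bytes must be 0x00 or 0xff */

    printf ("%d\n", (int) popcount_each_byte (0x00ff00fful, 4, mask_0_or_8)); /* 1 */
    printf ("%d\n", (int) popcount_each_byte (0x00fe00fful, 4, mask_0_or_8)); /* 0 */
    return 0;
  }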
*/ + + /* SREG: Status Register containing flags like I (global IRQ) */ + avr_addr.sreg = 0x3F + avr_current_arch->sfr_offset; + + /* RAMPZ: Address' high part when loading via ELPM */ + avr_addr.rampz = 0x3B + avr_current_arch->sfr_offset; + + avr_addr.rampy = 0x3A + avr_current_arch->sfr_offset; + avr_addr.rampx = 0x39 + avr_current_arch->sfr_offset; + avr_addr.rampd = 0x38 + avr_current_arch->sfr_offset; + avr_addr.ccp = 0x34 + avr_current_arch->sfr_offset; + + /* SP: Stack Pointer (SP_H:SP_L) */ + avr_addr.sp_l = 0x3D + avr_current_arch->sfr_offset; + avr_addr.sp_h = avr_addr.sp_l + 1; + + init_machine_status = avr_init_machine_status; + + avr_log_set_avr_log(); +} + +/* Function to set up the backend function structure. */ + +static struct machine_function * +avr_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + + +/* Implement `INIT_EXPANDERS'. */ +/* The function works like a singleton. */ + +void +avr_init_expanders (void) +{ + int regno; + + for (regno = 0; regno < 32; regno ++) + all_regs_rtx[regno] = gen_rtx_REG (QImode, regno); + + lpm_reg_rtx = all_regs_rtx[LPM_REGNO]; + tmp_reg_rtx = all_regs_rtx[TMP_REGNO]; + zero_reg_rtx = all_regs_rtx[ZERO_REGNO]; + + lpm_addr_reg_rtx = gen_rtx_REG (HImode, REG_Z); + + sreg_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.sreg)); + rampd_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampd)); + rampx_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampx)); + rampy_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampy)); + rampz_rtx = gen_rtx_MEM (QImode, GEN_INT (avr_addr.rampz)); + + xstring_empty = gen_rtx_CONST_STRING (VOIDmode, ""); + xstring_e = gen_rtx_CONST_STRING (VOIDmode, "e"); +} + + +/* Implement `REGNO_REG_CLASS'. */ +/* Return register class for register R. */ + +enum reg_class +avr_regno_reg_class (int r) +{ + static const enum reg_class reg_class_tab[] = + { + R0_REG, + /* r1 - r15 */ + NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, + NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, + NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, + NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, NO_LD_REGS, + /* r16 - r23 */ + SIMPLE_LD_REGS, SIMPLE_LD_REGS, SIMPLE_LD_REGS, SIMPLE_LD_REGS, + SIMPLE_LD_REGS, SIMPLE_LD_REGS, SIMPLE_LD_REGS, SIMPLE_LD_REGS, + /* r24, r25 */ + ADDW_REGS, ADDW_REGS, + /* X: r26, 27 */ + POINTER_X_REGS, POINTER_X_REGS, + /* Y: r28, r29 */ + POINTER_Y_REGS, POINTER_Y_REGS, + /* Z: r30, r31 */ + POINTER_Z_REGS, POINTER_Z_REGS, + /* SP: SPL, SPH */ + STACK_REG, STACK_REG + }; + + if (r <= 33) + return reg_class_tab[r]; + + return ALL_REGS; +} + + +/* Implement `TARGET_SCALAR_MODE_SUPPORTED_P'. */ + +static bool +avr_scalar_mode_supported_p (enum machine_mode mode) +{ + if (ALL_FIXED_POINT_MODE_P (mode)) + return true; + + if (PSImode == mode) + return true; + + return default_scalar_mode_supported_p (mode); +} + + +/* Return TRUE if DECL is a VAR_DECL located in flash and FALSE, otherwise. */ + +static bool +avr_decl_flash_p (tree decl) +{ + if (TREE_CODE (decl) != VAR_DECL + || TREE_TYPE (decl) == error_mark_node) + { + return false; + } + + return !ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))); +} + + +/* Return TRUE if DECL is a VAR_DECL located in the 24-bit flash + address space and FALSE, otherwise. */ + +static bool +avr_decl_memx_p (tree decl) +{ + if (TREE_CODE (decl) != VAR_DECL + || TREE_TYPE (decl) == error_mark_node) + { + return false; + } + + return (ADDR_SPACE_MEMX == TYPE_ADDR_SPACE (TREE_TYPE (decl))); +} + + +/* Return TRUE if X is a MEM rtx located in flash and FALSE, otherwise. 
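avr_decl_flash_p and avr_decl_memx_p above (together with avr_mem_flash_p / avr_mem_memx_p that follow) classify user variables placed in the named address spaces listed in the avr_addrspace[] table. A user-level illustration, compilable only with avr-gcc and with contents invented for the example:

  /* Illustration only; __flash and __memx are the AVR named address spaces. */
  const __flash char table[] = { 1, 2, 3, 4 };        /* placed in .progmem.data */
  const __memx char far_msg[] = "anywhere in flash";  /* 24-bit __memx space     */

  char
  read_both (unsigned char i)
  {
    /* Accesses are turned into program-memory loads (LPM, or ELPM/libgcc
       helpers for __memx) rather than ordinary RAM loads. */
    return table[i] + far_msg[i];
  }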
*/ + +bool +avr_mem_flash_p (rtx x) +{ + return (MEM_P (x) + && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))); +} + + +/* Return TRUE if X is a MEM rtx located in the 24-bit flash + address space and FALSE, otherwise. */ + +bool +avr_mem_memx_p (rtx x) +{ + return (MEM_P (x) + && ADDR_SPACE_MEMX == MEM_ADDR_SPACE (x)); +} + + +/* A helper for the subsequent function attribute used to dig for + attribute 'name' in a FUNCTION_DECL or FUNCTION_TYPE */ + +static inline int +avr_lookup_function_attribute1 (const_tree func, const char *name) +{ + if (FUNCTION_DECL == TREE_CODE (func)) + { + if (NULL_TREE != lookup_attribute (name, DECL_ATTRIBUTES (func))) + { + return true; + } + + func = TREE_TYPE (func); + } + + gcc_assert (TREE_CODE (func) == FUNCTION_TYPE + || TREE_CODE (func) == METHOD_TYPE); + + return NULL_TREE != lookup_attribute (name, TYPE_ATTRIBUTES (func)); +} + +/* Return nonzero if FUNC is a naked function. */ + +static int +avr_naked_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "naked"); +} + +/* Return nonzero if FUNC is an interrupt function as specified + by the "interrupt" attribute. */ + +static int +avr_interrupt_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "interrupt"); +} + +/* Return nonzero if FUNC is a signal function as specified + by the "signal" attribute. */ + +static int +avr_signal_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "signal"); +} + +/* Return nonzero if FUNC is an OS_task function. */ + +static int +avr_OS_task_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "OS_task"); +} + +/* Return nonzero if FUNC is an OS_main function. */ + +static int +avr_OS_main_function_p (tree func) +{ + return avr_lookup_function_attribute1 (func, "OS_main"); +} + + +/* Implement `TARGET_SET_CURRENT_FUNCTION'. */ +/* Sanity cheching for above function attributes. */ + +static void +avr_set_current_function (tree decl) +{ + location_t loc; + const char *isr; + + if (decl == NULL_TREE + || current_function_decl == NULL_TREE + || current_function_decl == error_mark_node + || ! cfun->machine + || cfun->machine->attributes_checked_p) + return; + + loc = DECL_SOURCE_LOCATION (decl); + + cfun->machine->is_naked = avr_naked_function_p (decl); + cfun->machine->is_signal = avr_signal_function_p (decl); + cfun->machine->is_interrupt = avr_interrupt_function_p (decl); + cfun->machine->is_OS_task = avr_OS_task_function_p (decl); + cfun->machine->is_OS_main = avr_OS_main_function_p (decl); + + isr = cfun->machine->is_interrupt ? "interrupt" : "signal"; + + /* Too much attributes make no sense as they request conflicting features. */ + + if (cfun->machine->is_OS_task + cfun->machine->is_OS_main + + (cfun->machine->is_signal || cfun->machine->is_interrupt) > 1) + error_at (loc, "function attributes %qs, %qs and %qs are mutually" + " exclusive", "OS_task", "OS_main", isr); + + /* 'naked' will hide effects of 'OS_task' and 'OS_main'. */ + + if (cfun->machine->is_naked + && (cfun->machine->is_OS_task || cfun->machine->is_OS_main)) + warning_at (loc, OPT_Wattributes, "function attributes %qs and %qs have" + " no effect on %qs function", "OS_task", "OS_main", "naked"); + + if (cfun->machine->is_interrupt || cfun->machine->is_signal) + { + tree args = TYPE_ARG_TYPES (TREE_TYPE (decl)); + tree ret = TREE_TYPE (TREE_TYPE (decl)); + const char *name; + + name = DECL_ASSEMBLER_NAME_SET_P (decl) + ? 
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) + : IDENTIFIER_POINTER (DECL_NAME (decl)); + + /* Skip a leading '*' that might still prefix the assembler name, + e.g. in non-LTO runs. */ + + name = default_strip_name_encoding (name); + + /* Silently ignore 'signal' if 'interrupt' is present. AVR-LibC startet + using this when it switched from SIGNAL and INTERRUPT to ISR. */ + + if (cfun->machine->is_interrupt) + cfun->machine->is_signal = 0; + + /* Interrupt handlers must be void __vector (void) functions. */ + + if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE) + error_at (loc, "%qs function cannot have arguments", isr); + + if (TREE_CODE (ret) != VOID_TYPE) + error_at (loc, "%qs function cannot return a value", isr); + + /* If the function has the 'signal' or 'interrupt' attribute, ensure + that the name of the function is "__vector_NN" so as to catch + when the user misspells the vector name. */ + + if (!STR_PREFIX_P (name, "__vector")) + warning_at (loc, 0, "%qs appears to be a misspelled %s handler", + name, isr); + } + + /* Don't print the above diagnostics more than once. */ + + cfun->machine->attributes_checked_p = 1; +} + + +/* Implement `ACCUMULATE_OUTGOING_ARGS'. */ + +int +avr_accumulate_outgoing_args (void) +{ + if (!cfun) + return TARGET_ACCUMULATE_OUTGOING_ARGS; + + /* FIXME: For setjmp and in avr_builtin_setjmp_frame_value we don't know + what offset is correct. In some cases it is relative to + virtual_outgoing_args_rtx and in others it is relative to + virtual_stack_vars_rtx. For example code see + gcc.c-torture/execute/built-in-setjmp.c + gcc.c-torture/execute/builtins/sprintf-chk.c */ + + return (TARGET_ACCUMULATE_OUTGOING_ARGS + && !(cfun->calls_setjmp + || cfun->has_nonlocal_label)); +} + + +/* Report contribution of accumulated outgoing arguments to stack size. */ + +static inline int +avr_outgoing_args_size (void) +{ + return ACCUMULATE_OUTGOING_ARGS ? crtl->outgoing_args_size : 0; +} + + +/* Implement `STARTING_FRAME_OFFSET'. */ +/* This is the offset from the frame pointer register to the first stack slot + that contains a variable living in the frame. */ + +int +avr_starting_frame_offset (void) +{ + return 1 + avr_outgoing_args_size (); +} + + +/* Return the number of hard registers to push/pop in the prologue/epilogue + of the current function, and optionally store these registers in SET. */ + +static int +avr_regs_to_save (HARD_REG_SET *set) +{ + int reg, count; + int int_or_sig_p = cfun->machine->is_interrupt || cfun->machine->is_signal; + + if (set) + CLEAR_HARD_REG_SET (*set); + count = 0; + + /* No need to save any registers if the function never returns or + has the "OS_task" or "OS_main" attribute. */ + + if (TREE_THIS_VOLATILE (current_function_decl) + || cfun->machine->is_OS_task + || cfun->machine->is_OS_main) + return 0; + + for (reg = 0; reg < 32; reg++) + { + /* Do not push/pop __tmp_reg__, __zero_reg__, as well as + any global register variables. */ + + if (fixed_regs[reg]) + continue; + + if ((int_or_sig_p && !crtl->is_leaf && call_used_regs[reg]) + || (df_regs_ever_live_p (reg) + && (int_or_sig_p || !call_used_regs[reg]) + /* Don't record frame pointer registers here. They are treated + indivitually in prologue. 
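avr_set_current_function above enforces the shape of "interrupt"/"signal" handlers: no arguments, void return, and a name of the form __vector_NN, which is what avr-libc's ISR() macro produces; anything else triggers the misspelled-handler warning. A hand-written equivalent for illustration, with the vector number invented:

  /* Roughly what avr-libc's ISR(SOME_vect) expands to; vector number 16
     is made up for this example. */
  void __vector_16 (void) __attribute__ ((signal, used, externally_visible));

  void
  __vector_16 (void)
  {
    /* Handler body; the "signal" attribute makes the compiler save SREG,
       __tmp_reg__ and __zero_reg__ and return with RETI. */
  }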
*/ + && !(frame_pointer_needed + && (reg == REG_Y || reg == (REG_Y+1))))) + { + if (set) + SET_HARD_REG_BIT (*set, reg); + count++; + } + } + return count; +} + + +/* Implement `TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS' */ + +static bool +avr_allocate_stack_slots_for_args (void) +{ + return !cfun->machine->is_naked; +} + + +/* Return true if register FROM can be eliminated via register TO. */ + +static bool +avr_can_eliminate (const int from, const int to) +{ + return ((frame_pointer_needed && to == FRAME_POINTER_REGNUM) + || !frame_pointer_needed); +} + + +/* Implement `TARGET_WARN_FUNC_RETURN'. */ + +static bool +avr_warn_func_return (tree decl) +{ + /* Naked functions are implemented entirely in assembly, including the + return sequence, so suppress warnings about this. */ + + return !avr_naked_function_p (decl); +} + +/* Compute offset between arg_pointer and frame_pointer. */ + +int +avr_initial_elimination_offset (int from, int to) +{ + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return 0; + else + { + int offset = frame_pointer_needed ? 2 : 0; + int avr_pc_size = AVR_HAVE_EIJMP_EICALL ? 3 : 2; + + offset += avr_regs_to_save (NULL); + return (get_frame_size () + avr_outgoing_args_size() + + avr_pc_size + 1 + offset); + } +} + + +/* Helper for the function below. */ + +static void +avr_adjust_type_node (tree *node, enum machine_mode mode, int sat_p) +{ + *node = make_node (FIXED_POINT_TYPE); + TYPE_SATURATING (*node) = sat_p; + TYPE_UNSIGNED (*node) = UNSIGNED_FIXED_POINT_MODE_P (mode); + TYPE_IBIT (*node) = GET_MODE_IBIT (mode); + TYPE_FBIT (*node) = GET_MODE_FBIT (mode); + TYPE_PRECISION (*node) = GET_MODE_BITSIZE (mode); + TYPE_ALIGN (*node) = 8; + SET_TYPE_MODE (*node, mode); + + layout_type (*node); +} + + +/* Implement `TARGET_BUILD_BUILTIN_VA_LIST'. */ + +static tree +avr_build_builtin_va_list (void) +{ + /* avr-modes.def adjusts [U]TA to be 64-bit modes with 48 fractional bits. + This is more appropriate for the 8-bit machine AVR than 128-bit modes. + The ADJUST_IBIT/FBIT are handled in toplev:init_adjust_machine_modes() + which is auto-generated by genmodes, but the compiler assigns [U]DAmode + to the long long accum modes instead of the desired [U]TAmode. + + Fix this now, right after node setup in tree.c:build_common_tree_nodes(). + This must run before c-cppbuiltin.c:builtin_define_fixed_point_constants() + which built-in defines macros like __ULLACCUM_FBIT__ that are used by + libgcc to detect IBIT and FBIT. */ + + avr_adjust_type_node (&ta_type_node, TAmode, 0); + avr_adjust_type_node (&uta_type_node, UTAmode, 0); + avr_adjust_type_node (&sat_ta_type_node, TAmode, 1); + avr_adjust_type_node (&sat_uta_type_node, UTAmode, 1); + + unsigned_long_long_accum_type_node = uta_type_node; + long_long_accum_type_node = ta_type_node; + sat_unsigned_long_long_accum_type_node = sat_uta_type_node; + sat_long_long_accum_type_node = sat_ta_type_node; + + /* Dispatch to the default handler. */ + + return std_build_builtin_va_list (); +} + + +/* Implement `TARGET_BUILTIN_SETJMP_FRAME_VALUE'. */ +/* Actual start of frame is virtual_stack_vars_rtx this is offset from + frame pointer by +STARTING_FRAME_OFFSET. + Using saved frame = virtual_stack_vars_rtx - STARTING_FRAME_OFFSET + avoids creating add/sub of offset in nonlocal goto and setjmp. 
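For the arg-pointer elimination case, avr_initial_elimination_offset above sums the frame, any accumulated outgoing arguments, the return address (2 or 3 bytes), the saved frame pointer and the other pushed registers, plus the same one-byte skew that appears in avr_starting_frame_offset. A worked sketch with invented numbers:

  #include <stdio.h>

  /* Mirrors the non-trivial branch of avr_initial_elimination_offset above;
     every input below is invented for the example. */
  static int
  elimination_offset (int frame_size, int outgoing_args, int saved_regs,
                      int fp_needed, int pc_size)
  {
    int offset = (fp_needed ? 2 : 0) + saved_regs;

    return frame_size + outgoing_args + pc_size + 1 + offset;
  }

  int
  main (void)
  {
    /* 10-byte frame, no accumulated outgoing args, 3 pushed registers,
       frame pointer in use, 2-byte return address: 10 + 0 + 2 + 1 + 5 = 18. */
    printf ("%d\n", elimination_offset (10, 0, 3, 1, 2));
    return 0;
  }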
*/ + +static rtx +avr_builtin_setjmp_frame_value (void) +{ + rtx xval = gen_reg_rtx (Pmode); + emit_insn (gen_subhi3 (xval, virtual_stack_vars_rtx, + gen_int_mode (STARTING_FRAME_OFFSET, Pmode))); + return xval; +} + + +/* Return contents of MEM at frame pointer + stack size + 1 (+2 if 3-byte PC). + This is return address of function. */ + +rtx +avr_return_addr_rtx (int count, rtx tem) +{ + rtx r; + + /* Can only return this function's return address. Others not supported. */ + if (count) + return NULL; + + if (AVR_3_BYTE_PC) + { + r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+2"); + warning (0, "% contains only 2 bytes" + " of address"); + } + else + r = gen_rtx_SYMBOL_REF (Pmode, ".L__stack_usage+1"); + + r = gen_rtx_PLUS (Pmode, tem, r); + r = gen_frame_mem (Pmode, memory_address (Pmode, r)); + r = gen_rtx_ROTATE (HImode, r, GEN_INT (8)); + return r; +} + +/* Return 1 if the function epilogue is just a single "ret". */ + +int +avr_simple_epilogue (void) +{ + return (! frame_pointer_needed + && get_frame_size () == 0 + && avr_outgoing_args_size() == 0 + && avr_regs_to_save (NULL) == 0 + && ! cfun->machine->is_interrupt + && ! cfun->machine->is_signal + && ! cfun->machine->is_naked + && ! TREE_THIS_VOLATILE (current_function_decl)); +} + +/* This function checks sequence of live registers. */ + +static int +sequent_regs_live (void) +{ + int reg; + int live_seq = 0; + int cur_seq = 0; + + for (reg = 0; reg < 18; ++reg) + { + if (fixed_regs[reg]) + { + /* Don't recognize sequences that contain global register + variables. */ + + if (live_seq != 0) + return 0; + else + continue; + } + + if (!call_used_regs[reg]) + { + if (df_regs_ever_live_p (reg)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + } + } + + if (!frame_pointer_needed) + { + if (df_regs_ever_live_p (REG_Y)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + + if (df_regs_ever_live_p (REG_Y+1)) + { + ++live_seq; + ++cur_seq; + } + else + cur_seq = 0; + } + else + { + cur_seq += 2; + live_seq += 2; + } + return (cur_seq == live_seq) ? live_seq : 0; +} + +/* Obtain the length sequence of insns. */ + +int +get_sequence_length (rtx insns) +{ + rtx insn; + int length; + + for (insn = insns, length = 0; insn; insn = NEXT_INSN (insn)) + length += get_attr_length (insn); + + return length; +} + + +/* Implement `INCOMING_RETURN_ADDR_RTX'. */ + +rtx +avr_incoming_return_addr_rtx (void) +{ + /* The return address is at the top of the stack. Note that the push + was via post-decrement, which means the actual address is off by one. */ + return gen_frame_mem (HImode, plus_constant (Pmode, stack_pointer_rtx, 1)); +} + +/* Helper for expand_prologue. Emit a push of a byte register. */ + +static void +emit_push_byte (unsigned regno, bool frame_related_p) +{ + rtx mem, reg, insn; + + mem = gen_rtx_POST_DEC (HImode, stack_pointer_rtx); + mem = gen_frame_mem (QImode, mem); + reg = gen_rtx_REG (QImode, regno); + + insn = emit_insn (gen_rtx_SET (VOIDmode, mem, reg)); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + + cfun->machine->stack_usage++; +} + + +/* Helper for expand_prologue. Emit a push of a SFR via tmp_reg. + SFR is a MEM representing the memory location of the SFR. + If CLR_P then clear the SFR after the push using zero_reg. 
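emit_push_byte above models the AVR PUSH instruction: the byte is stored through the current stack pointer and SP is then decremented, which is why avr_incoming_return_addr_rtx reads the return address at SP + 1. A toy model in plain C; the 64-byte "RAM" and the starting SP value are invented for the illustration.

  #include <assert.h>

  /* Toy model of the AVR push convention: store through SP, then decrement. */
  static unsigned char mem[64];
  static unsigned int sp = 63;

  static void
  push (unsigned char b)
  {
    mem[sp--] = b;
  }

  int
  main (void)
  {
    push (0xAB);                    /* e.g. one byte of a return address */
    assert (mem[sp + 1] == 0xAB);   /* the last pushed byte sits at SP + 1 */
    return 0;
  }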
*/ + +static void +emit_push_sfr (rtx sfr, bool frame_related_p, bool clr_p) +{ + rtx insn; + + gcc_assert (MEM_P (sfr)); + + /* IN __tmp_reg__, IO(SFR) */ + insn = emit_move_insn (tmp_reg_rtx, sfr); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + + /* PUSH __tmp_reg__ */ + emit_push_byte (TMP_REGNO, frame_related_p); + + if (clr_p) + { + /* OUT IO(SFR), __zero_reg__ */ + insn = emit_move_insn (sfr, const0_rtx); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +static void +avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set) +{ + rtx insn; + bool isr_p = cfun->machine->is_interrupt || cfun->machine->is_signal; + int live_seq = sequent_regs_live (); + + HOST_WIDE_INT size_max + = (HOST_WIDE_INT) GET_MODE_MASK (AVR_HAVE_8BIT_SP ? QImode : Pmode); + + bool minimize = (TARGET_CALL_PROLOGUES + && size < size_max + && live_seq + && !isr_p + && !cfun->machine->is_OS_task + && !cfun->machine->is_OS_main); + + if (minimize + && (frame_pointer_needed + || avr_outgoing_args_size() > 8 + || (AVR_2_BYTE_PC && live_seq > 6) + || live_seq > 7)) + { + rtx pattern; + int first_reg, reg, offset; + + emit_move_insn (gen_rtx_REG (HImode, REG_X), + gen_int_mode (size, HImode)); + + pattern = gen_call_prologue_saves (gen_int_mode (live_seq, HImode), + gen_int_mode (live_seq+size, HImode)); + insn = emit_insn (pattern); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Describe the effect of the unspec_volatile call to prologue_saves. + Note that this formulation assumes that add_reg_note pushes the + notes to the front. Thus we build them in the reverse order of + how we want dwarf2out to process them. */ + + /* The function does always set frame_pointer_rtx, but whether that + is going to be permanent in the function is frame_pointer_needed. */ + + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, (frame_pointer_needed + ? frame_pointer_rtx + : stack_pointer_rtx), + plus_constant (Pmode, stack_pointer_rtx, + -(size + live_seq)))); + + /* Note that live_seq always contains r28+r29, but the other + registers to be saved are all below 18. */ + + first_reg = 18 - (live_seq - 2); + + for (reg = 29, offset = -live_seq + 1; + reg >= first_reg; + reg = (reg == 28 ? 17 : reg - 1), ++offset) + { + rtx m, r; + + m = gen_rtx_MEM (QImode, plus_constant (Pmode, stack_pointer_rtx, + offset)); + r = gen_rtx_REG (QImode, reg); + add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, m, r)); + } + + cfun->machine->stack_usage += size + live_seq; + } + else /* !minimize */ + { + int reg; + + for (reg = 0; reg < 32; ++reg) + if (TEST_HARD_REG_BIT (set, reg)) + emit_push_byte (reg, true); + + if (frame_pointer_needed + && (!(cfun->machine->is_OS_task || cfun->machine->is_OS_main))) + { + /* Push frame pointer. Always be consistent about the + ordering of pushes -- epilogue_restores expects the + register pair to be pushed low byte first. */ + + emit_push_byte (REG_Y, true); + emit_push_byte (REG_Y + 1, true); + } + + if (frame_pointer_needed + && size == 0) + { + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (size != 0) + { + /* Creating a frame can be done by direct manipulation of the + stack or via the frame pointer. These two methods are: + fp = sp + fp -= size + sp = fp + or + sp -= size + fp = sp (*) + the optimum method depends on function type, stack and + frame size. To avoid a complex logic, both methods are + tested and shortest is selected. 
+ + There is also the case where SIZE != 0 and no frame pointer is + needed; this can occur if ACCUMULATE_OUTGOING_ARGS is on. + In that case, insn (*) is not needed in that case. + We use the X register as scratch. This is save because in X + is call-clobbered. + In an interrupt routine, the case of SIZE != 0 together with + !frame_pointer_needed can only occur if the function is not a + leaf function and thus X has already been saved. */ + + int irq_state = -1; + HOST_WIDE_INT size_cfa = size, neg_size; + rtx fp_plus_insns, fp, my_fp; + + gcc_assert (frame_pointer_needed + || !isr_p + || !crtl->is_leaf); + + fp = my_fp = (frame_pointer_needed + ? frame_pointer_rtx + : gen_rtx_REG (Pmode, REG_X)); + + if (AVR_HAVE_8BIT_SP) + { + /* The high byte (r29) does not change: + Prefer SUBI (1 cycle) over SBIW (2 cycles, same size). */ + + my_fp = all_regs_rtx[FRAME_POINTER_REGNUM]; + } + + /* Cut down size and avoid size = 0 so that we don't run + into ICE like PR52488 in the remainder. */ + + if (size > size_max) + { + /* Don't error so that insane code from newlib still compiles + and does not break building newlib. As PR51345 is implemented + now, there are multilib variants with -msp8. + + If user wants sanity checks he can use -Wstack-usage= + or similar options. + + For CFA we emit the original, non-saturated size so that + the generic machinery is aware of the real stack usage and + will print the above diagnostic as expected. */ + + size = size_max; + } + + size = trunc_int_for_mode (size, GET_MODE (my_fp)); + neg_size = trunc_int_for_mode (-size, GET_MODE (my_fp)); + + /************ Method 1: Adjust frame pointer ************/ + + start_sequence (); + + /* Normally, the dwarf2out frame-related-expr interpreter does + not expect to have the CFA change once the frame pointer is + set up. Thus, we avoid marking the move insn below and + instead indicate that the entire operation is complete after + the frame pointer subtraction is done. */ + + insn = emit_move_insn (fp, stack_pointer_rtx); + if (frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, fp, stack_pointer_rtx)); + } + + insn = emit_move_insn (my_fp, plus_constant (GET_MODE (my_fp), + my_fp, neg_size)); + + if (frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, fp, + plus_constant (Pmode, fp, + -size_cfa))); + } + + /* Copy to stack pointer. Note that since we've already + changed the CFA to the frame pointer this operation + need not be annotated if frame pointer is needed. + Always move through unspec, see PR50063. + For meaning of irq_state see movhi_sp_r insn. */ + + if (cfun->machine->is_interrupt) + irq_state = 1; + + if (TARGET_NO_INTERRUPTS + || cfun->machine->is_signal + || cfun->machine->is_OS_main) + irq_state = 0; + + if (AVR_HAVE_8BIT_SP) + irq_state = 2; + + insn = emit_insn (gen_movhi_sp_r (stack_pointer_rtx, + fp, GEN_INT (irq_state))); + if (!frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -size_cfa))); + } + + fp_plus_insns = get_insns (); + end_sequence (); + + /************ Method 2: Adjust Stack pointer ************/ + + /* Stack adjustment by means of RCALL . and/or PUSH __TMP_REG__ + can only handle specific offsets. 
*/ + + if (avr_sp_immediate_operand (gen_int_mode (-size, HImode), HImode)) + { + rtx sp_plus_insns; + + start_sequence (); + + insn = emit_move_insn (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -size)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -size_cfa))); + if (frame_pointer_needed) + { + insn = emit_move_insn (fp, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + + sp_plus_insns = get_insns (); + end_sequence (); + + /************ Use shortest method ************/ + + emit_insn (get_sequence_length (sp_plus_insns) + < get_sequence_length (fp_plus_insns) + ? sp_plus_insns + : fp_plus_insns); + } + else + { + emit_insn (fp_plus_insns); + } + + cfun->machine->stack_usage += size_cfa; + } /* !minimize && size != 0 */ + } /* !minimize */ +} + + +/* Output function prologue. */ + +void +avr_expand_prologue (void) +{ + HARD_REG_SET set; + HOST_WIDE_INT size; + + size = get_frame_size() + avr_outgoing_args_size(); + + cfun->machine->stack_usage = 0; + + /* Prologue: naked. */ + if (cfun->machine->is_naked) + { + return; + } + + avr_regs_to_save (&set); + + if (cfun->machine->is_interrupt || cfun->machine->is_signal) + { + /* Enable interrupts. */ + if (cfun->machine->is_interrupt) + emit_insn (gen_enable_interrupt ()); + + /* Push zero reg. */ + emit_push_byte (ZERO_REGNO, true); + + /* Push tmp reg. */ + emit_push_byte (TMP_REGNO, true); + + /* Push SREG. */ + /* ??? There's no dwarf2 column reserved for SREG. */ + emit_push_sfr (sreg_rtx, false, false /* clr */); + + /* Clear zero reg. */ + emit_move_insn (zero_reg_rtx, const0_rtx); + + /* Prevent any attempt to delete the setting of ZERO_REG! */ + emit_use (zero_reg_rtx); + + /* Push and clear RAMPD/X/Y/Z if present and low-part register is used. + ??? There are no dwarf2 columns reserved for RAMPD/X/Y/Z. */ + + if (AVR_HAVE_RAMPD) + emit_push_sfr (rampd_rtx, false /* frame-related */, true /* clr */); + + if (AVR_HAVE_RAMPX + && TEST_HARD_REG_BIT (set, REG_X) + && TEST_HARD_REG_BIT (set, REG_X + 1)) + { + emit_push_sfr (rampx_rtx, false /* frame-related */, true /* clr */); + } + + if (AVR_HAVE_RAMPY + && (frame_pointer_needed + || (TEST_HARD_REG_BIT (set, REG_Y) + && TEST_HARD_REG_BIT (set, REG_Y + 1)))) + { + emit_push_sfr (rampy_rtx, false /* frame-related */, true /* clr */); + } + + if (AVR_HAVE_RAMPZ + && TEST_HARD_REG_BIT (set, REG_Z) + && TEST_HARD_REG_BIT (set, REG_Z + 1)) + { + emit_push_sfr (rampz_rtx, false /* frame-related */, AVR_HAVE_RAMPD); + } + } /* is_interrupt is_signal */ + + avr_prologue_setup_frame (size, set); + + if (flag_stack_usage_info) + current_function_static_stack_size = cfun->machine->stack_usage; +} + + +/* Implement `TARGET_ASM_FUNCTION_END_PROLOGUE'. */ +/* Output summary at end of function prologue. 
*/ + +static void +avr_asm_function_end_prologue (FILE *file) +{ + if (cfun->machine->is_naked) + { + fputs ("/* prologue: naked */\n", file); + } + else + { + if (cfun->machine->is_interrupt) + { + fputs ("/* prologue: Interrupt */\n", file); + } + else if (cfun->machine->is_signal) + { + fputs ("/* prologue: Signal */\n", file); + } + else + fputs ("/* prologue: function */\n", file); + } + + if (ACCUMULATE_OUTGOING_ARGS) + fprintf (file, "/* outgoing args size = %d */\n", + avr_outgoing_args_size()); + + fprintf (file, "/* frame size = " HOST_WIDE_INT_PRINT_DEC " */\n", + get_frame_size()); + fprintf (file, "/* stack size = %d */\n", + cfun->machine->stack_usage); + /* Create symbol stack offset here so all functions have it. Add 1 to stack + usage for offset so that SP + .L__stack_offset = return address. */ + fprintf (file, ".L__stack_usage = %d\n", cfun->machine->stack_usage); +} + + +/* Implement `EPILOGUE_USES'. */ + +int +avr_epilogue_uses (int regno ATTRIBUTE_UNUSED) +{ + if (reload_completed + && cfun->machine + && (cfun->machine->is_interrupt || cfun->machine->is_signal)) + return 1; + return 0; +} + +/* Helper for avr_expand_epilogue. Emit a pop of a byte register. */ + +static void +emit_pop_byte (unsigned regno) +{ + rtx mem, reg; + + mem = gen_rtx_PRE_INC (HImode, stack_pointer_rtx); + mem = gen_frame_mem (QImode, mem); + reg = gen_rtx_REG (QImode, regno); + + emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); +} + +/* Output RTL epilogue. */ + +void +avr_expand_epilogue (bool sibcall_p) +{ + int reg; + int live_seq; + HARD_REG_SET set; + int minimize; + HOST_WIDE_INT size; + bool isr_p = cfun->machine->is_interrupt || cfun->machine->is_signal; + + size = get_frame_size() + avr_outgoing_args_size(); + + /* epilogue: naked */ + if (cfun->machine->is_naked) + { + gcc_assert (!sibcall_p); + + emit_jump_insn (gen_return ()); + return; + } + + avr_regs_to_save (&set); + live_seq = sequent_regs_live (); + + minimize = (TARGET_CALL_PROLOGUES + && live_seq + && !isr_p + && !cfun->machine->is_OS_task + && !cfun->machine->is_OS_main); + + if (minimize + && (live_seq > 4 + || frame_pointer_needed + || size)) + { + /* Get rid of frame. */ + + if (!frame_pointer_needed) + { + emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + } + + if (size) + { + emit_move_insn (frame_pointer_rtx, + plus_constant (Pmode, frame_pointer_rtx, size)); + } + + emit_insn (gen_epilogue_restores (gen_int_mode (live_seq, HImode))); + return; + } + + if (size) + { + /* Try two methods to adjust stack and select shortest. */ + + int irq_state = -1; + rtx fp, my_fp; + rtx fp_plus_insns; + HOST_WIDE_INT size_max; + + gcc_assert (frame_pointer_needed + || !isr_p + || !crtl->is_leaf); + + fp = my_fp = (frame_pointer_needed + ? frame_pointer_rtx + : gen_rtx_REG (Pmode, REG_X)); + + if (AVR_HAVE_8BIT_SP) + { + /* The high byte (r29) does not change: + Prefer SUBI (1 cycle) over SBIW (2 cycles). */ + + my_fp = all_regs_rtx[FRAME_POINTER_REGNUM]; + } + + /* For rationale see comment in prologue generation. */ + + size_max = (HOST_WIDE_INT) GET_MODE_MASK (GET_MODE (my_fp)); + if (size > size_max) + size = size_max; + size = trunc_int_for_mode (size, GET_MODE (my_fp)); + + /********** Method 1: Adjust fp register **********/ + + start_sequence (); + + if (!frame_pointer_needed) + emit_move_insn (fp, stack_pointer_rtx); + + emit_move_insn (my_fp, plus_constant (GET_MODE (my_fp), my_fp, size)); + + /* Copy to stack pointer. 
*/ + + if (TARGET_NO_INTERRUPTS) + irq_state = 0; + + if (AVR_HAVE_8BIT_SP) + irq_state = 2; + + emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp, + GEN_INT (irq_state))); + + fp_plus_insns = get_insns (); + end_sequence (); + + /********** Method 2: Adjust Stack pointer **********/ + + if (avr_sp_immediate_operand (gen_int_mode (size, HImode), HImode)) + { + rtx sp_plus_insns; + + start_sequence (); + + emit_move_insn (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, size)); + + sp_plus_insns = get_insns (); + end_sequence (); + + /************ Use shortest method ************/ + + emit_insn (get_sequence_length (sp_plus_insns) + < get_sequence_length (fp_plus_insns) + ? sp_plus_insns + : fp_plus_insns); + } + else + emit_insn (fp_plus_insns); + } /* size != 0 */ + + if (frame_pointer_needed + && !(cfun->machine->is_OS_task || cfun->machine->is_OS_main)) + { + /* Restore previous frame_pointer. See avr_expand_prologue for + rationale for not using pophi. */ + + emit_pop_byte (REG_Y + 1); + emit_pop_byte (REG_Y); + } + + /* Restore used registers. */ + + for (reg = 31; reg >= 0; --reg) + if (TEST_HARD_REG_BIT (set, reg)) + emit_pop_byte (reg); + + if (isr_p) + { + /* Restore RAMPZ/Y/X/D using tmp_reg as scratch. + The conditions to restore them must be tha same as in prologue. */ + + if (AVR_HAVE_RAMPZ + && TEST_HARD_REG_BIT (set, REG_Z) + && TEST_HARD_REG_BIT (set, REG_Z + 1)) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (rampz_rtx, tmp_reg_rtx); + } + + if (AVR_HAVE_RAMPY + && (frame_pointer_needed + || (TEST_HARD_REG_BIT (set, REG_Y) + && TEST_HARD_REG_BIT (set, REG_Y + 1)))) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (rampy_rtx, tmp_reg_rtx); + } + + if (AVR_HAVE_RAMPX + && TEST_HARD_REG_BIT (set, REG_X) + && TEST_HARD_REG_BIT (set, REG_X + 1)) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (rampx_rtx, tmp_reg_rtx); + } + + if (AVR_HAVE_RAMPD) + { + emit_pop_byte (TMP_REGNO); + emit_move_insn (rampd_rtx, tmp_reg_rtx); + } + + /* Restore SREG using tmp_reg as scratch. */ + + emit_pop_byte (TMP_REGNO); + emit_move_insn (sreg_rtx, tmp_reg_rtx); + + /* Restore tmp REG. */ + emit_pop_byte (TMP_REGNO); + + /* Restore zero REG. */ + emit_pop_byte (ZERO_REGNO); + } + + if (!sibcall_p) + emit_jump_insn (gen_return ()); +} + + +/* Implement `TARGET_ASM_FUNCTION_BEGIN_EPILOGUE'. */ + +static void +avr_asm_function_begin_epilogue (FILE *file) +{ + fprintf (file, "/* epilogue start */\n"); +} + + +/* Implement `TARGET_CANNOT_MODITY_JUMPS_P'. */ + +static bool +avr_cannot_modify_jumps_p (void) +{ + + /* Naked Functions must not have any instructions after + their epilogue, see PR42240 */ + + if (reload_completed + && cfun->machine + && cfun->machine->is_naked) + { + return true; + } + + return false; +} + + +/* Implement `TARGET_MODE_DEPENDENT_ADDRESS_P'. */ + +static bool +avr_mode_dependent_address_p (const_rtx addr ATTRIBUTE_UNUSED, addr_space_t as) +{ + /* FIXME: Non-generic addresses are not mode-dependent in themselves. + This hook just serves to hack around PR rtl-optimization/52543 by + claiming that non-generic addresses were mode-dependent so that + lower-subreg.c will skip these addresses. lower-subreg.c sets up fake + RTXes to probe SET and MEM costs and assumes that MEM is always in the + generic address space which is not true. */ + + return !ADDR_SPACE_GENERIC_P (as); +} + + +/* Helper function for `avr_legitimate_address_p'. 
*/ + +static inline bool +avr_reg_ok_for_addr_p (rtx reg, addr_space_t as, + RTX_CODE outer_code, bool strict) +{ + return (REG_P (reg) + && (avr_regno_mode_code_ok_for_base_p (REGNO (reg), QImode, + as, outer_code, UNKNOWN) + || (!strict + && REGNO (reg) >= FIRST_PSEUDO_REGISTER))); +} + + +/* Return nonzero if X (an RTX) is a legitimate memory address on the target + machine for a memory operand of mode MODE. */ + +static bool +avr_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + bool ok = CONSTANT_ADDRESS_P (x); + + switch (GET_CODE (x)) + { + case REG: + ok = avr_reg_ok_for_addr_p (x, ADDR_SPACE_GENERIC, + MEM, strict); + + if (strict + && GET_MODE_SIZE (mode) > 4 + && REG_X == REGNO (x)) + { + ok = false; + } + break; + + case POST_INC: + case PRE_DEC: + ok = avr_reg_ok_for_addr_p (XEXP (x, 0), ADDR_SPACE_GENERIC, + GET_CODE (x), strict); + break; + + case PLUS: + { + rtx reg = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + + if (REG_P (reg) + && CONST_INT_P (op1) + && INTVAL (op1) >= 0) + { + bool fit = IN_RANGE (INTVAL (op1), 0, MAX_LD_OFFSET (mode)); + + if (fit) + { + ok = (! strict + || avr_reg_ok_for_addr_p (reg, ADDR_SPACE_GENERIC, + PLUS, strict)); + + if (reg == frame_pointer_rtx + || reg == arg_pointer_rtx) + { + ok = true; + } + } + else if (frame_pointer_needed + && reg == frame_pointer_rtx) + { + ok = true; + } + } + } + break; + + default: + break; + } + + if (avr_log.legitimate_address_p) + { + avr_edump ("\n%?: ret=%d, mode=%m strict=%d " + "reload_completed=%d reload_in_progress=%d %s:", + ok, mode, strict, reload_completed, reload_in_progress, + reg_renumber ? "(reg_renumber)" : ""); + + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && IN_RANGE (INTVAL (XEXP (x, 1)), 0, MAX_LD_OFFSET (mode)) + && reg_renumber) + { + avr_edump ("(r%d ---> r%d)", REGNO (XEXP (x, 0)), + true_regnum (XEXP (x, 0))); + } + + avr_edump ("\n%r\n", x); + } + + return ok; +} + + +/* Former implementation of TARGET_LEGITIMIZE_ADDRESS, + now only a helper for avr_addr_space_legitimize_address. */ +/* Attempts to replace X with a valid + memory address for an operand of mode MODE */ + +static rtx +avr_legitimize_address (rtx x, rtx oldx, enum machine_mode mode) +{ + bool big_offset_p = false; + + x = oldx; + + if (GET_CODE (oldx) == PLUS + && REG_P (XEXP (oldx, 0))) + { + if (REG_P (XEXP (oldx, 1))) + x = force_reg (GET_MODE (oldx), oldx); + else if (CONST_INT_P (XEXP (oldx, 1))) + { + int offs = INTVAL (XEXP (oldx, 1)); + if (frame_pointer_rtx != XEXP (oldx, 0) + && offs > MAX_LD_OFFSET (mode)) + { + big_offset_p = true; + x = force_reg (GET_MODE (oldx), oldx); + } + } + } + + if (avr_log.legitimize_address) + { + avr_edump ("\n%?: mode=%m\n %r\n", mode, oldx); + + if (x != oldx) + avr_edump (" %s --> %r\n", big_offset_p ? "(big offset)" : "", x); + } + + return x; +} + + +/* Implement `LEGITIMIZE_RELOAD_ADDRESS'. */ +/* This will allow register R26/27 to be used where it is no worse than normal + base pointers R28/29 or R30/31. For example, if base offset is greater + than 63 bytes or for R++ or --R addressing. 
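+
+   For illustration (sequences as emitted by out_movqi_r_mr below): an
+   access through Y with an offset above 63 already needs the frame
+   pointer to be adjusted and restored around the load,
+
+       subi r28,lo8(-(off))
+       sbci r29,hi8(-(off))
+       ld   rD,Y
+       subi r28,lo8(off)
+       sbci r29,hi8(off)
+
+   so using X, which likewise needs an adjust/restore pair, is not any
+   more expensive in that case.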
*/ + +rtx +avr_legitimize_reload_address (rtx *px, enum machine_mode mode, + int opnum, int type, int addr_type, + int ind_levels ATTRIBUTE_UNUSED, + rtx (*mk_memloc)(rtx,int)) +{ + rtx x = *px; + + if (avr_log.legitimize_reload_address) + avr_edump ("\n%?:%m %r\n", mode, x); + + if (1 && (GET_CODE (x) == POST_INC + || GET_CODE (x) == PRE_DEC)) + { + push_reload (XEXP (x, 0), XEXP (x, 0), &XEXP (x, 0), &XEXP (x, 0), + POINTER_REGS, GET_MODE (x), GET_MODE (x), 0, 0, + opnum, RELOAD_OTHER); + + if (avr_log.legitimize_reload_address) + avr_edump (" RCLASS.1 = %R\n IN = %r\n OUT = %r\n", + POINTER_REGS, XEXP (x, 0), XEXP (x, 0)); + + return x; + } + + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && 0 == reg_equiv_constant (REGNO (XEXP (x, 0))) + && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) >= 1) + { + bool fit = INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode); + + if (fit) + { + if (reg_equiv_address (REGNO (XEXP (x, 0))) != 0) + { + int regno = REGNO (XEXP (x, 0)); + rtx mem = mk_memloc (x, regno); + + push_reload (XEXP (mem, 0), NULL_RTX, &XEXP (mem, 0), NULL, + POINTER_REGS, Pmode, VOIDmode, 0, 0, + 1, (enum reload_type) addr_type); + + if (avr_log.legitimize_reload_address) + avr_edump (" RCLASS.2 = %R\n IN = %r\n OUT = %r\n", + POINTER_REGS, XEXP (mem, 0), NULL_RTX); + + push_reload (mem, NULL_RTX, &XEXP (x, 0), NULL, + BASE_POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + + if (avr_log.legitimize_reload_address) + avr_edump (" RCLASS.2 = %R\n IN = %r\n OUT = %r\n", + BASE_POINTER_REGS, mem, NULL_RTX); + + return x; + } + } + else if (! (frame_pointer_needed + && XEXP (x, 0) == frame_pointer_rtx)) + { + push_reload (x, NULL_RTX, px, NULL, + POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0, + opnum, (enum reload_type) type); + + if (avr_log.legitimize_reload_address) + avr_edump (" RCLASS.3 = %R\n IN = %r\n OUT = %r\n", + POINTER_REGS, x, NULL_RTX); + + return x; + } + } + + return NULL_RTX; +} + + +/* Implement `TARGET_SECONDARY_RELOAD' */ + +static reg_class_t +avr_secondary_reload (bool in_p, rtx x, + reg_class_t reload_class ATTRIBUTE_UNUSED, + enum machine_mode mode, secondary_reload_info *sri) +{ + if (in_p + && MEM_P (x) + && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)) + && ADDR_SPACE_MEMX != MEM_ADDR_SPACE (x)) + { + /* For the non-generic 16-bit spaces we need a d-class scratch. */ + + switch (mode) + { + default: + gcc_unreachable(); + + case QImode: sri->icode = CODE_FOR_reload_inqi; break; + case QQmode: sri->icode = CODE_FOR_reload_inqq; break; + case UQQmode: sri->icode = CODE_FOR_reload_inuqq; break; + + case HImode: sri->icode = CODE_FOR_reload_inhi; break; + case HQmode: sri->icode = CODE_FOR_reload_inhq; break; + case HAmode: sri->icode = CODE_FOR_reload_inha; break; + case UHQmode: sri->icode = CODE_FOR_reload_inuhq; break; + case UHAmode: sri->icode = CODE_FOR_reload_inuha; break; + + case PSImode: sri->icode = CODE_FOR_reload_inpsi; break; + + case SImode: sri->icode = CODE_FOR_reload_insi; break; + case SFmode: sri->icode = CODE_FOR_reload_insf; break; + case SQmode: sri->icode = CODE_FOR_reload_insq; break; + case SAmode: sri->icode = CODE_FOR_reload_insa; break; + case USQmode: sri->icode = CODE_FOR_reload_inusq; break; + case USAmode: sri->icode = CODE_FOR_reload_inusa; break; + } + } + + return NO_REGS; +} + + +/* Helper function to print assembler resp. track instruction + sequence lengths. Always return "". + + If PLEN == NULL: + Output assembler code from template TPL with operands supplied + by OPERANDS. 
This is just forwarding to output_asm_insn. + + If PLEN != NULL: + If N_WORDS >= 0 Add N_WORDS to *PLEN. + If N_WORDS < 0 Set *PLEN to -N_WORDS. + Don't output anything. +*/ + +static const char* +avr_asm_len (const char* tpl, rtx* operands, int* plen, int n_words) +{ + if (NULL == plen) + { + output_asm_insn (tpl, operands); + } + else + { + if (n_words < 0) + *plen = -n_words; + else + *plen += n_words; + } + + return ""; +} + + +/* Return a pointer register name as a string. */ + +static const char* +ptrreg_to_str (int regno) +{ + switch (regno) + { + case REG_X: return "X"; + case REG_Y: return "Y"; + case REG_Z: return "Z"; + default: + output_operand_lossage ("address operand requires constraint for" + " X, Y, or Z register"); + } + return NULL; +} + +/* Return the condition name as a string. + Used in conditional jump constructing */ + +static const char* +cond_string (enum rtx_code code) +{ + switch (code) + { + case NE: + return "ne"; + case EQ: + return "eq"; + case GE: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return "pl"; + else + return "ge"; + case LT: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return "mi"; + else + return "lt"; + case GEU: + return "sh"; + case LTU: + return "lo"; + default: + gcc_unreachable (); + } + + return ""; +} + + +/* Implement `TARGET_PRINT_OPERAND_ADDRESS'. */ +/* Output ADDR to FILE as address. */ + +static void +avr_print_operand_address (FILE *file, rtx addr) +{ + switch (GET_CODE (addr)) + { + case REG: + fprintf (file, ptrreg_to_str (REGNO (addr))); + break; + + case PRE_DEC: + fprintf (file, "-%s", ptrreg_to_str (REGNO (XEXP (addr, 0)))); + break; + + case POST_INC: + fprintf (file, "%s+", ptrreg_to_str (REGNO (XEXP (addr, 0)))); + break; + + default: + if (CONSTANT_ADDRESS_P (addr) + && text_segment_operand (addr, VOIDmode)) + { + rtx x = addr; + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x,1)) == CONST_INT) + { + /* Assembler gs() will implant word address. Make offset + a byte offset inside gs() for assembler. This is + needed because the more logical (constant+gs(sym)) is not + accepted by gas. For 128K and smaller devices this is ok. + For large devices it will create a trampoline to offset + from symbol which may not be what the user really wanted. */ + + fprintf (file, "gs("); + output_addr_const (file, XEXP (x,0)); + fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC ")", + 2 * INTVAL (XEXP (x, 1))); + if (AVR_3_BYTE_PC) + if (warning (0, "pointer offset from symbol maybe incorrect")) + { + output_addr_const (stderr, addr); + fprintf(stderr,"\n"); + } + } + else + { + fprintf (file, "gs("); + output_addr_const (file, addr); + fprintf (file, ")"); + } + } + else + output_addr_const (file, addr); + } +} + + +/* Implement `TARGET_PRINT_OPERAND_PUNCT_VALID_P'. */ + +static bool +avr_print_operand_punct_valid_p (unsigned char code) +{ + return code == '~' || code == '!'; +} + + +/* Implement `TARGET_PRINT_OPERAND'. */ +/* Output X as assembler operand to file FILE. + For a description of supported %-codes, see top of avr.md. 
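+
+   As a quick orientation (the handlers are in the function below):
+   '~' prints an 'r' prefix when the device has no JMP/CALL, '!' prints an
+   'e' prefix for EIJMP/EICALL, 'A'..'D' select the bytes of a multi-byte
+   operand, 't'/'T' address a single bit, 'i' prints an I/O address, 'x' a
+   program-memory address, and 'j'/'k' a branch condition resp. its
+   reverse.  For example, an illustrative template such as "%~call %x0"
+   would assemble to "rcall" on a device without CALL and to "call"
+   otherwise (see avr.md for the real templates).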
*/ + +static void +avr_print_operand (FILE *file, rtx x, int code) +{ + int abcd = 0; + + if (code >= 'A' && code <= 'D') + abcd = code - 'A'; + + if (code == '~') + { + if (!AVR_HAVE_JMP_CALL) + fputc ('r', file); + } + else if (code == '!') + { + if (AVR_HAVE_EIJMP_EICALL) + fputc ('e', file); + } + else if (code == 't' + || code == 'T') + { + static int t_regno = -1; + static int t_nbits = -1; + + if (REG_P (x) && t_regno < 0 && code == 'T') + { + t_regno = REGNO (x); + t_nbits = GET_MODE_BITSIZE (GET_MODE (x)); + } + else if (CONST_INT_P (x) && t_regno >= 0 + && IN_RANGE (INTVAL (x), 0, t_nbits - 1)) + { + int bpos = INTVAL (x); + + fprintf (file, "%s", reg_names[t_regno + bpos / 8]); + if (code == 'T') + fprintf (file, ",%d", bpos % 8); + + t_regno = -1; + } + else + fatal_insn ("operands to %T/%t must be reg + const_int:", x); + } + else if (REG_P (x)) + { + if (x == zero_reg_rtx) + fprintf (file, "__zero_reg__"); + else if (code == 'r' && REGNO (x) < 32) + fprintf (file, "%d", (int) REGNO (x)); + else + fprintf (file, reg_names[REGNO (x) + abcd]); + } + else if (CONST_INT_P (x)) + { + HOST_WIDE_INT ival = INTVAL (x); + + if ('i' != code) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival + abcd); + else if (low_io_address_operand (x, VOIDmode) + || high_io_address_operand (x, VOIDmode)) + { + if (AVR_HAVE_RAMPZ && ival == avr_addr.rampz) + fprintf (file, "__RAMPZ__"); + else if (AVR_HAVE_RAMPY && ival == avr_addr.rampy) + fprintf (file, "__RAMPY__"); + else if (AVR_HAVE_RAMPX && ival == avr_addr.rampx) + fprintf (file, "__RAMPX__"); + else if (AVR_HAVE_RAMPD && ival == avr_addr.rampd) + fprintf (file, "__RAMPD__"); + else if (AVR_XMEGA && ival == avr_addr.ccp) + fprintf (file, "__CCP__"); + else if (ival == avr_addr.sreg) fprintf (file, "__SREG__"); + else if (ival == avr_addr.sp_l) fprintf (file, "__SP_L__"); + else if (ival == avr_addr.sp_h) fprintf (file, "__SP_H__"); + else + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, + ival - avr_current_arch->sfr_offset); + } + } + else + fatal_insn ("bad address, not an I/O address:", x); + } + else if (MEM_P (x)) + { + rtx addr = XEXP (x, 0); + + if (code == 'm') + { + if (!CONSTANT_P (addr)) + fatal_insn ("bad address, not a constant:", addr); + /* Assembler template with m-code is data - not progmem section */ + if (text_segment_operand (addr, VOIDmode)) + if (warning (0, "accessing data memory with" + " program memory address")) + { + output_addr_const (stderr, addr); + fprintf(stderr,"\n"); + } + output_addr_const (file, addr); + } + else if (code == 'i') + { + avr_print_operand (file, addr, 'i'); + } + else if (code == 'o') + { + if (GET_CODE (addr) != PLUS) + fatal_insn ("bad address, not (reg+disp):", addr); + + avr_print_operand (file, XEXP (addr, 1), 0); + } + else if (code == 'p' || code == 'r') + { + if (GET_CODE (addr) != POST_INC && GET_CODE (addr) != PRE_DEC) + fatal_insn ("bad address, not post_inc or pre_dec:", addr); + + if (code == 'p') + avr_print_operand_address (file, XEXP (addr, 0)); /* X, Y, Z */ + else + avr_print_operand (file, XEXP (addr, 0), 0); /* r26, r28, r30 */ + } + else if (GET_CODE (addr) == PLUS) + { + avr_print_operand_address (file, XEXP (addr,0)); + if (REGNO (XEXP (addr, 0)) == REG_X) + fatal_insn ("internal compiler error. 
Bad address:" + ,addr); + fputc ('+', file); + avr_print_operand (file, XEXP (addr,1), code); + } + else + avr_print_operand_address (file, addr); + } + else if (code == 'i') + { + fatal_insn ("bad address, not an I/O address:", x); + } + else if (code == 'x') + { + /* Constant progmem address - like used in jmp or call */ + if (0 == text_segment_operand (x, VOIDmode)) + if (warning (0, "accessing program memory" + " with data memory address")) + { + output_addr_const (stderr, x); + fprintf(stderr,"\n"); + } + /* Use normal symbol for direct address no linker trampoline needed */ + output_addr_const (file, x); + } + else if (CONST_FIXED_P (x)) + { + HOST_WIDE_INT ival = INTVAL (avr_to_int_mode (x)); + if (code != 0) + output_operand_lossage ("Unsupported code '%c' for fixed-point:", + code); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival); + } + else if (GET_CODE (x) == CONST_DOUBLE) + { + long val; + REAL_VALUE_TYPE rv; + if (GET_MODE (x) != SFmode) + fatal_insn ("internal compiler error. Unknown mode:", x); + REAL_VALUE_FROM_CONST_DOUBLE (rv, x); + REAL_VALUE_TO_TARGET_SINGLE (rv, val); + fprintf (file, "0x%lx", val); + } + else if (GET_CODE (x) == CONST_STRING) + fputs (XSTR (x, 0), file); + else if (code == 'j') + fputs (cond_string (GET_CODE (x)), file); + else if (code == 'k') + fputs (cond_string (reverse_condition (GET_CODE (x))), file); + else + avr_print_operand_address (file, x); +} + + +/* Worker function for `NOTICE_UPDATE_CC'. */ +/* Update the condition code in the INSN. */ + +void +avr_notice_update_cc (rtx body ATTRIBUTE_UNUSED, rtx insn) +{ + rtx set; + enum attr_cc cc = get_attr_cc (insn); + + switch (cc) + { + default: + break; + + case CC_PLUS: + case CC_LDI: + { + rtx *op = recog_data.operand; + int len_dummy, icc; + + /* Extract insn's operands. */ + extract_constrain_insn_cached (insn); + + switch (cc) + { + default: + gcc_unreachable(); + + case CC_PLUS: + avr_out_plus (insn, op, &len_dummy, &icc); + cc = (enum attr_cc) icc; + break; + + case CC_LDI: + + cc = (op[1] == CONST0_RTX (GET_MODE (op[0])) + && reg_overlap_mentioned_p (op[0], zero_reg_rtx)) + /* Loading zero-reg with 0 uses CLR and thus clobbers cc0. */ + ? CC_CLOBBER + /* Any other "r,rL" combination does not alter cc0. */ + : CC_NONE; + + break; + } /* inner switch */ + + break; + } + } /* outer swicth */ + + switch (cc) + { + default: + /* Special values like CC_OUT_PLUS from above have been + mapped to "standard" CC_* values so we never come here. */ + + gcc_unreachable(); + break; + + case CC_NONE: + /* Insn does not affect CC at all. */ + break; + + case CC_SET_N: + CC_STATUS_INIT; + break; + + case CC_SET_ZN: + set = single_set (insn); + CC_STATUS_INIT; + if (set) + { + cc_status.flags |= CC_NO_OVERFLOW; + cc_status.value1 = SET_DEST (set); + } + break; + + case CC_SET_CZN: + /* Insn sets the Z,N,C flags of CC to recog_operand[0]. + The V flag may or may not be known but that's ok because + alter_cond will change tests to use EQ/NE. */ + set = single_set (insn); + CC_STATUS_INIT; + if (set) + { + cc_status.value1 = SET_DEST (set); + cc_status.flags |= CC_OVERFLOW_UNUSABLE; + } + break; + + case CC_COMPARE: + set = single_set (insn); + CC_STATUS_INIT; + if (set) + cc_status.value1 = SET_SRC (set); + break; + + case CC_CLOBBER: + /* Insn doesn't leave CC in a usable state. */ + CC_STATUS_INIT; + break; + } +} + +/* Choose mode for jump insn: + 1 - relative jump in range -63 <= x <= 62 ; + 2 - relative jump in range -2046 <= x <= 2045 ; + 3 - absolute jump (only for ATmega[16]03). 
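+
+   The returned mode corresponds to the branch shapes used by
+   ret_cond_branch below, roughly:
+
+       1:  br<cond>  %0
+       2:  br<rev>   .+2
+           rjmp %0
+       3:  br<rev>   .+4
+           jmp  %0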
*/ + +int +avr_jump_mode (rtx x, rtx insn) +{ + int dest_addr = INSN_ADDRESSES (INSN_UID (GET_CODE (x) == LABEL_REF + ? XEXP (x, 0) : x)); + int cur_addr = INSN_ADDRESSES (INSN_UID (insn)); + int jump_distance = cur_addr - dest_addr; + + if (-63 <= jump_distance && jump_distance <= 62) + return 1; + else if (-2046 <= jump_distance && jump_distance <= 2045) + return 2; + else if (AVR_HAVE_JMP_CALL) + return 3; + + return 2; +} + +/* Return an AVR condition jump commands. + X is a comparison RTX. + LEN is a number returned by avr_jump_mode function. + If REVERSE nonzero then condition code in X must be reversed. */ + +const char* +ret_cond_branch (rtx x, int len, int reverse) +{ + RTX_CODE cond = reverse ? reverse_condition (GET_CODE (x)) : GET_CODE (x); + + switch (cond) + { + case GT: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return (len == 1 ? ("breq .+2" CR_TAB + "brpl %0") : + len == 2 ? ("breq .+4" CR_TAB + "brmi .+2" CR_TAB + "rjmp %0") : + ("breq .+6" CR_TAB + "brmi .+4" CR_TAB + "jmp %0")); + + else + return (len == 1 ? ("breq .+2" CR_TAB + "brge %0") : + len == 2 ? ("breq .+4" CR_TAB + "brlt .+2" CR_TAB + "rjmp %0") : + ("breq .+6" CR_TAB + "brlt .+4" CR_TAB + "jmp %0")); + case GTU: + return (len == 1 ? ("breq .+2" CR_TAB + "brsh %0") : + len == 2 ? ("breq .+4" CR_TAB + "brlo .+2" CR_TAB + "rjmp %0") : + ("breq .+6" CR_TAB + "brlo .+4" CR_TAB + "jmp %0")); + case LE: + if (cc_prev_status.flags & CC_OVERFLOW_UNUSABLE) + return (len == 1 ? ("breq %0" CR_TAB + "brmi %0") : + len == 2 ? ("breq .+2" CR_TAB + "brpl .+2" CR_TAB + "rjmp %0") : + ("breq .+2" CR_TAB + "brpl .+4" CR_TAB + "jmp %0")); + else + return (len == 1 ? ("breq %0" CR_TAB + "brlt %0") : + len == 2 ? ("breq .+2" CR_TAB + "brge .+2" CR_TAB + "rjmp %0") : + ("breq .+2" CR_TAB + "brge .+4" CR_TAB + "jmp %0")); + case LEU: + return (len == 1 ? ("breq %0" CR_TAB + "brlo %0") : + len == 2 ? ("breq .+2" CR_TAB + "brsh .+2" CR_TAB + "rjmp %0") : + ("breq .+2" CR_TAB + "brsh .+4" CR_TAB + "jmp %0")); + default: + if (reverse) + { + switch (len) + { + case 1: + return "br%k1 %0"; + case 2: + return ("br%j1 .+2" CR_TAB + "rjmp %0"); + default: + return ("br%j1 .+4" CR_TAB + "jmp %0"); + } + } + else + { + switch (len) + { + case 1: + return "br%j1 %0"; + case 2: + return ("br%k1 .+2" CR_TAB + "rjmp %0"); + default: + return ("br%k1 .+4" CR_TAB + "jmp %0"); + } + } + } + return ""; +} + + +/* Worker function for `FINAL_PRESCAN_INSN'. */ +/* Output insn cost for next insn. */ + +void +avr_final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED, + int num_operands ATTRIBUTE_UNUSED) +{ + if (avr_log.rtx_costs) + { + rtx set = single_set (insn); + + if (set) + fprintf (asm_out_file, "/* DEBUG: cost = %d. */\n", + set_src_cost (SET_SRC (set), optimize_insn_for_speed_p ())); + else + fprintf (asm_out_file, "/* DEBUG: pattern-cost = %d. */\n", + rtx_cost (PATTERN (insn), INSN, 0, + optimize_insn_for_speed_p())); + } +} + +/* Return 0 if undefined, 1 if always true or always false. */ + +int +avr_simplify_comparison_p (enum machine_mode mode, RTX_CODE op, rtx x) +{ + unsigned int max = (mode == QImode ? 0xff : + mode == HImode ? 0xffff : + mode == PSImode ? 0xffffff : + mode == SImode ? 0xffffffff : 0); + if (max && op && CONST_INT_P (x)) + { + if (unsigned_condition (op) != op) + max >>= 1; + + if (max != (INTVAL (x) & max) + && INTVAL (x) != 0xff) + return 1; + } + return 0; +} + + +/* Worker function for `FUNCTION_ARG_REGNO_P'. 
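+
+   Arguments are passed in r8..r25, allocated downwards in even-sized
+   chunks (see avr_function_arg and avr_num_arg_regs below).  Purely as an
+   illustration, and assuming FIRST_CUM_REG is 26, a prototype like
+
+       int f (int a, long b);
+
+   would get a in r24/r25 and b in r20..r23.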
*/ +/* Returns nonzero if REGNO is the number of a hard + register in which function arguments are sometimes passed. */ + +int +avr_function_arg_regno_p(int r) +{ + return (r >= 8 && r <= 25); +} + + +/* Worker function for `INIT_CUMULATIVE_ARGS'. */ +/* Initializing the variable cum for the state at the beginning + of the argument list. */ + +void +avr_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname, + tree fndecl ATTRIBUTE_UNUSED) +{ + cum->nregs = 18; + cum->regno = FIRST_CUM_REG; + if (!libname && stdarg_p (fntype)) + cum->nregs = 0; + + /* Assume the calle may be tail called */ + + cfun->machine->sibcall_fails = 0; +} + +/* Returns the number of registers to allocate for a function argument. */ + +static int +avr_num_arg_regs (enum machine_mode mode, const_tree type) +{ + int size; + + if (mode == BLKmode) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + /* Align all function arguments to start in even-numbered registers. + Odd-sized arguments leave holes above them. */ + + return (size + 1) & ~1; +} + + +/* Implement `TARGET_FUNCTION_ARG'. */ +/* Controls whether a function argument is passed + in a register, and which register. */ + +static rtx +avr_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = avr_num_arg_regs (mode, type); + + if (cum->nregs && bytes <= cum->nregs) + return gen_rtx_REG (mode, cum->regno - bytes); + + return NULL_RTX; +} + + +/* Implement `TARGET_FUNCTION_ARG_ADVANCE'. */ +/* Update the summarizer variable CUM to advance past an argument + in the argument list. */ + +static void +avr_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes = avr_num_arg_regs (mode, type); + + cum->nregs -= bytes; + cum->regno -= bytes; + + /* A parameter is being passed in a call-saved register. As the original + contents of these regs has to be restored before leaving the function, + a function must not pass arguments in call-saved regs in order to get + tail-called. */ + + if (cum->regno >= 8 + && cum->nregs >= 0 + && !call_used_regs[cum->regno]) + { + /* FIXME: We ship info on failing tail-call in struct machine_function. + This uses internals of calls.c:expand_call() and the way args_so_far + is used. targetm.function_ok_for_sibcall() needs to be extended to + pass &args_so_far, too. At present, CUMULATIVE_ARGS is target + dependent so that such an extension is not wanted. */ + + cfun->machine->sibcall_fails = 1; + } + + /* Test if all registers needed by the ABI are actually available. If the + user has fixed a GPR needed to pass an argument, an (implicit) function + call will clobber that fixed register. See PR45099 for an example. */ + + if (cum->regno >= 8 + && cum->nregs >= 0) + { + int regno; + + for (regno = cum->regno; regno < cum->regno + bytes; regno++) + if (fixed_regs[regno]) + warning (0, "fixed register %s used to pass parameter to function", + reg_names[regno]); + } + + if (cum->nregs <= 0) + { + cum->nregs = 0; + cum->regno = FIRST_CUM_REG; + } +} + +/* Implement `TARGET_FUNCTION_OK_FOR_SIBCALL' */ +/* Decide whether we can make a sibling call to a function. DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. 
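+
+   As an illustrative example of the checks below, with
+
+       void callee (void) __attribute__((OS_task));
+       void caller (void) { callee (); }
+
+   the call is not turned into a tail call, because caller and callee
+   would need different epilogues.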
*/ + +static bool +avr_function_ok_for_sibcall (tree decl_callee, tree exp_callee) +{ + tree fntype_callee; + + /* Tail-calling must fail if callee-saved regs are used to pass + function args. We must not tail-call when `epilogue_restores' + is used. Unfortunately, we cannot tell at this point if that + actually will happen or not, and we cannot step back from + tail-calling. Thus, we inhibit tail-calling with -mcall-prologues. */ + + if (cfun->machine->sibcall_fails + || TARGET_CALL_PROLOGUES) + { + return false; + } + + fntype_callee = TREE_TYPE (CALL_EXPR_FN (exp_callee)); + + if (decl_callee) + { + decl_callee = TREE_TYPE (decl_callee); + } + else + { + decl_callee = fntype_callee; + + while (FUNCTION_TYPE != TREE_CODE (decl_callee) + && METHOD_TYPE != TREE_CODE (decl_callee)) + { + decl_callee = TREE_TYPE (decl_callee); + } + } + + /* Ensure that caller and callee have compatible epilogues */ + + if (cfun->machine->is_interrupt + || cfun->machine->is_signal + || cfun->machine->is_naked + || avr_naked_function_p (decl_callee) + /* FIXME: For OS_task and OS_main, this might be over-conservative. */ + || (avr_OS_task_function_p (decl_callee) + != cfun->machine->is_OS_task) + || (avr_OS_main_function_p (decl_callee) + != cfun->machine->is_OS_main)) + { + return false; + } + + return true; +} + +/*********************************************************************** + Functions for outputting various mov's for a various modes +************************************************************************/ + +/* Return true if a value of mode MODE is read from flash by + __load_* function from libgcc. */ + +bool +avr_load_libgcc_p (rtx op) +{ + enum machine_mode mode = GET_MODE (op); + int n_bytes = GET_MODE_SIZE (mode); + + return (n_bytes > 2 + && !AVR_HAVE_LPMX + && avr_mem_flash_p (op)); +} + +/* Return true if a value of mode MODE is read by __xload_* function. */ + +bool +avr_xload_libgcc_p (enum machine_mode mode) +{ + int n_bytes = GET_MODE_SIZE (mode); + + return (n_bytes > 1 + || avr_current_device->n_flash > 1); +} + + +/* Fixme: This is a hack because secondary reloads don't works as expected. + + Find an unused d-register to be used as scratch in INSN. + EXCLUDE is either NULL_RTX or some register. In the case where EXCLUDE + is a register, skip all possible return values that overlap EXCLUDE. + The policy for the returned register is similar to that of + `reg_unused_after', i.e. the returned register may overlap the SET_DEST + of INSN. + + Return a QImode d-register or NULL_RTX if nothing found. */ + +static rtx +avr_find_unused_d_reg (rtx insn, rtx exclude) +{ + int regno; + bool isr_p = (avr_interrupt_function_p (current_function_decl) + || avr_signal_function_p (current_function_decl)); + + for (regno = 16; regno < 32; regno++) + { + rtx reg = all_regs_rtx[regno]; + + if ((exclude + && reg_overlap_mentioned_p (exclude, reg)) + || fixed_regs[regno]) + { + continue; + } + + /* Try non-live register */ + + if (!df_regs_ever_live_p (regno) + && (TREE_THIS_VOLATILE (current_function_decl) + || cfun->machine->is_OS_task + || cfun->machine->is_OS_main + || (!isr_p && call_used_regs[regno]))) + { + return reg; + } + + /* Any live register can be used if it is unused after. + Prologue/epilogue will care for it as needed. */ + + if (df_regs_ever_live_p (regno) + && reg_unused_after (insn, reg)) + { + return reg; + } + } + + return NULL_RTX; +} + + +/* Helper function for the next function in the case where only restricted + version of LPM instruction is available. 
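+
+   Plain LPM can only load r0 from Z and does not post-increment, so each
+   byte of a wider value costs an lpm / mov / adiw triple, roughly
+
+       lpm
+       mov  %A0,r0
+       adiw r30,1
+       lpm
+       mov  %B0,r0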
*/ + +static const char* +avr_out_lpm_no_lpmx (rtx insn, rtx *xop, int *plen) +{ + rtx dest = xop[0]; + rtx addr = xop[1]; + int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); + int regno_dest; + + regno_dest = REGNO (dest); + + /* The implicit target register of LPM. */ + xop[3] = lpm_reg_rtx; + + switch (GET_CODE (addr)) + { + default: + gcc_unreachable(); + + case REG: + + gcc_assert (REG_Z == REGNO (addr)); + + switch (n_bytes) + { + default: + gcc_unreachable(); + + case 1: + avr_asm_len ("%4lpm", xop, plen, 1); + + if (regno_dest != LPM_REGNO) + avr_asm_len ("mov %0,%3", xop, plen, 1); + + return ""; + + case 2: + if (REGNO (dest) == REG_Z) + return avr_asm_len ("%4lpm" CR_TAB + "push %3" CR_TAB + "adiw %2,1" CR_TAB + "%4lpm" CR_TAB + "mov %B0,%3" CR_TAB + "pop %A0", xop, plen, 6); + + avr_asm_len ("%4lpm" CR_TAB + "mov %A0,%3" CR_TAB + "adiw %2,1" CR_TAB + "%4lpm" CR_TAB + "mov %B0,%3", xop, plen, 5); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,1", xop, plen, 1); + + break; /* 2 */ + } + + break; /* REG */ + + case POST_INC: + + gcc_assert (REG_Z == REGNO (XEXP (addr, 0)) + && n_bytes <= 4); + + if (regno_dest == LPM_REGNO) + avr_asm_len ("%4lpm" CR_TAB + "adiw %2,1", xop, plen, 2); + else + avr_asm_len ("%4lpm" CR_TAB + "mov %A0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 2) + avr_asm_len ("%4lpm" CR_TAB + "mov %B0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 3) + avr_asm_len ("%4lpm" CR_TAB + "mov %C0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + if (n_bytes >= 4) + avr_asm_len ("%4lpm" CR_TAB + "mov %D0,%3" CR_TAB + "adiw %2,1", xop, plen, 3); + + break; /* POST_INC */ + + } /* switch CODE (addr) */ + + return ""; +} + + +/* If PLEN == NULL: Ouput instructions to load a value from a memory location + OP[1] in AS1 to register OP[0]. + If PLEN != 0 set *PLEN to the length in words of the instruction sequence. + Return "". */ + +const char* +avr_out_lpm (rtx insn, rtx *op, int *plen) +{ + rtx xop[7]; + rtx dest = op[0]; + rtx src = SET_SRC (single_set (insn)); + rtx addr; + int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); + int segment; + RTX_CODE code; + addr_space_t as = MEM_ADDR_SPACE (src); + + if (plen) + *plen = 0; + + if (MEM_P (dest)) + { + warning (0, "writing to address space %qs not supported", + avr_addrspace[MEM_ADDR_SPACE (dest)].name); + + return ""; + } + + addr = XEXP (src, 0); + code = GET_CODE (addr); + + gcc_assert (REG_P (dest)); + gcc_assert (REG == code || POST_INC == code); + + xop[0] = dest; + xop[1] = addr; + xop[2] = lpm_addr_reg_rtx; + xop[4] = xstring_empty; + xop[5] = tmp_reg_rtx; + xop[6] = XEXP (rampz_rtx, 0); + + segment = avr_addrspace[as].segment; + + /* Set RAMPZ as needed. */ + + if (segment) + { + xop[4] = GEN_INT (segment); + xop[3] = avr_find_unused_d_reg (insn, lpm_addr_reg_rtx); + + if (xop[3] != NULL_RTX) + { + avr_asm_len ("ldi %3,%4" CR_TAB + "out %i6,%3", xop, plen, 2); + } + else if (segment == 1) + { + avr_asm_len ("clr %5" CR_TAB + "inc %5" CR_TAB + "out %i6,%5", xop, plen, 3); + } + else + { + avr_asm_len ("mov %5,%2" CR_TAB + "ldi %2,%4" CR_TAB + "out %i6,%2" CR_TAB + "mov %2,%5", xop, plen, 4); + } + + xop[4] = xstring_e; + + if (!AVR_HAVE_ELPMX) + return avr_out_lpm_no_lpmx (insn, xop, plen); + } + else if (!AVR_HAVE_LPMX) + { + return avr_out_lpm_no_lpmx (insn, xop, plen); + } + + /* We have [E]LPMX: Output reading from Flash the comfortable way. 
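+
+   That is, the post-increment forms can be used directly; a 16-bit read
+   is simply
+
+       lpm  %A0,Z+
+       lpm  %B0,Z
+
+   (with "elpm" instead of "lpm" once a RAMPZ segment has been set up
+   above).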
*/ + + switch (GET_CODE (addr)) + { + default: + gcc_unreachable(); + + case REG: + + gcc_assert (REG_Z == REGNO (addr)); + + switch (n_bytes) + { + default: + gcc_unreachable(); + + case 1: + return avr_asm_len ("%4lpm %0,%a2", xop, plen, 1); + + case 2: + if (REGNO (dest) == REG_Z) + return avr_asm_len ("%4lpm %5,%a2+" CR_TAB + "%4lpm %B0,%a2" CR_TAB + "mov %A0,%5", xop, plen, 3); + else + { + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2", xop, plen, 2); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,1", xop, plen, 1); + } + + break; /* 2 */ + + case 3: + + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2+" CR_TAB + "%4lpm %C0,%a2", xop, plen, 3); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,2", xop, plen, 1); + + break; /* 3 */ + + case 4: + + avr_asm_len ("%4lpm %A0,%a2+" CR_TAB + "%4lpm %B0,%a2+", xop, plen, 2); + + if (REGNO (dest) == REG_Z - 2) + return avr_asm_len ("%4lpm %5,%a2+" CR_TAB + "%4lpm %C0,%a2" CR_TAB + "mov %D0,%5", xop, plen, 3); + else + { + avr_asm_len ("%4lpm %C0,%a2+" CR_TAB + "%4lpm %D0,%a2", xop, plen, 2); + + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,3", xop, plen, 1); + } + + break; /* 4 */ + } /* n_bytes */ + + break; /* REG */ + + case POST_INC: + + gcc_assert (REG_Z == REGNO (XEXP (addr, 0)) + && n_bytes <= 4); + + avr_asm_len ("%4lpm %A0,%a2+", xop, plen, 1); + if (n_bytes >= 2) avr_asm_len ("%4lpm %B0,%a2+", xop, plen, 1); + if (n_bytes >= 3) avr_asm_len ("%4lpm %C0,%a2+", xop, plen, 1); + if (n_bytes >= 4) avr_asm_len ("%4lpm %D0,%a2+", xop, plen, 1); + + break; /* POST_INC */ + + } /* switch CODE (addr) */ + + if (xop[4] == xstring_e && AVR_HAVE_RAMPD) + { + /* Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM. */ + + xop[0] = zero_reg_rtx; + avr_asm_len ("out %i6,%0", xop, plen, 1); + } + + return ""; +} + + +/* Worker function for xload_8 insn. */ + +const char* +avr_out_xload (rtx insn ATTRIBUTE_UNUSED, rtx *op, int *plen) +{ + rtx xop[4]; + + xop[0] = op[0]; + xop[1] = op[1]; + xop[2] = lpm_addr_reg_rtx; + xop[3] = AVR_HAVE_LPMX ? op[0] : lpm_reg_rtx; + + avr_asm_len (AVR_HAVE_LPMX ? "lpm %3,%a2" : "lpm", xop, plen, -1); + + avr_asm_len ("sbrc %1,7" CR_TAB + "ld %3,%a2", xop, plen, 2); + + if (REGNO (xop[0]) != REGNO (xop[3])) + avr_asm_len ("mov %0,%3", xop, plen, 1); + + return ""; +} + + +const char* +output_movqi (rtx insn, rtx operands[], int *plen) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + + if (avr_mem_flash_p (src) + || avr_mem_flash_p (dest)) + { + return avr_out_lpm (insn, operands, plen); + } + + gcc_assert (1 == GET_MODE_SIZE (GET_MODE (dest))); + + if (REG_P (dest)) + { + if (REG_P (src)) /* mov r,r */ + { + if (test_hard_reg_class (STACK_REG, dest)) + return avr_asm_len ("out %0,%1", operands, plen, -1); + else if (test_hard_reg_class (STACK_REG, src)) + return avr_asm_len ("in %0,%1", operands, plen, -1); + + return avr_asm_len ("mov %0,%1", operands, plen, -1); + } + else if (CONSTANT_P (src)) + { + output_reload_in_const (operands, NULL_RTX, plen, false); + return ""; + } + else if (MEM_P (src)) + return out_movqi_r_mr (insn, operands, plen); /* mov r,m */ + } + else if (MEM_P (dest)) + { + rtx xop[2]; + + xop[0] = dest; + xop[1] = src == CONST0_RTX (GET_MODE (dest)) ? 
zero_reg_rtx : src; + + return out_movqi_mr_r (insn, xop, plen); + } + + return ""; +} + + +const char * +output_movhi (rtx insn, rtx xop[], int *plen) +{ + rtx dest = xop[0]; + rtx src = xop[1]; + + gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 2); + + if (avr_mem_flash_p (src) + || avr_mem_flash_p (dest)) + { + return avr_out_lpm (insn, xop, plen); + } + + gcc_assert (2 == GET_MODE_SIZE (GET_MODE (dest))); + + if (REG_P (dest)) + { + if (REG_P (src)) /* mov r,r */ + { + if (test_hard_reg_class (STACK_REG, dest)) + { + if (AVR_HAVE_8BIT_SP) + return avr_asm_len ("out __SP_L__,%A1", xop, plen, -1); + + if (AVR_XMEGA) + return avr_asm_len ("out __SP_L__,%A1" CR_TAB + "out __SP_H__,%B1", xop, plen, -2); + + /* Use simple load of SP if no interrupts are used. */ + + return TARGET_NO_INTERRUPTS + ? avr_asm_len ("out __SP_H__,%B1" CR_TAB + "out __SP_L__,%A1", xop, plen, -2) + : avr_asm_len ("in __tmp_reg__,__SREG__" CR_TAB + "cli" CR_TAB + "out __SP_H__,%B1" CR_TAB + "out __SREG__,__tmp_reg__" CR_TAB + "out __SP_L__,%A1", xop, plen, -5); + } + else if (test_hard_reg_class (STACK_REG, src)) + { + return !AVR_HAVE_SPH + ? avr_asm_len ("in %A0,__SP_L__" CR_TAB + "clr %B0", xop, plen, -2) + + : avr_asm_len ("in %A0,__SP_L__" CR_TAB + "in %B0,__SP_H__", xop, plen, -2); + } + + return AVR_HAVE_MOVW + ? avr_asm_len ("movw %0,%1", xop, plen, -1) + + : avr_asm_len ("mov %A0,%A1" CR_TAB + "mov %B0,%B1", xop, plen, -2); + } /* REG_P (src) */ + else if (CONSTANT_P (src)) + { + return output_reload_inhi (xop, NULL, plen); + } + else if (MEM_P (src)) + { + return out_movhi_r_mr (insn, xop, plen); /* mov r,m */ + } + } + else if (MEM_P (dest)) + { + rtx xop[2]; + + xop[0] = dest; + xop[1] = src == CONST0_RTX (GET_MODE (dest)) ? zero_reg_rtx : src; + + return out_movhi_mr_r (insn, xop, plen); + } + + fatal_insn ("invalid insn:", insn); + + return ""; +} + +static const char* +out_movqi_r_mr (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx x = XEXP (src, 0); + + if (CONSTANT_ADDRESS_P (x)) + { + return optimize > 0 && io_address_operand (x, QImode) + ? avr_asm_len ("in %0,%i1", op, plen, -1) + : avr_asm_len ("lds %0,%m1", op, plen, -2); + } + else if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))) + { + /* memory access by reg+disp */ + + int disp = INTVAL (XEXP (x, 1)); + + if (disp - GET_MODE_SIZE (GET_MODE (src)) >= 63) + { + if (REGNO (XEXP (x, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return avr_asm_len ("adiw r28,%o1-63" CR_TAB + "ldd %0,Y+63" CR_TAB + "sbiw r28,%o1-63", op, plen, -3); + + return avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB + "sbci r29,hi8(-%o1)" CR_TAB + "ld %0,Y" CR_TAB + "subi r28,lo8(%o1)" CR_TAB + "sbci r29,hi8(%o1)", op, plen, -5); + } + else if (REGNO (XEXP (x, 0)) == REG_X) + { + /* This is a paranoid case LEGITIMIZE_RELOAD_ADDRESS must exclude + it but I have this situation with extremal optimizing options. 
*/ + + avr_asm_len ("adiw r26,%o1" CR_TAB + "ld %0,X", op, plen, -2); + + if (!reg_overlap_mentioned_p (dest, XEXP (x,0)) + && !reg_unused_after (insn, XEXP (x,0))) + { + avr_asm_len ("sbiw r26,%o1", op, plen, 1); + } + + return ""; + } + + return avr_asm_len ("ldd %0,%1", op, plen, -1); + } + + return avr_asm_len ("ld %0,%1", op, plen, -1); +} + +static const char* +out_movhi_r_mr (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (src, 0); + int reg_dest = true_regnum (dest); + int reg_base = true_regnum (base); + /* "volatile" forces reading low byte first, even if less efficient, + for correct operation with 16-bit I/O registers. */ + int mem_volatile_p = MEM_VOLATILE_P (src); + + if (reg_base > 0) + { + if (reg_dest == reg_base) /* R = (R) */ + return avr_asm_len ("ld __tmp_reg__,%1+" CR_TAB + "ld %B0,%1" CR_TAB + "mov %A0,__tmp_reg__", op, plen, -3); + + if (reg_base != REG_X) + return avr_asm_len ("ld %A0,%1" CR_TAB + "ldd %B0,%1+1", op, plen, -2); + + avr_asm_len ("ld %A0,X+" CR_TAB + "ld %B0,X", op, plen, -2); + + if (!reg_unused_after (insn, base)) + avr_asm_len ("sbiw r26,1", op, plen, 1); + + return ""; + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + int reg_base = true_regnum (XEXP (base, 0)); + + if (disp > MAX_LD_OFFSET (GET_MODE (src))) + { + if (REGNO (XEXP (base, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + return disp <= 63 + MAX_LD_OFFSET (GET_MODE (src)) + ? avr_asm_len ("adiw r28,%o1-62" CR_TAB + "ldd %A0,Y+62" CR_TAB + "ldd %B0,Y+63" CR_TAB + "sbiw r28,%o1-62", op, plen, -4) + + : avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB + "sbci r29,hi8(-%o1)" CR_TAB + "ld %A0,Y" CR_TAB + "ldd %B0,Y+1" CR_TAB + "subi r28,lo8(%o1)" CR_TAB + "sbci r29,hi8(%o1)", op, plen, -6); + } + + /* This is a paranoid case. LEGITIMIZE_RELOAD_ADDRESS must exclude + it but I have this situation with extremal + optimization options. */ + + if (reg_base == REG_X) + return reg_base == reg_dest + ? avr_asm_len ("adiw r26,%o1" CR_TAB + "ld __tmp_reg__,X+" CR_TAB + "ld %B0,X" CR_TAB + "mov %A0,__tmp_reg__", op, plen, -4) + + : avr_asm_len ("adiw r26,%o1" CR_TAB + "ld %A0,X+" CR_TAB + "ld %B0,X" CR_TAB + "sbiw r26,%o1+1", op, plen, -4); + + return reg_base == reg_dest + ? avr_asm_len ("ldd __tmp_reg__,%A1" CR_TAB + "ldd %B0,%B1" CR_TAB + "mov %A0,__tmp_reg__", op, plen, -3) + + : avr_asm_len ("ldd %A0,%A1" CR_TAB + "ldd %B0,%B1", op, plen, -2); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + { + if (reg_overlap_mentioned_p (dest, XEXP (base, 0))) + fatal_insn ("incorrect insn:", insn); + + if (!mem_volatile_p) + return avr_asm_len ("ld %B0,%1" CR_TAB + "ld %A0,%1", op, plen, -2); + + return REGNO (XEXP (base, 0)) == REG_X + ? avr_asm_len ("sbiw r26,2" CR_TAB + "ld %A0,X+" CR_TAB + "ld %B0,X" CR_TAB + "sbiw r26,1", op, plen, -4) + + : avr_asm_len ("sbiw %r1,2" CR_TAB + "ld %A0,%p1" CR_TAB + "ldd %B0,%p1+1", op, plen, -3); + } + else if (GET_CODE (base) == POST_INC) /* (R++) */ + { + if (reg_overlap_mentioned_p (dest, XEXP (base, 0))) + fatal_insn ("incorrect insn:", insn); + + return avr_asm_len ("ld %A0,%1" CR_TAB + "ld %B0,%1", op, plen, -2); + } + else if (CONSTANT_ADDRESS_P (base)) + { + return optimize > 0 && io_address_operand (base, HImode) + ? 
avr_asm_len ("in %A0,%i1" CR_TAB + "in %B0,%i1+1", op, plen, -2) + + : avr_asm_len ("lds %A0,%m1" CR_TAB + "lds %B0,%m1+1", op, plen, -4); + } + + fatal_insn ("unknown move insn:",insn); + return ""; +} + +static const char* +out_movsi_r_mr (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (src, 0); + int reg_dest = true_regnum (dest); + int reg_base = true_regnum (base); + int tmp; + + if (!l) + l = &tmp; + + if (reg_base > 0) + { + if (reg_base == REG_X) /* (R26) */ + { + if (reg_dest == REG_X) + /* "ld r26,-X" is undefined */ + return *l=7, ("adiw r26,3" CR_TAB + "ld r29,X" CR_TAB + "ld r28,-X" CR_TAB + "ld __tmp_reg__,-X" CR_TAB + "sbiw r26,1" CR_TAB + "ld r26,X" CR_TAB + "mov r27,__tmp_reg__"); + else if (reg_dest == REG_X - 2) + return *l=5, ("ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld __tmp_reg__,X+" CR_TAB + "ld %D0,X" CR_TAB + "mov %C0,__tmp_reg__"); + else if (reg_unused_after (insn, base)) + return *l=4, ("ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X+" CR_TAB + "ld %D0,X"); + else + return *l=5, ("ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X+" CR_TAB + "ld %D0,X" CR_TAB + "sbiw r26,3"); + } + else + { + if (reg_dest == reg_base) + return *l=5, ("ldd %D0,%1+3" CR_TAB + "ldd %C0,%1+2" CR_TAB + "ldd __tmp_reg__,%1+1" CR_TAB + "ld %A0,%1" CR_TAB + "mov %B0,__tmp_reg__"); + else if (reg_base == reg_dest + 2) + return *l=5, ("ld %A0,%1" CR_TAB + "ldd %B0,%1+1" CR_TAB + "ldd __tmp_reg__,%1+2" CR_TAB + "ldd %D0,%1+3" CR_TAB + "mov %C0,__tmp_reg__"); + else + return *l=4, ("ld %A0,%1" CR_TAB + "ldd %B0,%1+1" CR_TAB + "ldd %C0,%1+2" CR_TAB + "ldd %D0,%1+3"); + } + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + + if (disp > MAX_LD_OFFSET (GET_MODE (src))) + { + if (REGNO (XEXP (base, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return *l = 6, ("adiw r28,%o1-60" CR_TAB + "ldd %A0,Y+60" CR_TAB + "ldd %B0,Y+61" CR_TAB + "ldd %C0,Y+62" CR_TAB + "ldd %D0,Y+63" CR_TAB + "sbiw r28,%o1-60"); + + return *l = 8, ("subi r28,lo8(-%o1)" CR_TAB + "sbci r29,hi8(-%o1)" CR_TAB + "ld %A0,Y" CR_TAB + "ldd %B0,Y+1" CR_TAB + "ldd %C0,Y+2" CR_TAB + "ldd %D0,Y+3" CR_TAB + "subi r28,lo8(%o1)" CR_TAB + "sbci r29,hi8(%o1)"); + } + + reg_base = true_regnum (XEXP (base, 0)); + if (reg_base == REG_X) + { + /* R = (X + d) */ + if (reg_dest == REG_X) + { + *l = 7; + /* "ld r26,-X" is undefined */ + return ("adiw r26,%o1+3" CR_TAB + "ld r29,X" CR_TAB + "ld r28,-X" CR_TAB + "ld __tmp_reg__,-X" CR_TAB + "sbiw r26,1" CR_TAB + "ld r26,X" CR_TAB + "mov r27,__tmp_reg__"); + } + *l = 6; + if (reg_dest == REG_X - 2) + return ("adiw r26,%o1" CR_TAB + "ld r24,X+" CR_TAB + "ld r25,X+" CR_TAB + "ld __tmp_reg__,X+" CR_TAB + "ld r27,X" CR_TAB + "mov r26,__tmp_reg__"); + + return ("adiw r26,%o1" CR_TAB + "ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X+" CR_TAB + "ld %D0,X" CR_TAB + "sbiw r26,%o1+3"); + } + if (reg_dest == reg_base) + return *l=5, ("ldd %D0,%D1" CR_TAB + "ldd %C0,%C1" CR_TAB + "ldd __tmp_reg__,%B1" CR_TAB + "ldd %A0,%A1" CR_TAB + "mov %B0,__tmp_reg__"); + else if (reg_dest == reg_base - 2) + return *l=5, ("ldd %A0,%A1" CR_TAB + "ldd %B0,%B1" CR_TAB + "ldd __tmp_reg__,%C1" CR_TAB + "ldd %D0,%D1" CR_TAB + "mov %C0,__tmp_reg__"); + return *l=4, ("ldd %A0,%A1" CR_TAB + "ldd %B0,%B1" CR_TAB + "ldd %C0,%C1" CR_TAB + "ldd %D0,%D1"); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return *l=4, ("ld %D0,%1" CR_TAB + "ld %C0,%1" CR_TAB + "ld %B0,%1" 
CR_TAB + "ld %A0,%1"); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return *l=4, ("ld %A0,%1" CR_TAB + "ld %B0,%1" CR_TAB + "ld %C0,%1" CR_TAB + "ld %D0,%1"); + else if (CONSTANT_ADDRESS_P (base)) + return *l=8, ("lds %A0,%m1" CR_TAB + "lds %B0,%m1+1" CR_TAB + "lds %C0,%m1+2" CR_TAB + "lds %D0,%m1+3"); + + fatal_insn ("unknown move insn:",insn); + return ""; +} + +static const char* +out_movsi_mr_r (rtx insn, rtx op[], int *l) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + int reg_src = true_regnum (src); + int tmp; + + if (!l) + l = &tmp; + + if (CONSTANT_ADDRESS_P (base)) + return *l=8,("sts %m0,%A1" CR_TAB + "sts %m0+1,%B1" CR_TAB + "sts %m0+2,%C1" CR_TAB + "sts %m0+3,%D1"); + if (reg_base > 0) /* (r) */ + { + if (reg_base == REG_X) /* (R26) */ + { + if (reg_src == REG_X) + { + /* "st X+,r26" is undefined */ + if (reg_unused_after (insn, base)) + return *l=6, ("mov __tmp_reg__,r27" CR_TAB + "st X,r26" CR_TAB + "adiw r26,1" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X+,r28" CR_TAB + "st X,r29"); + else + return *l=7, ("mov __tmp_reg__,r27" CR_TAB + "st X,r26" CR_TAB + "adiw r26,1" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X+,r28" CR_TAB + "st X,r29" CR_TAB + "sbiw r26,3"); + } + else if (reg_base == reg_src + 2) + { + if (reg_unused_after (insn, base)) + return *l=7, ("mov __zero_reg__,%C1" CR_TAB + "mov __tmp_reg__,%D1" CR_TAB + "st %0+,%A1" CR_TAB + "st %0+,%B1" CR_TAB + "st %0+,__zero_reg__" CR_TAB + "st %0,__tmp_reg__" CR_TAB + "clr __zero_reg__"); + else + return *l=8, ("mov __zero_reg__,%C1" CR_TAB + "mov __tmp_reg__,%D1" CR_TAB + "st %0+,%A1" CR_TAB + "st %0+,%B1" CR_TAB + "st %0+,__zero_reg__" CR_TAB + "st %0,__tmp_reg__" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,3"); + } + return *l=5, ("st %0+,%A1" CR_TAB + "st %0+,%B1" CR_TAB + "st %0+,%C1" CR_TAB + "st %0,%D1" CR_TAB + "sbiw r26,3"); + } + else + return *l=4, ("st %0,%A1" CR_TAB + "std %0+1,%B1" CR_TAB + "std %0+2,%C1" CR_TAB + "std %0+3,%D1"); + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return *l = 6, ("adiw r28,%o0-60" CR_TAB + "std Y+60,%A1" CR_TAB + "std Y+61,%B1" CR_TAB + "std Y+62,%C1" CR_TAB + "std Y+63,%D1" CR_TAB + "sbiw r28,%o0-60"); + + return *l = 8, ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "st Y,%A1" CR_TAB + "std Y+1,%B1" CR_TAB + "std Y+2,%C1" CR_TAB + "std Y+3,%D1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)"); + } + if (reg_base == REG_X) + { + /* (X + d) = R */ + if (reg_src == REG_X) + { + *l = 9; + return ("mov __tmp_reg__,r26" CR_TAB + "mov __zero_reg__,r27" CR_TAB + "adiw r26,%o0" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X+,__zero_reg__" CR_TAB + "st X+,r28" CR_TAB + "st X,r29" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,%o0+3"); + } + else if (reg_src == REG_X - 2) + { + *l = 9; + return ("mov __tmp_reg__,r26" CR_TAB + "mov __zero_reg__,r27" CR_TAB + "adiw r26,%o0" CR_TAB + "st X+,r24" CR_TAB + "st X+,r25" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X,__zero_reg__" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,%o0+3"); + } + *l = 6; + return ("adiw r26,%o0" CR_TAB + "st X+,%A1" CR_TAB + "st X+,%B1" CR_TAB + "st X+,%C1" CR_TAB + "st X,%D1" CR_TAB + "sbiw r26,%o0+3"); + } + return *l=4, ("std %A0,%A1" CR_TAB + "std %B0,%B1" CR_TAB + "std 
%C0,%C1" CR_TAB + "std %D0,%D1"); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return *l=4, ("st %0,%D1" CR_TAB + "st %0,%C1" CR_TAB + "st %0,%B1" CR_TAB + "st %0,%A1"); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return *l=4, ("st %0,%A1" CR_TAB + "st %0,%B1" CR_TAB + "st %0,%C1" CR_TAB + "st %0,%D1"); + fatal_insn ("unknown move insn:",insn); + return ""; +} + +const char * +output_movsisf (rtx insn, rtx operands[], int *l) +{ + int dummy; + rtx dest = operands[0]; + rtx src = operands[1]; + int *real_l = l; + + if (avr_mem_flash_p (src) + || avr_mem_flash_p (dest)) + { + return avr_out_lpm (insn, operands, real_l); + } + + if (!l) + l = &dummy; + + gcc_assert (4 == GET_MODE_SIZE (GET_MODE (dest))); + if (REG_P (dest)) + { + if (REG_P (src)) /* mov r,r */ + { + if (true_regnum (dest) > true_regnum (src)) + { + if (AVR_HAVE_MOVW) + { + *l = 2; + return ("movw %C0,%C1" CR_TAB + "movw %A0,%A1"); + } + *l = 4; + return ("mov %D0,%D1" CR_TAB + "mov %C0,%C1" CR_TAB + "mov %B0,%B1" CR_TAB + "mov %A0,%A1"); + } + else + { + if (AVR_HAVE_MOVW) + { + *l = 2; + return ("movw %A0,%A1" CR_TAB + "movw %C0,%C1"); + } + *l = 4; + return ("mov %A0,%A1" CR_TAB + "mov %B0,%B1" CR_TAB + "mov %C0,%C1" CR_TAB + "mov %D0,%D1"); + } + } + else if (CONSTANT_P (src)) + { + return output_reload_insisf (operands, NULL_RTX, real_l); + } + else if (MEM_P (src)) + return out_movsi_r_mr (insn, operands, real_l); /* mov r,m */ + } + else if (MEM_P (dest)) + { + const char *templ; + + if (src == CONST0_RTX (GET_MODE (dest))) + operands[1] = zero_reg_rtx; + + templ = out_movsi_mr_r (insn, operands, real_l); + + if (!real_l) + output_asm_insn (templ, operands); + + operands[1] = src; + return ""; + } + fatal_insn ("invalid insn:", insn); + return ""; +} + + +/* Handle loads of 24-bit types from memory to register. 
*/ + +static const char* +avr_out_load_psi (rtx insn, rtx *op, int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (src, 0); + int reg_dest = true_regnum (dest); + int reg_base = true_regnum (base); + + if (reg_base > 0) + { + if (reg_base == REG_X) /* (R26) */ + { + if (reg_dest == REG_X) + /* "ld r26,-X" is undefined */ + return avr_asm_len ("adiw r26,2" CR_TAB + "ld r28,X" CR_TAB + "ld __tmp_reg__,-X" CR_TAB + "sbiw r26,1" CR_TAB + "ld r26,X" CR_TAB + "mov r27,__tmp_reg__", op, plen, -6); + else + { + avr_asm_len ("ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X", op, plen, -3); + + if (reg_dest != REG_X - 2 + && !reg_unused_after (insn, base)) + { + avr_asm_len ("sbiw r26,2", op, plen, 1); + } + + return ""; + } + } + else /* reg_base != REG_X */ + { + if (reg_dest == reg_base) + return avr_asm_len ("ldd %C0,%1+2" CR_TAB + "ldd __tmp_reg__,%1+1" CR_TAB + "ld %A0,%1" CR_TAB + "mov %B0,__tmp_reg__", op, plen, -4); + else + return avr_asm_len ("ld %A0,%1" CR_TAB + "ldd %B0,%1+1" CR_TAB + "ldd %C0,%1+2", op, plen, -3); + } + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + + if (disp > MAX_LD_OFFSET (GET_MODE (src))) + { + if (REGNO (XEXP (base, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (src))) + return avr_asm_len ("adiw r28,%o1-61" CR_TAB + "ldd %A0,Y+61" CR_TAB + "ldd %B0,Y+62" CR_TAB + "ldd %C0,Y+63" CR_TAB + "sbiw r28,%o1-61", op, plen, -5); + + return avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB + "sbci r29,hi8(-%o1)" CR_TAB + "ld %A0,Y" CR_TAB + "ldd %B0,Y+1" CR_TAB + "ldd %C0,Y+2" CR_TAB + "subi r28,lo8(%o1)" CR_TAB + "sbci r29,hi8(%o1)", op, plen, -7); + } + + reg_base = true_regnum (XEXP (base, 0)); + if (reg_base == REG_X) + { + /* R = (X + d) */ + if (reg_dest == REG_X) + { + /* "ld r26,-X" is undefined */ + return avr_asm_len ("adiw r26,%o1+2" CR_TAB + "ld r28,X" CR_TAB + "ld __tmp_reg__,-X" CR_TAB + "sbiw r26,1" CR_TAB + "ld r26,X" CR_TAB + "mov r27,__tmp_reg__", op, plen, -6); + } + + avr_asm_len ("adiw r26,%o1" CR_TAB + "ld %A0,X+" CR_TAB + "ld %B0,X+" CR_TAB + "ld %C0,X", op, plen, -4); + + if (reg_dest != REG_W + && !reg_unused_after (insn, XEXP (base, 0))) + avr_asm_len ("sbiw r26,%o1+2", op, plen, 1); + + return ""; + } + + if (reg_dest == reg_base) + return avr_asm_len ("ldd %C0,%C1" CR_TAB + "ldd __tmp_reg__,%B1" CR_TAB + "ldd %A0,%A1" CR_TAB + "mov %B0,__tmp_reg__", op, plen, -4); + + return avr_asm_len ("ldd %A0,%A1" CR_TAB + "ldd %B0,%B1" CR_TAB + "ldd %C0,%C1", op, plen, -3); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return avr_asm_len ("ld %C0,%1" CR_TAB + "ld %B0,%1" CR_TAB + "ld %A0,%1", op, plen, -3); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return avr_asm_len ("ld %A0,%1" CR_TAB + "ld %B0,%1" CR_TAB + "ld %C0,%1", op, plen, -3); + + else if (CONSTANT_ADDRESS_P (base)) + return avr_asm_len ("lds %A0,%m1" CR_TAB + "lds %B0,%m1+1" CR_TAB + "lds %C0,%m1+2", op, plen , -6); + + fatal_insn ("unknown move insn:",insn); + return ""; +} + +/* Handle store of 24-bit type from register or zero to memory. 
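+
+   Note that a constant-zero source never reaches this point as a
+   constant: avr_out_movpsi below replaces such a source with
+   __zero_reg__ first, so only register sources need to be handled here.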
*/ + +static const char* +avr_out_store_psi (rtx insn, rtx *op, int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + + if (CONSTANT_ADDRESS_P (base)) + return avr_asm_len ("sts %m0,%A1" CR_TAB + "sts %m0+1,%B1" CR_TAB + "sts %m0+2,%C1", op, plen, -6); + + if (reg_base > 0) /* (r) */ + { + if (reg_base == REG_X) /* (R26) */ + { + gcc_assert (!reg_overlap_mentioned_p (base, src)); + + avr_asm_len ("st %0+,%A1" CR_TAB + "st %0+,%B1" CR_TAB + "st %0,%C1", op, plen, -3); + + if (!reg_unused_after (insn, base)) + avr_asm_len ("sbiw r26,2", op, plen, 1); + + return ""; + } + else + return avr_asm_len ("st %0,%A1" CR_TAB + "std %0+1,%B1" CR_TAB + "std %0+2,%C1", op, plen, -3); + } + else if (GET_CODE (base) == PLUS) /* (R + i) */ + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return avr_asm_len ("adiw r28,%o0-61" CR_TAB + "std Y+61,%A1" CR_TAB + "std Y+62,%B1" CR_TAB + "std Y+63,%C1" CR_TAB + "sbiw r28,%o0-60", op, plen, -5); + + return avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "st Y,%A1" CR_TAB + "std Y+1,%B1" CR_TAB + "std Y+2,%C1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)", op, plen, -7); + } + if (reg_base == REG_X) + { + /* (X + d) = R */ + gcc_assert (!reg_overlap_mentioned_p (XEXP (base, 0), src)); + + avr_asm_len ("adiw r26,%o0" CR_TAB + "st X+,%A1" CR_TAB + "st X+,%B1" CR_TAB + "st X,%C1", op, plen, -4); + + if (!reg_unused_after (insn, XEXP (base, 0))) + avr_asm_len ("sbiw r26,%o0+2", op, plen, 1); + + return ""; + } + + return avr_asm_len ("std %A0,%A1" CR_TAB + "std %B0,%B1" CR_TAB + "std %C0,%C1", op, plen, -3); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + return avr_asm_len ("st %0,%C1" CR_TAB + "st %0,%B1" CR_TAB + "st %0,%A1", op, plen, -3); + else if (GET_CODE (base) == POST_INC) /* (R++) */ + return avr_asm_len ("st %0,%A1" CR_TAB + "st %0,%B1" CR_TAB + "st %0,%C1", op, plen, -3); + + fatal_insn ("unknown move insn:",insn); + return ""; +} + + +/* Move around 24-bit stuff. */ + +const char * +avr_out_movpsi (rtx insn, rtx *op, int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + + if (avr_mem_flash_p (src) + || avr_mem_flash_p (dest)) + { + return avr_out_lpm (insn, op, plen); + } + + if (register_operand (dest, VOIDmode)) + { + if (register_operand (src, VOIDmode)) /* mov r,r */ + { + if (true_regnum (dest) > true_regnum (src)) + { + avr_asm_len ("mov %C0,%C1", op, plen, -1); + + if (AVR_HAVE_MOVW) + return avr_asm_len ("movw %A0,%A1", op, plen, 1); + else + return avr_asm_len ("mov %B0,%B1" CR_TAB + "mov %A0,%A1", op, plen, 2); + } + else + { + if (AVR_HAVE_MOVW) + avr_asm_len ("movw %A0,%A1", op, plen, -1); + else + avr_asm_len ("mov %A0,%A1" CR_TAB + "mov %B0,%B1", op, plen, -2); + + return avr_asm_len ("mov %C0,%C1", op, plen, 1); + } + } + else if (CONSTANT_P (src)) + { + return avr_out_reload_inpsi (op, NULL_RTX, plen); + } + else if (MEM_P (src)) + return avr_out_load_psi (insn, op, plen); /* mov r,m */ + } + else if (MEM_P (dest)) + { + rtx xop[2]; + + xop[0] = dest; + xop[1] = src == CONST0_RTX (GET_MODE (dest)) ? 
zero_reg_rtx : src; + + return avr_out_store_psi (insn, xop, plen); + } + + fatal_insn ("invalid insn:", insn); + return ""; +} + + +static const char* +out_movqi_mr_r (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx x = XEXP (dest, 0); + + if (CONSTANT_ADDRESS_P (x)) + { + return optimize > 0 && io_address_operand (x, QImode) + ? avr_asm_len ("out %i0,%1", op, plen, -1) + : avr_asm_len ("sts %m0,%1", op, plen, -2); + } + else if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))) + { + /* memory access by reg+disp */ + + int disp = INTVAL (XEXP (x, 1)); + + if (disp - GET_MODE_SIZE (GET_MODE (dest)) >= 63) + { + if (REGNO (XEXP (x, 0)) != REG_Y) + fatal_insn ("incorrect insn:",insn); + + if (disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest))) + return avr_asm_len ("adiw r28,%o0-63" CR_TAB + "std Y+63,%1" CR_TAB + "sbiw r28,%o0-63", op, plen, -3); + + return avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "st Y,%1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)", op, plen, -5); + } + else if (REGNO (XEXP (x,0)) == REG_X) + { + if (reg_overlap_mentioned_p (src, XEXP (x, 0))) + { + avr_asm_len ("mov __tmp_reg__,%1" CR_TAB + "adiw r26,%o0" CR_TAB + "st X,__tmp_reg__", op, plen, -3); + } + else + { + avr_asm_len ("adiw r26,%o0" CR_TAB + "st X,%1", op, plen, -2); + } + + if (!reg_unused_after (insn, XEXP (x,0))) + avr_asm_len ("sbiw r26,%o0", op, plen, 1); + + return ""; + } + + return avr_asm_len ("std %0,%1", op, plen, -1); + } + + return avr_asm_len ("st %0,%1", op, plen, -1); +} + + +/* Helper for the next function for XMEGA. It does the same + but with low byte first. */ + +static const char* +avr_out_movhi_mr_r_xmega (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + int reg_src = true_regnum (src); + + /* "volatile" forces writing low byte first, even if less efficient, + for correct operation with 16-bit I/O registers like SP. */ + int mem_volatile_p = MEM_VOLATILE_P (dest); + + if (CONSTANT_ADDRESS_P (base)) + return optimize > 0 && io_address_operand (base, HImode) + ? avr_asm_len ("out %i0,%A1" CR_TAB + "out %i0+1,%B1", op, plen, -2) + + : avr_asm_len ("sts %m0,%A1" CR_TAB + "sts %m0+1,%B1", op, plen, -4); + + if (reg_base > 0) + { + if (reg_base != REG_X) + return avr_asm_len ("st %0,%A1" CR_TAB + "std %0+1,%B1", op, plen, -2); + + if (reg_src == REG_X) + /* "st X+,r26" and "st -X,r26" are undefined. */ + avr_asm_len ("mov __tmp_reg__,r27" CR_TAB + "st X,r26" CR_TAB + "adiw r26,1" CR_TAB + "st X,__tmp_reg__", op, plen, -4); + else + avr_asm_len ("st X+,%A1" CR_TAB + "st X,%B1", op, plen, -2); + + return reg_unused_after (insn, base) + ? "" + : avr_asm_len ("sbiw r26,1", op, plen, 1); + } + else if (GET_CODE (base) == PLUS) + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + return disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest)) + ? 
avr_asm_len ("adiw r28,%o0-62" CR_TAB + "std Y+62,%A1" CR_TAB + "std Y+63,%B1" CR_TAB + "sbiw r28,%o0-62", op, plen, -4) + + : avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "st Y,%A1" CR_TAB + "std Y+1,%B1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)", op, plen, -6); + } + + if (reg_base != REG_X) + return avr_asm_len ("std %A0,%A1" CR_TAB + "std %B0,%B1", op, plen, -2); + /* (X + d) = R */ + return reg_src == REG_X + ? avr_asm_len ("mov __tmp_reg__,r26" CR_TAB + "mov __zero_reg__,r27" CR_TAB + "adiw r26,%o0" CR_TAB + "st X+,__tmp_reg__" CR_TAB + "st X,__zero_reg__" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,%o0+1", op, plen, -7) + + : avr_asm_len ("adiw r26,%o0" CR_TAB + "st X+,%A1" CR_TAB + "st X,%B1" CR_TAB + "sbiw r26,%o0+1", op, plen, -4); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + { + if (!mem_volatile_p) + return avr_asm_len ("st %0,%B1" CR_TAB + "st %0,%A1", op, plen, -2); + + return REGNO (XEXP (base, 0)) == REG_X + ? avr_asm_len ("sbiw r26,2" CR_TAB + "st X+,%A1" CR_TAB + "st X,%B1" CR_TAB + "sbiw r26,1", op, plen, -4) + + : avr_asm_len ("sbiw %r0,2" CR_TAB + "st %p0,%A1" CR_TAB + "std %p0+1,%B1", op, plen, -3); + } + else if (GET_CODE (base) == POST_INC) /* (R++) */ + { + return avr_asm_len ("st %0,%A1" CR_TAB + "st %0,%B1", op, plen, -2); + + } + fatal_insn ("unknown move insn:",insn); + return ""; +} + + +static const char* +out_movhi_mr_r (rtx insn, rtx op[], int *plen) +{ + rtx dest = op[0]; + rtx src = op[1]; + rtx base = XEXP (dest, 0); + int reg_base = true_regnum (base); + int reg_src = true_regnum (src); + int mem_volatile_p; + + /* "volatile" forces writing high-byte first (no-xmega) resp. + low-byte first (xmega) even if less efficient, for correct + operation with 16-bit I/O registers like. */ + + if (AVR_XMEGA) + return avr_out_movhi_mr_r_xmega (insn, op, plen); + + mem_volatile_p = MEM_VOLATILE_P (dest); + + if (CONSTANT_ADDRESS_P (base)) + return optimize > 0 && io_address_operand (base, HImode) + ? avr_asm_len ("out %i0+1,%B1" CR_TAB + "out %i0,%A1", op, plen, -2) + + : avr_asm_len ("sts %m0+1,%B1" CR_TAB + "sts %m0,%A1", op, plen, -4); + + if (reg_base > 0) + { + if (reg_base != REG_X) + return avr_asm_len ("std %0+1,%B1" CR_TAB + "st %0,%A1", op, plen, -2); + + if (reg_src == REG_X) + /* "st X+,r26" and "st -X,r26" are undefined. */ + return !mem_volatile_p && reg_unused_after (insn, src) + ? avr_asm_len ("mov __tmp_reg__,r27" CR_TAB + "st X,r26" CR_TAB + "adiw r26,1" CR_TAB + "st X,__tmp_reg__", op, plen, -4) + + : avr_asm_len ("mov __tmp_reg__,r27" CR_TAB + "adiw r26,1" CR_TAB + "st X,__tmp_reg__" CR_TAB + "sbiw r26,1" CR_TAB + "st X,r26", op, plen, -5); + + return !mem_volatile_p && reg_unused_after (insn, base) + ? avr_asm_len ("st X+,%A1" CR_TAB + "st X,%B1", op, plen, -2) + : avr_asm_len ("adiw r26,1" CR_TAB + "st X,%B1" CR_TAB + "st -X,%A1", op, plen, -3); + } + else if (GET_CODE (base) == PLUS) + { + int disp = INTVAL (XEXP (base, 1)); + reg_base = REGNO (XEXP (base, 0)); + if (disp > MAX_LD_OFFSET (GET_MODE (dest))) + { + if (reg_base != REG_Y) + fatal_insn ("incorrect insn:",insn); + + return disp <= 63 + MAX_LD_OFFSET (GET_MODE (dest)) + ? 
avr_asm_len ("adiw r28,%o0-62" CR_TAB + "std Y+63,%B1" CR_TAB + "std Y+62,%A1" CR_TAB + "sbiw r28,%o0-62", op, plen, -4) + + : avr_asm_len ("subi r28,lo8(-%o0)" CR_TAB + "sbci r29,hi8(-%o0)" CR_TAB + "std Y+1,%B1" CR_TAB + "st Y,%A1" CR_TAB + "subi r28,lo8(%o0)" CR_TAB + "sbci r29,hi8(%o0)", op, plen, -6); + } + + if (reg_base != REG_X) + return avr_asm_len ("std %B0,%B1" CR_TAB + "std %A0,%A1", op, plen, -2); + /* (X + d) = R */ + return reg_src == REG_X + ? avr_asm_len ("mov __tmp_reg__,r26" CR_TAB + "mov __zero_reg__,r27" CR_TAB + "adiw r26,%o0+1" CR_TAB + "st X,__zero_reg__" CR_TAB + "st -X,__tmp_reg__" CR_TAB + "clr __zero_reg__" CR_TAB + "sbiw r26,%o0", op, plen, -7) + + : avr_asm_len ("adiw r26,%o0+1" CR_TAB + "st X,%B1" CR_TAB + "st -X,%A1" CR_TAB + "sbiw r26,%o0", op, plen, -4); + } + else if (GET_CODE (base) == PRE_DEC) /* (--R) */ + { + return avr_asm_len ("st %0,%B1" CR_TAB + "st %0,%A1", op, plen, -2); + } + else if (GET_CODE (base) == POST_INC) /* (R++) */ + { + if (!mem_volatile_p) + return avr_asm_len ("st %0,%A1" CR_TAB + "st %0,%B1", op, plen, -2); + + return REGNO (XEXP (base, 0)) == REG_X + ? avr_asm_len ("adiw r26,1" CR_TAB + "st X,%B1" CR_TAB + "st -X,%A1" CR_TAB + "adiw r26,2", op, plen, -4) + + : avr_asm_len ("std %p0+1,%B1" CR_TAB + "st %p0,%A1" CR_TAB + "adiw %r0,2", op, plen, -3); + } + fatal_insn ("unknown move insn:",insn); + return ""; +} + +/* Return 1 if frame pointer for current function required. */ + +static bool +avr_frame_pointer_required_p (void) +{ + return (cfun->calls_alloca + || cfun->calls_setjmp + || cfun->has_nonlocal_label + || crtl->args.info.nregs == 0 + || get_frame_size () > 0); +} + +/* Returns the condition of compare insn INSN, or UNKNOWN. */ + +static RTX_CODE +compare_condition (rtx insn) +{ + rtx next = next_real_insn (insn); + + if (next && JUMP_P (next)) + { + rtx pat = PATTERN (next); + rtx src = SET_SRC (pat); + + if (IF_THEN_ELSE == GET_CODE (src)) + return GET_CODE (XEXP (src, 0)); + } + + return UNKNOWN; +} + + +/* Returns true iff INSN is a tst insn that only tests the sign. */ + +static bool +compare_sign_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == GE || cond == LT); +} + + +/* Returns true iff the next insn is a JUMP_INSN with a condition + that needs to be swapped (GT, GTU, LE, LEU). */ + +static bool +compare_diff_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == GT || cond == GTU || cond == LE || cond == LEU) ? cond : 0; +} + +/* Returns true iff INSN is a compare insn with the EQ or NE condition. */ + +static bool +compare_eq_p (rtx insn) +{ + RTX_CODE cond = compare_condition (insn); + return (cond == EQ || cond == NE); +} + + +/* Output compare instruction + + compare (XOP[0], XOP[1]) + + for a register XOP[0] and a compile-time constant XOP[1]. Return "". + XOP[2] is an 8-bit scratch register as needed. + + PLEN == NULL: Output instructions. + PLEN != NULL: Set *PLEN to the length (in words) of the sequence. + Don't output anything. */ + +const char* +avr_out_compare (rtx insn, rtx *xop, int *plen) +{ + /* Register to compare and value to compare against. */ + rtx xreg = xop[0]; + rtx xval = xop[1]; + + /* MODE of the comparison. */ + enum machine_mode mode; + + /* Number of bytes to operate on. */ + int i, n_bytes = GET_MODE_SIZE (GET_MODE (xreg)); + + /* Value (0..0xff) held in clobber register xop[2] or -1 if unknown. */ + int clobber_val = -1; + + /* Map fixed mode operands to integer operands with the same binary + representation. 
They are easier to handle in the remainder. */ + + if (CONST_FIXED_P (xval)) + { + xreg = avr_to_int_mode (xop[0]); + xval = avr_to_int_mode (xop[1]); + } + + mode = GET_MODE (xreg); + + gcc_assert (REG_P (xreg)); + gcc_assert ((CONST_INT_P (xval) && n_bytes <= 4) + || (const_double_operand (xval, VOIDmode) && n_bytes == 8)); + + if (plen) + *plen = 0; + + /* Comparisons == +/-1 and != +/-1 can be done similar to camparing + against 0 by ORing the bytes. This is one instruction shorter. + Notice that 64-bit comparisons are always against reg:ALL8 18 (ACC_A) + and therefore don't use this. */ + + if (!test_hard_reg_class (LD_REGS, xreg) + && compare_eq_p (insn) + && reg_unused_after (insn, xreg)) + { + if (xval == const1_rtx) + { + avr_asm_len ("dec %A0" CR_TAB + "or %A0,%B0", xop, plen, 2); + + if (n_bytes >= 3) + avr_asm_len ("or %A0,%C0", xop, plen, 1); + + if (n_bytes >= 4) + avr_asm_len ("or %A0,%D0", xop, plen, 1); + + return ""; + } + else if (xval == constm1_rtx) + { + if (n_bytes >= 4) + avr_asm_len ("and %A0,%D0", xop, plen, 1); + + if (n_bytes >= 3) + avr_asm_len ("and %A0,%C0", xop, plen, 1); + + return avr_asm_len ("and %A0,%B0" CR_TAB + "com %A0", xop, plen, 2); + } + } + + for (i = 0; i < n_bytes; i++) + { + /* We compare byte-wise. */ + rtx reg8 = simplify_gen_subreg (QImode, xreg, mode, i); + rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i); + + /* 8-bit value to compare with this byte. */ + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + /* Registers R16..R31 can operate with immediate. */ + bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8); + + xop[0] = reg8; + xop[1] = gen_int_mode (val8, QImode); + + /* Word registers >= R24 can use SBIW/ADIW with 0..63. */ + + if (i == 0 + && test_hard_reg_class (ADDW_REGS, reg8)) + { + int val16 = trunc_int_for_mode (INTVAL (xval), HImode); + + if (IN_RANGE (val16, 0, 63) + && (val8 == 0 + || reg_unused_after (insn, xreg))) + { + avr_asm_len ("sbiw %0,%1", xop, plen, 1); + i++; + continue; + } + + if (n_bytes == 2 + && IN_RANGE (val16, -63, -1) + && compare_eq_p (insn) + && reg_unused_after (insn, xreg)) + { + return avr_asm_len ("adiw %0,%n1", xop, plen, 1); + } + } + + /* Comparing against 0 is easy. */ + + if (val8 == 0) + { + avr_asm_len (i == 0 + ? "cp %0,__zero_reg__" + : "cpc %0,__zero_reg__", xop, plen, 1); + continue; + } + + /* Upper registers can compare and subtract-with-carry immediates. + Notice that compare instructions do the same as respective subtract + instruction; the only difference is that comparisons don't write + the result back to the target register. */ + + if (ld_reg_p) + { + if (i == 0) + { + avr_asm_len ("cpi %0,%1", xop, plen, 1); + continue; + } + else if (reg_unused_after (insn, xreg)) + { + avr_asm_len ("sbci %0,%1", xop, plen, 1); + continue; + } + } + + /* Must load the value into the scratch register. */ + + gcc_assert (REG_P (xop[2])); + + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", xop, plen, 1); + clobber_val = (int) val8; + + avr_asm_len (i == 0 + ? "cp %0,%2" + : "cpc %0,%2", xop, plen, 1); + } + + return ""; +} + + +/* Prepare operands of compare_const_di2 to be used with avr_out_compare. */ + +const char* +avr_out_compare64 (rtx insn, rtx *op, int *plen) +{ + rtx xop[3]; + + xop[0] = gen_rtx_REG (DImode, 18); + xop[1] = op[0]; + xop[2] = op[1]; + + return avr_out_compare (insn, xop, plen); +} + +/* Output test instruction for HImode. 
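+ Use TST of the high byte for pure sign tests, an OR of both bytes for EQ/NE when the register dies here, and a full compare otherwise.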
*/ + +const char* +avr_out_tsthi (rtx insn, rtx *op, int *plen) +{ + if (compare_sign_p (insn)) + { + avr_asm_len ("tst %B0", op, plen, -1); + } + else if (reg_unused_after (insn, op[0]) + && compare_eq_p (insn)) + { + /* Faster than sbiw if we can clobber the operand. */ + avr_asm_len ("or %A0,%B0", op, plen, -1); + } + else + { + avr_out_compare (insn, op, plen); + } + + return ""; +} + + +/* Output test instruction for PSImode. */ + +const char* +avr_out_tstpsi (rtx insn, rtx *op, int *plen) +{ + if (compare_sign_p (insn)) + { + avr_asm_len ("tst %C0", op, plen, -1); + } + else if (reg_unused_after (insn, op[0]) + && compare_eq_p (insn)) + { + /* Faster than sbiw if we can clobber the operand. */ + avr_asm_len ("or %A0,%B0" CR_TAB + "or %A0,%C0", op, plen, -2); + } + else + { + avr_out_compare (insn, op, plen); + } + + return ""; +} + + +/* Output test instruction for SImode. */ + +const char* +avr_out_tstsi (rtx insn, rtx *op, int *plen) +{ + if (compare_sign_p (insn)) + { + avr_asm_len ("tst %D0", op, plen, -1); + } + else if (reg_unused_after (insn, op[0]) + && compare_eq_p (insn)) + { + /* Faster than sbiw if we can clobber the operand. */ + avr_asm_len ("or %A0,%B0" CR_TAB + "or %A0,%C0" CR_TAB + "or %A0,%D0", op, plen, -3); + } + else + { + avr_out_compare (insn, op, plen); + } + + return ""; +} + + +/* Generate asm equivalent for various shifts. This only handles cases + that are not already carefully hand-optimized in ?sh??i3_out. + + OPERANDS[0] resp. %0 in TEMPL is the operand to be shifted. + OPERANDS[2] is the shift count as CONST_INT, MEM or REG. + OPERANDS[3] is a QImode scratch register from LD regs if + available and SCRATCH, otherwise (no scratch available) + + TEMPL is an assembler template that shifts by one position. + T_LEN is the length of this template. */ + +void +out_shift_with_cnt (const char *templ, rtx insn, rtx operands[], + int *plen, int t_len) +{ + bool second_label = true; + bool saved_in_tmp = false; + bool use_zero_reg = false; + rtx op[5]; + + op[0] = operands[0]; + op[1] = operands[1]; + op[2] = operands[2]; + op[3] = operands[3]; + + if (plen) + *plen = 0; + + if (CONST_INT_P (operands[2])) + { + bool scratch = (GET_CODE (PATTERN (insn)) == PARALLEL + && REG_P (operands[3])); + int count = INTVAL (operands[2]); + int max_len = 10; /* If larger than this, always use a loop. */ + + if (count <= 0) + return; + + if (count < 8 && !scratch) + use_zero_reg = true; + + if (optimize_size) + max_len = t_len + (scratch ? 3 : (use_zero_reg ? 4 : 5)); + + if (t_len * count <= max_len) + { + /* Output shifts inline with no loop - faster. */ + + while (count-- > 0) + avr_asm_len (templ, op, plen, t_len); + + return; + } + + if (scratch) + { + avr_asm_len ("ldi %3,%2", op, plen, 1); + } + else if (use_zero_reg) + { + /* Hack to save one word: use __zero_reg__ as loop counter. + Set one bit, then shift in a loop until it is 0 again. */ + + op[3] = zero_reg_rtx; + + avr_asm_len ("set" CR_TAB + "bld %3,%2-1", op, plen, 2); + } + else + { + /* No scratch register available, use one from LD_REGS (saved in + __tmp_reg__) that doesn't overlap with registers to shift. 
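+ The borrowed register is restored from __tmp_reg__ once the shift loop has finished.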
*/ + + op[3] = all_regs_rtx[((REGNO (op[0]) - 1) & 15) + 16]; + op[4] = tmp_reg_rtx; + saved_in_tmp = true; + + avr_asm_len ("mov %4,%3" CR_TAB + "ldi %3,%2", op, plen, 2); + } + + second_label = false; + } + else if (MEM_P (op[2])) + { + rtx op_mov[2]; + + op_mov[0] = op[3] = tmp_reg_rtx; + op_mov[1] = op[2]; + + out_movqi_r_mr (insn, op_mov, plen); + } + else if (register_operand (op[2], QImode)) + { + op[3] = op[2]; + + if (!reg_unused_after (insn, op[2]) + || reg_overlap_mentioned_p (op[0], op[2])) + { + op[3] = tmp_reg_rtx; + avr_asm_len ("mov %3,%2", op, plen, 1); + } + } + else + fatal_insn ("bad shift insn:", insn); + + if (second_label) + avr_asm_len ("rjmp 2f", op, plen, 1); + + avr_asm_len ("1:", op, plen, 0); + avr_asm_len (templ, op, plen, t_len); + + if (second_label) + avr_asm_len ("2:", op, plen, 0); + + avr_asm_len (use_zero_reg ? "lsr %3" : "dec %3", op, plen, 1); + avr_asm_len (second_label ? "brpl 1b" : "brne 1b", op, plen, 1); + + if (saved_in_tmp) + avr_asm_len ("mov %3,%4", op, plen, 1); +} + + +/* 8bit shift left ((char)x << i) */ + +const char * +ashlqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 8) + break; + + *len = 1; + return "clr %0"; + + case 1: + *len = 1; + return "lsl %0"; + + case 2: + *len = 2; + return ("lsl %0" CR_TAB + "lsl %0"); + + case 3: + *len = 3; + return ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0"); + + case 4: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 2; + return ("swap %0" CR_TAB + "andi %0,0xf0"); + } + *len = 4; + return ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0"); + + case 5: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 3; + return ("swap %0" CR_TAB + "lsl %0" CR_TAB + "andi %0,0xe0"); + } + *len = 5; + return ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0"); + + case 6: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 4; + return ("swap %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "andi %0,0xc0"); + } + *len = 6; + return ("lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0" CR_TAB + "lsl %0"); + + case 7: + *len = 3; + return ("ror %0" CR_TAB + "clr %0" CR_TAB + "ror %0"); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. Incorrect shift:", insn); + + out_shift_with_cnt ("lsl %0", + insn, operands, len, 1); + return ""; +} + + +/* 16bit shift left ((short)x << i) */ + +const char * +ashlhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 16) + break; + + *len = 2; + return ("clr %B0" CR_TAB + "clr %A0"); + + case 4: + if (optimize_size && scratch) + break; /* 5 */ + if (ldi_ok) + { + *len = 6; + return ("swap %A0" CR_TAB + "swap %B0" CR_TAB + "andi %B0,0xf0" CR_TAB + "eor %B0,%A0" CR_TAB + "andi %A0,0xf0" CR_TAB + "eor %B0,%A0"); + } + if (scratch) + { + *len = 7; + return ("swap %A0" CR_TAB + "swap %B0" CR_TAB + "ldi %3,0xf0" CR_TAB + "and %B0,%3" CR_TAB + "eor %B0,%A0" CR_TAB + "and %A0,%3" CR_TAB + "eor %B0,%A0"); + } + break; /* optimize_size ? 6 : 8 */ + + case 5: + if (optimize_size) + break; /* scratch ? 
5 : 6 */ + if (ldi_ok) + { + *len = 8; + return ("lsl %A0" CR_TAB + "rol %B0" CR_TAB + "swap %A0" CR_TAB + "swap %B0" CR_TAB + "andi %B0,0xf0" CR_TAB + "eor %B0,%A0" CR_TAB + "andi %A0,0xf0" CR_TAB + "eor %B0,%A0"); + } + if (scratch) + { + *len = 9; + return ("lsl %A0" CR_TAB + "rol %B0" CR_TAB + "swap %A0" CR_TAB + "swap %B0" CR_TAB + "ldi %3,0xf0" CR_TAB + "and %B0,%3" CR_TAB + "eor %B0,%A0" CR_TAB + "and %A0,%3" CR_TAB + "eor %B0,%A0"); + } + break; /* 10 */ + + case 6: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + *len = 9; + return ("clr __tmp_reg__" CR_TAB + "lsr %B0" CR_TAB + "ror %A0" CR_TAB + "ror __tmp_reg__" CR_TAB + "lsr %B0" CR_TAB + "ror %A0" CR_TAB + "ror __tmp_reg__" CR_TAB + "mov %B0,%A0" CR_TAB + "mov %A0,__tmp_reg__"); + + case 7: + *len = 5; + return ("lsr %B0" CR_TAB + "mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "ror %B0" CR_TAB + "ror %A0"); + + case 8: + return *len = 2, ("mov %B0,%A1" CR_TAB + "clr %A0"); + + case 9: + *len = 3; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0"); + + case 10: + *len = 4; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0"); + + case 11: + *len = 5; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0"); + + case 12: + if (ldi_ok) + { + *len = 4; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "swap %B0" CR_TAB + "andi %B0,0xf0"); + } + if (scratch) + { + *len = 5; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "swap %B0" CR_TAB + "ldi %3,0xf0" CR_TAB + "and %B0,%3"); + } + *len = 6; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0"); + + case 13: + if (ldi_ok) + { + *len = 5; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "swap %B0" CR_TAB + "lsl %B0" CR_TAB + "andi %B0,0xe0"); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return ("ldi %3,0x20" CR_TAB + "mul %A0,%3" CR_TAB + "mov %B0,r0" CR_TAB + "clr %A0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && scratch) + break; /* 5 */ + if (scratch) + { + *len = 6; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "swap %B0" CR_TAB + "lsl %B0" CR_TAB + "ldi %3,0xe0" CR_TAB + "and %B0,%3"); + } + if (AVR_HAVE_MUL) + { + *len = 6; + return ("set" CR_TAB + "bld r1,5" CR_TAB + "mul %A0,r1" CR_TAB + "mov %B0,r0" CR_TAB + "clr %A0" CR_TAB + "clr __zero_reg__"); + } + *len = 7; + return ("mov %B0,%A0" CR_TAB + "clr %A0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0" CR_TAB + "lsl %B0"); + + case 14: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %B0,0x40" CR_TAB + "mul %A0,%B0" CR_TAB + "mov %B0,r0" CR_TAB + "clr %A0" CR_TAB + "clr __zero_reg__"); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return ("ldi %3,0x40" CR_TAB + "mul %A0,%3" CR_TAB + "mov %B0,r0" CR_TAB + "clr %A0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && ldi_ok) + { + *len = 5; + return ("mov %B0,%A0" CR_TAB + "ldi %A0,6" "\n1:\t" + "lsl %B0" CR_TAB + "dec %A0" CR_TAB + "brne 1b"); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return ("clr %B0" CR_TAB + "lsr %A0" CR_TAB + "ror %B0" CR_TAB + "lsr %A0" CR_TAB + "ror %B0" CR_TAB + "clr %A0"); + + case 15: + *len = 4; + return ("clr %B0" CR_TAB + "lsr %A0" CR_TAB + "ror %B0" CR_TAB + "clr %A0"); + } + len = t; + } + out_shift_with_cnt ("lsl %A0" CR_TAB + "rol %B0", insn, operands, len, 2); + return ""; +} + + +/* 24-bit shift left */ + +const char* +avr_out_ashlpsi3 (rtx insn, rtx *op, int *plen) +{ + if (plen) 
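+ /* Length computation requested: start the running word count at zero.  */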
+ *plen = 0; + + if (CONST_INT_P (op[2])) + { + switch (INTVAL (op[2])) + { + default: + if (INTVAL (op[2]) < 24) + break; + + return avr_asm_len ("clr %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0", op, plen, 3); + + case 8: + { + int reg0 = REGNO (op[0]); + int reg1 = REGNO (op[1]); + + if (reg0 >= reg1) + return avr_asm_len ("mov %C0,%B1" CR_TAB + "mov %B0,%A1" CR_TAB + "clr %A0", op, plen, 3); + else + return avr_asm_len ("clr %A0" CR_TAB + "mov %B0,%A1" CR_TAB + "mov %C0,%B1", op, plen, 3); + } + + case 16: + { + int reg0 = REGNO (op[0]); + int reg1 = REGNO (op[1]); + + if (reg0 + 2 != reg1) + avr_asm_len ("mov %C0,%A0", op, plen, 1); + + return avr_asm_len ("clr %B0" CR_TAB + "clr %A0", op, plen, 2); + } + + case 23: + return avr_asm_len ("clr %C0" CR_TAB + "lsr %A0" CR_TAB + "ror %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", op, plen, 5); + } + } + + out_shift_with_cnt ("lsl %A0" CR_TAB + "rol %B0" CR_TAB + "rol %C0", insn, op, plen, 3); + return ""; +} + + +/* 32bit shift left ((long)x << i) */ + +const char * +ashlsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 32) + break; + + if (AVR_HAVE_MOVW) + return *len = 3, ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "movw %A0,%C0"); + *len = 4; + return ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len = 4; + if (reg0 >= reg1) + return ("mov %D0,%C1" CR_TAB + "mov %C0,%B1" CR_TAB + "mov %B0,%A1" CR_TAB + "clr %A0"); + else + return ("clr %A0" CR_TAB + "mov %B0,%A1" CR_TAB + "mov %C0,%B1" CR_TAB + "mov %D0,%C1"); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + if (reg0 + 2 == reg1) + return *len = 2, ("clr %B0" CR_TAB + "clr %A0"); + if (AVR_HAVE_MOVW) + return *len = 3, ("movw %C0,%A1" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + else + return *len = 4, ("mov %C0,%A1" CR_TAB + "mov %D0,%B1" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + } + + case 24: + *len = 4; + return ("mov %D0,%A1" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 31: + *len = 6; + return ("clr %D0" CR_TAB + "lsr %A0" CR_TAB + "ror %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + } + len = t; + } + out_shift_with_cnt ("lsl %A0" CR_TAB + "rol %B0" CR_TAB + "rol %C0" CR_TAB + "rol %D0", insn, operands, len, 4); + return ""; +} + +/* 8bit arithmetic shift right ((signed char)x >> i) */ + +const char * +ashrqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 1: + *len = 1; + return "asr %0"; + + case 2: + *len = 2; + return ("asr %0" CR_TAB + "asr %0"); + + case 3: + *len = 3; + return ("asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0"); + + case 4: + *len = 4; + return ("asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0"); + + case 5: + *len = 5; + return ("asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0" CR_TAB + "asr %0"); + + case 6: + *len = 4; + return ("bst %0,6" CR_TAB + "lsl %0" CR_TAB + "sbc %0,%0" CR_TAB + "bld %0,0"); + + default: + if (INTVAL (operands[2]) < 8) + break; + + /* fall through */ + + case 7: + *len = 2; + return ("lsl %0" CR_TAB + "sbc %0,%0"); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. 
Incorrect shift:", insn); + + out_shift_with_cnt ("asr %0", + insn, operands, len, 1); + return ""; +} + + +/* 16bit arithmetic shift right ((signed short)x >> i) */ + +const char * +ashrhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 4: + case 5: + /* XXX try to optimize this too? */ + break; + + case 6: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + *len = 8; + return ("mov __tmp_reg__,%A0" CR_TAB + "mov %A0,%B0" CR_TAB + "lsl __tmp_reg__" CR_TAB + "rol %A0" CR_TAB + "sbc %B0,%B0" CR_TAB + "lsl __tmp_reg__" CR_TAB + "rol %A0" CR_TAB + "rol %B0"); + + case 7: + *len = 4; + return ("lsl %A0" CR_TAB + "mov %A0,%B0" CR_TAB + "rol %A0" CR_TAB + "sbc %B0,%B0"); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1) + return *len = 3, ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0"); + else + return *len = 4, ("mov %A0,%B1" CR_TAB + "clr %B0" CR_TAB + "sbrc %A0,7" CR_TAB + "dec %B0"); + } + + case 9: + *len = 4; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0"); + + case 10: + *len = 5; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0" CR_TAB + "asr %A0"); + + case 11: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %A0,0x20" CR_TAB + "muls %B0,%A0" CR_TAB + "mov %A0,r1" CR_TAB + "sbc %B0,%B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0"); + + case 12: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %A0,0x10" CR_TAB + "muls %B0,%A0" CR_TAB + "mov %A0,r1" CR_TAB + "sbc %B0,%B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 7; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0"); + + case 13: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %A0,0x08" CR_TAB + "muls %B0,%A0" CR_TAB + "mov %A0,r1" CR_TAB + "sbc %B0,%B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size) + break; /* scratch ? 
5 : 7 */ + *len = 8; + return ("mov %A0,%B0" CR_TAB + "lsl %B0" CR_TAB + "sbc %B0,%B0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0" CR_TAB + "asr %A0"); + + case 14: + *len = 5; + return ("lsl %B0" CR_TAB + "sbc %A0,%A0" CR_TAB + "lsl %B0" CR_TAB + "mov %B0,%A0" CR_TAB + "rol %A0"); + + default: + if (INTVAL (operands[2]) < 16) + break; + + /* fall through */ + + case 15: + return *len = 3, ("lsl %B0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0"); + } + len = t; + } + out_shift_with_cnt ("asr %B0" CR_TAB + "ror %A0", insn, operands, len, 2); + return ""; +} + + +/* 24-bit arithmetic shift right */ + +const char* +avr_out_ashrpsi3 (rtx insn, rtx *op, int *plen) +{ + int dest = REGNO (op[0]); + int src = REGNO (op[1]); + + if (CONST_INT_P (op[2])) + { + if (plen) + *plen = 0; + + switch (INTVAL (op[2])) + { + case 8: + if (dest <= src) + return avr_asm_len ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "clr %C0" CR_TAB + "sbrc %B0,7" CR_TAB + "dec %C0", op, plen, 5); + else + return avr_asm_len ("clr %C0" CR_TAB + "sbrc %C1,7" CR_TAB + "dec %C0" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1", op, plen, 5); + + case 16: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + + return avr_asm_len ("clr %B0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %B0" CR_TAB + "mov %C0,%B0", op, plen, 4); + + default: + if (INTVAL (op[2]) < 24) + break; + + /* fall through */ + + case 23: + return avr_asm_len ("lsl %C0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "mov %C0,%A0", op, plen, 4); + } /* switch */ + } + + out_shift_with_cnt ("asr %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, op, plen, 3); + return ""; +} + + +/* 32-bit arithmetic shift right ((signed long)x >> i) */ + +const char * +ashrsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len=6; + if (reg0 <= reg1) + return ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %C0,%D1" CR_TAB + "clr %D0" CR_TAB + "sbrc %C0,7" CR_TAB + "dec %D0"); + else + return ("clr %D0" CR_TAB + "sbrc %D1,7" CR_TAB + "dec %D0" CR_TAB + "mov %C0,%D1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1"); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1 + 2) + return *len = 4, ("clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + if (AVR_HAVE_MOVW) + return *len = 5, ("movw %A0,%C1" CR_TAB + "clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + else + return *len = 6, ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0"); + } + + case 24: + return *len = 6, ("mov %A0,%D1" CR_TAB + "clr %D0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %D0" CR_TAB + "mov %B0,%D0" CR_TAB + "mov %C0,%D0"); + + default: + if (INTVAL (operands[2]) < 32) + break; + + /* fall through */ + + case 31: + if (AVR_HAVE_MOVW) + return *len = 4, ("lsl %D0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "movw %C0,%A0"); + else + return *len = 5, ("lsl %D0" CR_TAB + "sbc %A0,%A0" CR_TAB + "mov %B0,%A0" CR_TAB + "mov %C0,%A0" CR_TAB + "mov %D0,%A0"); + } + len = t; + } + out_shift_with_cnt ("asr %D0" CR_TAB + "ror %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, operands, len, 4); + return ""; +} + +/* 8-bit logic shift right ((unsigned char)x 
>> i) */ + +const char * +lshrqi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 8) + break; + + *len = 1; + return "clr %0"; + + case 1: + *len = 1; + return "lsr %0"; + + case 2: + *len = 2; + return ("lsr %0" CR_TAB + "lsr %0"); + case 3: + *len = 3; + return ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0"); + + case 4: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len=2; + return ("swap %0" CR_TAB + "andi %0,0x0f"); + } + *len = 4; + return ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0"); + + case 5: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 3; + return ("swap %0" CR_TAB + "lsr %0" CR_TAB + "andi %0,0x7"); + } + *len = 5; + return ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0"); + + case 6: + if (test_hard_reg_class (LD_REGS, operands[0])) + { + *len = 4; + return ("swap %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "andi %0,0x3"); + } + *len = 6; + return ("lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0" CR_TAB + "lsr %0"); + + case 7: + *len = 3; + return ("rol %0" CR_TAB + "clr %0" CR_TAB + "rol %0"); + } + } + else if (CONSTANT_P (operands[2])) + fatal_insn ("internal compiler error. Incorrect shift:", insn); + + out_shift_with_cnt ("lsr %0", + insn, operands, len, 1); + return ""; +} + +/* 16-bit logic shift right ((unsigned short)x >> i) */ + +const char * +lshrhi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int scratch = (GET_CODE (PATTERN (insn)) == PARALLEL); + int ldi_ok = test_hard_reg_class (LD_REGS, operands[0]); + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 16) + break; + + *len = 2; + return ("clr %B0" CR_TAB + "clr %A0"); + + case 4: + if (optimize_size && scratch) + break; /* 5 */ + if (ldi_ok) + { + *len = 6; + return ("swap %B0" CR_TAB + "swap %A0" CR_TAB + "andi %A0,0x0f" CR_TAB + "eor %A0,%B0" CR_TAB + "andi %B0,0x0f" CR_TAB + "eor %A0,%B0"); + } + if (scratch) + { + *len = 7; + return ("swap %B0" CR_TAB + "swap %A0" CR_TAB + "ldi %3,0x0f" CR_TAB + "and %A0,%3" CR_TAB + "eor %A0,%B0" CR_TAB + "and %B0,%3" CR_TAB + "eor %A0,%B0"); + } + break; /* optimize_size ? 6 : 8 */ + + case 5: + if (optimize_size) + break; /* scratch ? 5 : 6 */ + if (ldi_ok) + { + *len = 8; + return ("lsr %B0" CR_TAB + "ror %A0" CR_TAB + "swap %B0" CR_TAB + "swap %A0" CR_TAB + "andi %A0,0x0f" CR_TAB + "eor %A0,%B0" CR_TAB + "andi %B0,0x0f" CR_TAB + "eor %A0,%B0"); + } + if (scratch) + { + *len = 9; + return ("lsr %B0" CR_TAB + "ror %A0" CR_TAB + "swap %B0" CR_TAB + "swap %A0" CR_TAB + "ldi %3,0x0f" CR_TAB + "and %A0,%3" CR_TAB + "eor %A0,%B0" CR_TAB + "and %B0,%3" CR_TAB + "eor %A0,%B0"); + } + break; /* 10 */ + + case 6: + if (optimize_size) + break; /* scratch ? 
5 : 6 */ + *len = 9; + return ("clr __tmp_reg__" CR_TAB + "lsl %A0" CR_TAB + "rol %B0" CR_TAB + "rol __tmp_reg__" CR_TAB + "lsl %A0" CR_TAB + "rol %B0" CR_TAB + "rol __tmp_reg__" CR_TAB + "mov %A0,%B0" CR_TAB + "mov %B0,__tmp_reg__"); + + case 7: + *len = 5; + return ("lsl %A0" CR_TAB + "mov %A0,%B0" CR_TAB + "rol %A0" CR_TAB + "sbc %B0,%B0" CR_TAB + "neg %B0"); + + case 8: + return *len = 2, ("mov %A0,%B1" CR_TAB + "clr %B0"); + + case 9: + *len = 3; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0"); + + case 10: + *len = 4; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 11: + *len = 5; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 12: + if (ldi_ok) + { + *len = 4; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "swap %A0" CR_TAB + "andi %A0,0x0f"); + } + if (scratch) + { + *len = 5; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "swap %A0" CR_TAB + "ldi %3,0x0f" CR_TAB + "and %A0,%3"); + } + *len = 6; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 13: + if (ldi_ok) + { + *len = 5; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "swap %A0" CR_TAB + "lsr %A0" CR_TAB + "andi %A0,0x07"); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return ("ldi %3,0x08" CR_TAB + "mul %B0,%3" CR_TAB + "mov %A0,r1" CR_TAB + "clr %B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && scratch) + break; /* 5 */ + if (scratch) + { + *len = 6; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "swap %A0" CR_TAB + "lsr %A0" CR_TAB + "ldi %3,0x07" CR_TAB + "and %A0,%3"); + } + if (AVR_HAVE_MUL) + { + *len = 6; + return ("set" CR_TAB + "bld r1,3" CR_TAB + "mul %B0,r1" CR_TAB + "mov %A0,r1" CR_TAB + "clr %B0" CR_TAB + "clr __zero_reg__"); + } + *len = 7; + return ("mov %A0,%B0" CR_TAB + "clr %B0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0" CR_TAB + "lsr %A0"); + + case 14: + if (AVR_HAVE_MUL && ldi_ok) + { + *len = 5; + return ("ldi %A0,0x04" CR_TAB + "mul %B0,%A0" CR_TAB + "mov %A0,r1" CR_TAB + "clr %B0" CR_TAB + "clr __zero_reg__"); + } + if (AVR_HAVE_MUL && scratch) + { + *len = 5; + return ("ldi %3,0x04" CR_TAB + "mul %B0,%3" CR_TAB + "mov %A0,r1" CR_TAB + "clr %B0" CR_TAB + "clr __zero_reg__"); + } + if (optimize_size && ldi_ok) + { + *len = 5; + return ("mov %A0,%B0" CR_TAB + "ldi %B0,6" "\n1:\t" + "lsr %A0" CR_TAB + "dec %B0" CR_TAB + "brne 1b"); + } + if (optimize_size && scratch) + break; /* 5 */ + *len = 6; + return ("clr %A0" CR_TAB + "lsl %B0" CR_TAB + "rol %A0" CR_TAB + "lsl %B0" CR_TAB + "rol %A0" CR_TAB + "clr %B0"); + + case 15: + *len = 4; + return ("clr %A0" CR_TAB + "lsl %B0" CR_TAB + "rol %A0" CR_TAB + "clr %B0"); + } + len = t; + } + out_shift_with_cnt ("lsr %B0" CR_TAB + "ror %A0", insn, operands, len, 2); + return ""; +} + + +/* 24-bit logic shift right */ + +const char* +avr_out_lshrpsi3 (rtx insn, rtx *op, int *plen) +{ + int dest = REGNO (op[0]); + int src = REGNO (op[1]); + + if (CONST_INT_P (op[2])) + { + if (plen) + *plen = 0; + + switch (INTVAL (op[2])) + { + case 8: + if (dest <= src) + return avr_asm_len ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "clr %C0", op, plen, 3); + else + return avr_asm_len ("clr %C0" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1", op, plen, 3); + + case 16: + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0", op, plen, 2); + + 
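+ /* Counts of 23 and above share the sequence below; remaining counts drop to the generic shift loop.  */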
default: + if (INTVAL (op[2]) < 24) + break; + + /* fall through */ + + case 23: + return avr_asm_len ("clr %A0" CR_TAB + "sbrc %C0,7" CR_TAB + "inc %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0", op, plen, 5); + } /* switch */ + } + + out_shift_with_cnt ("lsr %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, op, plen, 3); + return ""; +} + + +/* 32-bit logic shift right ((unsigned int)x >> i) */ + +const char * +lshrsi3_out (rtx insn, rtx operands[], int *len) +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + int k; + int *t = len; + + if (!len) + len = &k; + + switch (INTVAL (operands[2])) + { + default: + if (INTVAL (operands[2]) < 32) + break; + + if (AVR_HAVE_MOVW) + return *len = 3, ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "movw %A0,%C0"); + *len = 4; + return ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0"); + + case 8: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + *len = 4; + if (reg0 <= reg1) + return ("mov %A0,%B1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %C0,%D1" CR_TAB + "clr %D0"); + else + return ("clr %D0" CR_TAB + "mov %C0,%D1" CR_TAB + "mov %B0,%C1" CR_TAB + "mov %A0,%B1"); + } + + case 16: + { + int reg0 = true_regnum (operands[0]); + int reg1 = true_regnum (operands[1]); + + if (reg0 == reg1 + 2) + return *len = 2, ("clr %C0" CR_TAB + "clr %D0"); + if (AVR_HAVE_MOVW) + return *len = 3, ("movw %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + else + return *len = 4, ("mov %B0,%D1" CR_TAB + "mov %A0,%C1" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + } + + case 24: + return *len = 4, ("mov %A0,%D1" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + + case 31: + *len = 6; + return ("clr %A0" CR_TAB + "sbrc %D0,7" CR_TAB + "inc %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0"); + } + len = t; + } + out_shift_with_cnt ("lsr %D0" CR_TAB + "ror %C0" CR_TAB + "ror %B0" CR_TAB + "ror %A0", insn, operands, len, 4); + return ""; +} + + +/* Output addition of register XOP[0] and compile time constant XOP[2]. + CODE == PLUS: perform addition by using ADD instructions or + CODE == MINUS: perform addition by using SUB instructions: + + XOP[0] = XOP[0] + XOP[2] + + Or perform addition/subtraction with register XOP[2] depending on CODE: + + XOP[0] = XOP[0] +/- XOP[2] + + If PLEN == NULL, print assembler instructions to perform the operation; + otherwise, set *PLEN to the length of the instruction sequence (in words) + printed with PLEN == NULL. XOP[3] is an 8-bit scratch register or NULL_RTX. + Set *PCC to effect on cc0 according to respective CC_* insn attribute. + + CODE_SAT == UNKNOWN: Perform ordinary, non-saturating operation. + CODE_SAT != UNKNOWN: Perform operation and saturate according to CODE_SAT. + If CODE_SAT != UNKNOWN then SIGN contains the sign of the summand resp. + the subtrahend in the original insn, provided it is a compile time constant. + In all other cases, SIGN is 0. + + If OUT_LABEL is true, print the final 0: label which is needed for + saturated addition / subtraction. The only case where OUT_LABEL = false + is useful is for saturated addition / subtraction performed during + fixed-point rounding, cf. `avr_out_round'. */ + +static void +avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, + enum rtx_code code_sat, int sign, bool out_label) +{ + /* MODE of the operation. */ + enum machine_mode mode = GET_MODE (xop[0]); + + /* INT_MODE of the same size. */ + enum machine_mode imode = int_mode_for_mode (mode); + + /* Number of bytes to operate on. 
*/ + int i, n_bytes = GET_MODE_SIZE (mode); + + /* Value (0..0xff) held in clobber register op[3] or -1 if unknown. */ + int clobber_val = -1; + + /* op[0]: 8-bit destination register + op[1]: 8-bit const int + op[2]: 8-bit scratch register */ + rtx op[3]; + + /* Started the operation? Before starting the operation we may skip + adding 0. This is no more true after the operation started because + carry must be taken into account. */ + bool started = false; + + /* Value to add. There are two ways to add VAL: R += VAL and R -= -VAL. */ + rtx xval = xop[2]; + + /* Output a BRVC instruction. Only needed with saturation. */ + bool out_brvc = true; + + if (plen) + *plen = 0; + + if (REG_P (xop[2])) + { + *pcc = MINUS == code ? (int) CC_SET_CZN : (int) CC_SET_N; + + for (i = 0; i < n_bytes; i++) + { + /* We operate byte-wise on the destination. */ + op[0] = simplify_gen_subreg (QImode, xop[0], mode, i); + op[1] = simplify_gen_subreg (QImode, xop[2], mode, i); + + if (i == 0) + avr_asm_len (code == PLUS ? "add %0,%1" : "sub %0,%1", + op, plen, 1); + else + avr_asm_len (code == PLUS ? "adc %0,%1" : "sbc %0,%1", + op, plen, 1); + } + + if (reg_overlap_mentioned_p (xop[0], xop[2])) + { + gcc_assert (REGNO (xop[0]) == REGNO (xop[2])); + + if (MINUS == code) + return; + } + + goto saturate; + } + + /* Except in the case of ADIW with 16-bit register (see below) + addition does not set cc0 in a usable way. */ + + *pcc = (MINUS == code) ? CC_SET_CZN : CC_CLOBBER; + + if (CONST_FIXED_P (xval)) + xval = avr_to_int_mode (xval); + + /* Adding/Subtracting zero is a no-op. */ + + if (xval == const0_rtx) + { + *pcc = CC_NONE; + return; + } + + if (MINUS == code) + xval = simplify_unary_operation (NEG, imode, xval, imode); + + op[2] = xop[3]; + + if (SS_PLUS == code_sat && MINUS == code + && sign < 0 + && 0x80 == (INTVAL (simplify_gen_subreg (QImode, xval, imode, n_bytes-1)) + & GET_MODE_MASK (QImode))) + { + /* We compute x + 0x80 by means of SUB instructions. We negated the + constant subtrahend above and are left with x - (-128) so that we + need something like SUBI r,128 which does not exist because SUBI sets + V according to the sign of the subtrahend. Notice the only case + where this must be done is when NEG overflowed in case [2s] because + the V computation needs the right sign of the subtrahend. */ + + rtx msb = simplify_gen_subreg (QImode, xop[0], mode, n_bytes-1); + + avr_asm_len ("subi %0,128" CR_TAB + "brmi 0f", &msb, plen, 2); + out_brvc = false; + + goto saturate; + } + + for (i = 0; i < n_bytes; i++) + { + /* We operate byte-wise on the destination. */ + rtx reg8 = simplify_gen_subreg (QImode, xop[0], mode, i); + rtx xval8 = simplify_gen_subreg (QImode, xval, imode, i); + + /* 8-bit value to operate with this byte. */ + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + /* Registers R16..R31 can operate with immediate. */ + bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8); + + op[0] = reg8; + op[1] = gen_int_mode (val8, QImode); + + /* To get usable cc0 no low-bytes must have been skipped. */ + + if (i && !started) + *pcc = CC_CLOBBER; + + if (!started + && i % 2 == 0 + && i + 2 <= n_bytes + && test_hard_reg_class (ADDW_REGS, reg8)) + { + rtx xval16 = simplify_gen_subreg (HImode, xval, imode, i); + unsigned int val16 = UINTVAL (xval16) & GET_MODE_MASK (HImode); + + /* Registers R24, X, Y, Z can use ADIW/SBIW with constants < 64 + i.e. operate word-wise. */ + + if (val16 < 64) + { + if (val16 != 0) + { + started = true; + avr_asm_len (code == PLUS ? 
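+ /* ADIW for addition, SBIW for subtraction of this 16-bit chunk.  */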
"adiw %0,%1" : "sbiw %0,%1", + op, plen, 1); + + if (n_bytes == 2 && PLUS == code) + *pcc = CC_SET_ZN; + } + + i++; + continue; + } + } + + if (val8 == 0) + { + if (started) + avr_asm_len (code == PLUS + ? "adc %0,__zero_reg__" : "sbc %0,__zero_reg__", + op, plen, 1); + continue; + } + else if ((val8 == 1 || val8 == 0xff) + && UNKNOWN == code_sat + && !started + && i == n_bytes - 1) + { + avr_asm_len ((code == PLUS) ^ (val8 == 1) ? "dec %0" : "inc %0", + op, plen, 1); + break; + } + + switch (code) + { + case PLUS: + + gcc_assert (plen != NULL || (op[2] && REG_P (op[2]))); + + if (plen != NULL && UNKNOWN != code_sat) + { + /* This belongs to the x + 0x80 corner case. The code with + ADD instruction is not smaller, thus make this case + expensive so that the caller won't pick it. */ + + *plen += 10; + break; + } + + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len (started ? "adc %0,%2" : "add %0,%2", op, plen, 1); + + break; /* PLUS */ + + case MINUS: + + if (ld_reg_p) + avr_asm_len (started ? "sbci %0,%1" : "subi %0,%1", op, plen, 1); + else + { + gcc_assert (plen != NULL || REG_P (op[2])); + + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len (started ? "sbc %0,%2" : "sub %0,%2", op, plen, 1); + } + + break; /* MINUS */ + + default: + /* Unknown code */ + gcc_unreachable(); + } + + started = true; + + } /* for all sub-bytes */ + + saturate: + + if (UNKNOWN == code_sat) + return; + + *pcc = (int) CC_CLOBBER; + + /* Vanilla addition/subtraction is done. We are left with saturation. + + We have to compute A = A B where A is a register and + B is a register or a non-zero compile time constant CONST. + A is register class "r" if unsigned && B is REG. Otherwise, A is in "d". + B stands for the original operand $2 in INSN. In the case of B = CONST, + SIGN in { -1, 1 } is the sign of B. Otherwise, SIGN is 0. + + CODE is the instruction flavor we use in the asm sequence to perform . + + + unsigned + operation | code | sat if | b is | sat value | case + -----------------+-------+----------+--------------+-----------+------- + + as a + b | add | C == 1 | const, reg | u+ = 0xff | [1u] + + as a - (-b) | sub | C == 0 | const | u+ = 0xff | [2u] + - as a - b | sub | C == 1 | const, reg | u- = 0 | [3u] + - as a + (-b) | add | C == 0 | const | u- = 0 | [4u] + + + signed + operation | code | sat if | b is | sat value | case + -----------------+-------+----------+--------------+-----------+------- + + as a + b | add | V == 1 | const, reg | s+ | [1s] + + as a - (-b) | sub | V == 1 | const | s+ | [2s] + - as a - b | sub | V == 1 | const, reg | s- | [3s] + - as a + (-b) | add | V == 1 | const | s- | [4s] + + s+ = b < 0 ? -0x80 : 0x7f + s- = b < 0 ? 0x7f : -0x80 + + The cases a - b actually perform a - (-(-b)) if B is CONST. + */ + + op[0] = simplify_gen_subreg (QImode, xop[0], mode, n_bytes-1); + op[1] = n_bytes > 1 + ? 
simplify_gen_subreg (QImode, xop[0], mode, n_bytes-2) + : NULL_RTX; + + bool need_copy = true; + int len_call = 1 + AVR_HAVE_JMP_CALL; + + switch (code_sat) + { + default: + gcc_unreachable(); + + case SS_PLUS: + case SS_MINUS: + + if (out_brvc) + avr_asm_len ("brvc 0f", op, plen, 1); + + if (reg_overlap_mentioned_p (xop[0], xop[2])) + { + /* [1s,reg] */ + + if (n_bytes == 1) + avr_asm_len ("ldi %0,0x7f" CR_TAB + "adc %0,__zero_reg__", op, plen, 2); + else + avr_asm_len ("ldi %0,0x7f" CR_TAB + "ldi %1,0xff" CR_TAB + "adc %1,__zero_reg__" CR_TAB + "adc %0,__zero_reg__", op, plen, 4); + } + else if (sign == 0 && PLUS == code) + { + /* [1s,reg] */ + + op[2] = simplify_gen_subreg (QImode, xop[2], mode, n_bytes-1); + + if (n_bytes == 1) + avr_asm_len ("ldi %0,0x80" CR_TAB + "sbrs %2,7" CR_TAB + "dec %0", op, plen, 3); + else + avr_asm_len ("ldi %0,0x80" CR_TAB + "cp %2,%0" CR_TAB + "sbc %1,%1" CR_TAB + "sbci %0,0", op, plen, 4); + } + else if (sign == 0 && MINUS == code) + { + /* [3s,reg] */ + + op[2] = simplify_gen_subreg (QImode, xop[2], mode, n_bytes-1); + + if (n_bytes == 1) + avr_asm_len ("ldi %0,0x7f" CR_TAB + "sbrs %2,7" CR_TAB + "inc %0", op, plen, 3); + else + avr_asm_len ("ldi %0,0x7f" CR_TAB + "cp %0,%2" CR_TAB + "sbc %1,%1" CR_TAB + "sbci %0,-1", op, plen, 4); + } + else if ((sign < 0) ^ (SS_MINUS == code_sat)) + { + /* [1s,const,B < 0] [2s,B < 0] */ + /* [3s,const,B > 0] [4s,B > 0] */ + + if (n_bytes == 8) + { + avr_asm_len ("%~call __clr_8", op, plen, len_call); + need_copy = false; + } + + avr_asm_len ("ldi %0,0x80", op, plen, 1); + if (n_bytes > 1 && need_copy) + avr_asm_len ("clr %1", op, plen, 1); + } + else if ((sign > 0) ^ (SS_MINUS == code_sat)) + { + /* [1s,const,B > 0] [2s,B > 0] */ + /* [3s,const,B < 0] [4s,B < 0] */ + + if (n_bytes == 8) + { + avr_asm_len ("sec" CR_TAB + "%~call __sbc_8", op, plen, 1 + len_call); + need_copy = false; + } + + avr_asm_len ("ldi %0,0x7f", op, plen, 1); + if (n_bytes > 1 && need_copy) + avr_asm_len ("ldi %1,0xff", op, plen, 1); + } + else + gcc_unreachable(); + + break; + + case US_PLUS: + /* [1u] : [2u] */ + + avr_asm_len (PLUS == code ? "brcc 0f" : "brcs 0f", op, plen, 1); + + if (n_bytes == 8) + { + if (MINUS == code) + avr_asm_len ("sec", op, plen, 1); + avr_asm_len ("%~call __sbc_8", op, plen, len_call); + + need_copy = false; + } + else + { + if (MINUS == code && !test_hard_reg_class (LD_REGS, op[0])) + avr_asm_len ("sec" CR_TAB "sbc %0,%0", op, plen, 2); + else + avr_asm_len (PLUS == code ? "sbc %0,%0" : "ldi %0,0xff", + op, plen, 1); + } + break; /* US_PLUS */ + + case US_MINUS: + /* [4u] : [3u] */ + + avr_asm_len (PLUS == code ? "brcs 0f" : "brcc 0f", op, plen, 1); + + if (n_bytes == 8) + { + avr_asm_len ("%~call __clr_8", op, plen, len_call); + need_copy = false; + } + else + avr_asm_len ("clr %0", op, plen, 1); + + break; + } + + /* We set the MSB in the unsigned case and the 2 MSBs in the signed case. + Now copy the right value to the LSBs. 
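+ MOVW propagates the value pair-wise where available; otherwise it is copied byte by byte.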
*/ + + if (need_copy && n_bytes > 1) + { + if (US_MINUS == code_sat || US_PLUS == code_sat) + { + avr_asm_len ("mov %1,%0", op, plen, 1); + + if (n_bytes > 2) + { + op[0] = xop[0]; + if (AVR_HAVE_MOVW) + avr_asm_len ("movw %0,%1", op, plen, 1); + else + avr_asm_len ("mov %A0,%1" CR_TAB + "mov %B0,%1", op, plen, 2); + } + } + else if (n_bytes > 2) + { + op[0] = xop[0]; + avr_asm_len ("mov %A0,%1" CR_TAB + "mov %B0,%1", op, plen, 2); + } + } + + if (need_copy && n_bytes == 8) + { + if (AVR_HAVE_MOVW) + avr_asm_len ("movw %r0+2,%0" CR_TAB + "movw %r0+4,%0", xop, plen, 2); + else + avr_asm_len ("mov %r0+2,%0" CR_TAB + "mov %r0+3,%0" CR_TAB + "mov %r0+4,%0" CR_TAB + "mov %r0+5,%0", xop, plen, 4); + } + + if (out_label) + avr_asm_len ("0:", op, plen, 0); +} + + +/* Output addition/subtraction of register XOP[0] and a constant XOP[2] that + is ont a compile-time constant: + + XOP[0] = XOP[0] +/- XOP[2] + + This is a helper for the function below. The only insns that need this + are additions/subtraction for pointer modes, i.e. HImode and PSImode. */ + +static const char* +avr_out_plus_symbol (rtx *xop, enum rtx_code code, int *plen, int *pcc) +{ + enum machine_mode mode = GET_MODE (xop[0]); + + /* Only pointer modes want to add symbols. */ + + gcc_assert (mode == HImode || mode == PSImode); + + *pcc = MINUS == code ? (int) CC_SET_CZN : (int) CC_SET_N; + + avr_asm_len (PLUS == code + ? "subi %A0,lo8(-(%2))" CR_TAB "sbci %B0,hi8(-(%2))" + : "subi %A0,lo8(%2)" CR_TAB "sbci %B0,hi8(%2)", + xop, plen, -2); + + if (PSImode == mode) + avr_asm_len (PLUS == code + ? "sbci %C0,hlo8(-(%2))" + : "sbci %C0,hlo8(%2)", xop, plen, 1); + return ""; +} + + +/* Prepare operands of addition/subtraction to be used with avr_out_plus_1. + + INSN is a single_set insn or an insn pattern with a binary operation as + SET_SRC that is one of: PLUS, SS_PLUS, US_PLUS, MINUS, SS_MINUS, US_MINUS. + + XOP are the operands of INSN. In the case of 64-bit operations with + constant XOP[] has just one element: The summand/subtrahend in XOP[0]. + The non-saturating insns up to 32 bits may or may not supply a "d" class + scratch as XOP[3]. + + If PLEN == NULL output the instructions. + If PLEN != NULL set *PLEN to the length of the sequence in words. + + PCC is a pointer to store the instructions' effect on cc0. + PCC may be NULL. + + PLEN and PCC default to NULL. + + OUT_LABEL defaults to TRUE. For a description, see AVR_OUT_PLUS_1. + + Return "" */ + +const char* +avr_out_plus (rtx insn, rtx *xop, int *plen, int *pcc, bool out_label) +{ + int cc_plus, cc_minus, cc_dummy; + int len_plus, len_minus; + rtx op[4]; + rtx xpattern = INSN_P (insn) ? single_set (insn) : insn; + rtx xdest = SET_DEST (xpattern); + enum machine_mode mode = GET_MODE (xdest); + enum machine_mode imode = int_mode_for_mode (mode); + int n_bytes = GET_MODE_SIZE (mode); + enum rtx_code code_sat = GET_CODE (SET_SRC (xpattern)); + enum rtx_code code + = (PLUS == code_sat || SS_PLUS == code_sat || US_PLUS == code_sat + ? PLUS : MINUS); + + if (!pcc) + pcc = &cc_dummy; + + /* PLUS and MINUS don't saturate: Use modular wrap-around. 
*/ + + if (PLUS == code_sat || MINUS == code_sat) + code_sat = UNKNOWN; + + if (n_bytes <= 4 && REG_P (xop[2])) + { + avr_out_plus_1 (xop, plen, code, pcc, code_sat, 0, out_label); + return ""; + } + + if (8 == n_bytes) + { + op[0] = gen_rtx_REG (DImode, ACC_A); + op[1] = gen_rtx_REG (DImode, ACC_A); + op[2] = avr_to_int_mode (xop[0]); + } + else + { + if (!REG_P (xop[2]) + && !CONST_INT_P (xop[2]) + && !CONST_FIXED_P (xop[2])) + { + return avr_out_plus_symbol (xop, code, plen, pcc); + } + + op[0] = avr_to_int_mode (xop[0]); + op[1] = avr_to_int_mode (xop[1]); + op[2] = avr_to_int_mode (xop[2]); + } + + /* Saturations and 64-bit operations don't have a clobber operand. + For the other cases, the caller will provide a proper XOP[3]. */ + + xpattern = INSN_P (insn) ? PATTERN (insn) : insn; + op[3] = PARALLEL == GET_CODE (xpattern) ? xop[3] : NULL_RTX; + + /* Saturation will need the sign of the original operand. */ + + rtx xmsb = simplify_gen_subreg (QImode, op[2], imode, n_bytes-1); + int sign = INTVAL (xmsb) < 0 ? -1 : 1; + + /* If we subtract and the subtrahend is a constant, then negate it + so that avr_out_plus_1 can be used. */ + + if (MINUS == code) + op[2] = simplify_unary_operation (NEG, imode, op[2], imode); + + /* Work out the shortest sequence. */ + + avr_out_plus_1 (op, &len_minus, MINUS, &cc_minus, code_sat, sign, out_label); + avr_out_plus_1 (op, &len_plus, PLUS, &cc_plus, code_sat, sign, out_label); + + if (plen) + { + *plen = (len_minus <= len_plus) ? len_minus : len_plus; + *pcc = (len_minus <= len_plus) ? cc_minus : cc_plus; + } + else if (len_minus <= len_plus) + avr_out_plus_1 (op, NULL, MINUS, pcc, code_sat, sign, out_label); + else + avr_out_plus_1 (op, NULL, PLUS, pcc, code_sat, sign, out_label); + + return ""; +} + + +/* Output bit operation (IOR, AND, XOR) with register XOP[0] and compile + time constant XOP[2]: + + XOP[0] = XOP[0] XOP[2] + + and return "". If PLEN == NULL, print assembler instructions to perform the + operation; otherwise, set *PLEN to the length of the instruction sequence + (in words) printed with PLEN == NULL. XOP[3] is either an 8-bit clobber + register or SCRATCH if no clobber register is needed for the operation. + INSN is an INSN_P or a pattern of an insn. */ + +const char* +avr_out_bitop (rtx insn, rtx *xop, int *plen) +{ + /* CODE and MODE of the operation. */ + rtx xpattern = INSN_P (insn) ? single_set (insn) : insn; + enum rtx_code code = GET_CODE (SET_SRC (xpattern)); + enum machine_mode mode = GET_MODE (xop[0]); + + /* Number of bytes to operate on. */ + int i, n_bytes = GET_MODE_SIZE (mode); + + /* Value of T-flag (0 or 1) or -1 if unknow. */ + int set_t = -1; + + /* Value (0..0xff) held in clobber register op[3] or -1 if unknown. */ + int clobber_val = -1; + + /* op[0]: 8-bit destination register + op[1]: 8-bit const int + op[2]: 8-bit clobber register or SCRATCH + op[3]: 8-bit register containing 0xff or NULL_RTX */ + rtx op[4]; + + op[2] = xop[3]; + op[3] = NULL_RTX; + + if (plen) + *plen = 0; + + for (i = 0; i < n_bytes; i++) + { + /* We operate byte-wise on the destination. */ + rtx reg8 = simplify_gen_subreg (QImode, xop[0], mode, i); + rtx xval8 = simplify_gen_subreg (QImode, xop[2], mode, i); + + /* 8-bit value to operate with this byte. */ + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + /* Number of bits set in the current byte of the constant. */ + int pop8 = avr_popcount (val8); + + /* Registers R16..R31 can operate with immediate. 
*/ + bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8); + + op[0] = reg8; + op[1] = GEN_INT (val8); + + switch (code) + { + case IOR: + + if (0 == pop8) + continue; + else if (ld_reg_p) + avr_asm_len ("ori %0,%1", op, plen, 1); + else if (1 == pop8) + { + if (set_t != 1) + avr_asm_len ("set", op, plen, 1); + set_t = 1; + + op[1] = GEN_INT (exact_log2 (val8)); + avr_asm_len ("bld %0,%1", op, plen, 1); + } + else if (8 == pop8) + { + if (op[3] != NULL_RTX) + avr_asm_len ("mov %0,%3", op, plen, 1); + else + avr_asm_len ("clr %0" CR_TAB + "dec %0", op, plen, 2); + + op[3] = op[0]; + } + else + { + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len ("or %0,%2", op, plen, 1); + } + + continue; /* IOR */ + + case AND: + + if (8 == pop8) + continue; + else if (0 == pop8) + avr_asm_len ("clr %0", op, plen, 1); + else if (ld_reg_p) + avr_asm_len ("andi %0,%1", op, plen, 1); + else if (7 == pop8) + { + if (set_t != 0) + avr_asm_len ("clt", op, plen, 1); + set_t = 0; + + op[1] = GEN_INT (exact_log2 (GET_MODE_MASK (QImode) & ~val8)); + avr_asm_len ("bld %0,%1", op, plen, 1); + } + else + { + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len ("and %0,%2", op, plen, 1); + } + + continue; /* AND */ + + case XOR: + + if (0 == pop8) + continue; + else if (8 == pop8) + avr_asm_len ("com %0", op, plen, 1); + else if (ld_reg_p && val8 == (1 << 7)) + avr_asm_len ("subi %0,%1", op, plen, 1); + else + { + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len ("eor %0,%2", op, plen, 1); + } + + continue; /* XOR */ + + default: + /* Unknown rtx_code */ + gcc_unreachable(); + } + } /* for all sub-bytes */ + + return ""; +} + + +/* PLEN == NULL: Output code to add CONST_INT OP[0] to SP. + PLEN != NULL: Set *PLEN to the length of that sequence. + Return "". */ + +const char* +avr_out_addto_sp (rtx *op, int *plen) +{ + int pc_len = AVR_2_BYTE_PC ? 2 : 3; + int addend = INTVAL (op[0]); + + if (plen) + *plen = 0; + + if (addend < 0) + { + if (flag_verbose_asm || flag_print_asm_name) + avr_asm_len (ASM_COMMENT_START "SP -= %n0", op, plen, 0); + + while (addend <= -pc_len) + { + addend += pc_len; + avr_asm_len ("rcall .", op, plen, 1); + } + + while (addend++ < 0) + avr_asm_len ("push __zero_reg__", op, plen, 1); + } + else if (addend > 0) + { + if (flag_verbose_asm || flag_print_asm_name) + avr_asm_len (ASM_COMMENT_START "SP += %0", op, plen, 0); + + while (addend-- > 0) + avr_asm_len ("pop __tmp_reg__", op, plen, 1); + } + + return ""; +} + + +/* Outputs instructions needed for fixed point type conversion. + This includes converting between any fixed point type, as well + as converting to any integer type. Conversion between integer + types is not supported. + + Converting signed fractional types requires a bit shift if converting + to or from any unsigned fractional type because the decimal place is + shifted by 1 bit. When the destination is a signed fractional, the sign + is stored in either the carry or T bit. */ + +const char* +avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen) +{ + size_t i; + rtx xop[6]; + RTX_CODE shift = UNKNOWN; + bool sign_in_carry = false; + bool msb_in_carry = false; + bool lsb_in_tmp_reg = false; + bool lsb_in_carry = false; + bool frac_rounded = false; + const char *code_ashift = "lsl %0"; + + +#define MAY_CLOBBER(RR) \ + /* Shorthand used below. 
*/ \ + ((sign_bytes \ + && IN_RANGE (RR, dest.regno_msb - sign_bytes + 1, dest.regno_msb)) \ + || (offset && IN_RANGE (RR, dest.regno, dest.regno_msb)) \ + || (reg_unused_after (insn, all_regs_rtx[RR]) \ + && !IN_RANGE (RR, dest.regno, dest.regno_msb))) + + struct + { + /* bytes : Length of operand in bytes. + ibyte : Length of integral part in bytes. + fbyte, fbit : Length of fractional part in bytes, bits. */ + + bool sbit; + unsigned fbit, bytes, ibyte, fbyte; + unsigned regno, regno_msb; + } dest, src, *val[2] = { &dest, &src }; + + if (plen) + *plen = 0; + + /* Step 0: Determine information on source and destination operand we + ====== will need in the remainder. */ + + for (i = 0; i < sizeof (val) / sizeof (*val); i++) + { + enum machine_mode mode; + + xop[i] = operands[i]; + + mode = GET_MODE (xop[i]); + + val[i]->bytes = GET_MODE_SIZE (mode); + val[i]->regno = REGNO (xop[i]); + val[i]->regno_msb = REGNO (xop[i]) + val[i]->bytes - 1; + + if (SCALAR_INT_MODE_P (mode)) + { + val[i]->sbit = intsigned; + val[i]->fbit = 0; + } + else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode)) + { + val[i]->sbit = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode); + val[i]->fbit = GET_MODE_FBIT (mode); + } + else + fatal_insn ("unsupported fixed-point conversion", insn); + + val[i]->fbyte = (1 + val[i]->fbit) / BITS_PER_UNIT; + val[i]->ibyte = val[i]->bytes - val[i]->fbyte; + } + + // Byte offset of the decimal point taking into account different place + // of the decimal point in input and output and different register numbers + // of input and output. + int offset = dest.regno - src.regno + dest.fbyte - src.fbyte; + + // Number of destination bytes that will come from sign / zero extension. + int sign_bytes = (dest.ibyte - src.ibyte) * (dest.ibyte > src.ibyte); + + // Number of bytes at the low end to be filled with zeros. + int zero_bytes = (dest.fbyte - src.fbyte) * (dest.fbyte > src.fbyte); + + // Do we have a 16-Bit register that is cleared? + rtx clrw = NULL_RTX; + + bool sign_extend = src.sbit && sign_bytes; + + if (0 == dest.fbit % 8 && 7 == src.fbit % 8) + shift = ASHIFT; + else if (7 == dest.fbit % 8 && 0 == src.fbit % 8) + shift = ASHIFTRT; + else if (dest.fbit % 8 == src.fbit % 8) + shift = UNKNOWN; + else + gcc_unreachable(); + + /* If we need to round the fraction part, we might need to save/round it + before clobbering any of it in Step 1. Also, we might to want to do + the rounding now to make use of LD_REGS. */ + if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_ACCUM_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC) + { + bool overlap + = (src.regno <= + (offset ? 
dest.regno_msb - sign_bytes : dest.regno + zero_bytes - 1) + && dest.regno - offset -1 >= dest.regno); + unsigned s0 = dest.regno - offset -1; + bool use_src = true; + unsigned sn; + unsigned copied_msb = src.regno_msb; + bool have_carry = false; + + if (src.ibyte > dest.ibyte) + copied_msb -= src.ibyte - dest.ibyte; + + for (sn = s0; sn <= copied_msb; sn++) + if (!IN_RANGE (sn, dest.regno, dest.regno_msb) + && !reg_unused_after (insn, all_regs_rtx[sn])) + use_src = false; + if (use_src && TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], s0)) + { + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + sn = src.regno; + if (sn < s0) + { + if (TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], sn)) + avr_asm_len ("cpi %0,1", &all_regs_rtx[sn], plen, 1); + else + avr_asm_len ("sec" CR_TAB "cpc %0,__zero_reg__", + &all_regs_rtx[sn], plen, 2); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("cpc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + avr_asm_len (have_carry ? "sbci %0,128" : "subi %0,129", + &all_regs_rtx[s0], plen, 1); + for (sn = src.regno + src.fbyte; sn <= copied_msb; sn++) + avr_asm_len ("sbci %0,255", &all_regs_rtx[sn], plen, 1); + avr_asm_len ("\n0:", NULL, plen, 0); + frac_rounded = true; + } + else if (use_src && overlap) + { + avr_asm_len ("clr __tmp_reg__" CR_TAB + "sbrc %1,0" CR_TAB "dec __tmp_reg__", xop, plen, 1); + sn = src.regno; + if (sn < s0) + { + avr_asm_len ("add %0,__tmp_reg__", &all_regs_rtx[sn], plen, 1); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("adc %0,__tmp_reg__", &all_regs_rtx[sn], plen, 1); + if (have_carry) + avr_asm_len ("clt" CR_TAB "bld __tmp_reg__,7" CR_TAB + "adc %0,__tmp_reg__", + &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("lsr __tmp_reg" CR_TAB "add %0,__tmp_reg__", + &all_regs_rtx[s0], plen, 2); + for (sn = src.regno + src.fbyte; sn <= copied_msb; sn++) + avr_asm_len ("adc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + frac_rounded = true; + } + else if (overlap) + { + bool use_src + = (TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], s0) + && (IN_RANGE (s0, dest.regno, dest.regno_msb) + || reg_unused_after (insn, all_regs_rtx[s0]))); + xop[2] = all_regs_rtx[s0]; + unsigned sn = src.regno; + if (!use_src || sn == s0) + avr_asm_len ("mov __tmp_reg__,%2", xop, plen, 1); + /* We need to consider to-be-discarded bits + if the value is negative. */ + if (sn < s0) + { + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + /* Test to-be-discarded bytes for any nozero bits. + ??? Could use OR or SBIW to test two registers at once. */ + if (sn < s0) + avr_asm_len ("cp %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + while (++sn < s0) + avr_asm_len ("cpc %0,__zero_reg__", &all_regs_rtx[sn], plen, 1); + /* Set bit 0 in __tmp_reg__ if any of the lower bits was set. */ + if (use_src) + avr_asm_len ("breq 0f" CR_TAB + "ori %2,1" "\n0:\t" "mov __tmp_reg__,%2", + xop, plen, 3); + else + avr_asm_len ("breq 0f" CR_TAB + "set" CR_TAB "bld __tmp_reg__,0\n0:", + xop, plen, 3); + } + lsb_in_tmp_reg = true; + } + } + + /* Step 1: Clear bytes at the low end and copy payload bits from source + ====== to destination. */ + + int step = offset < 0 ? 1 : -1; + unsigned d0 = offset < 0 ? dest.regno : dest.regno_msb; + + // We cleared at least that number of registers. 
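+ // Once two suitably aligned destination bytes have been cleared, the
+ // even-numbered register pair is remembered in clrw so that, on devices
+ // with MOVW, further zero bytes can be produced two at a time, e.g.
+ //     clr r26 / clr r27 / movw r24,r26
+ // instead of four CLRs (the register numbers are only an illustration).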
+ int clr_n = 0; + + for (; d0 >= dest.regno && d0 <= dest.regno_msb; d0 += step) + { + // Next regno of destination is needed for MOVW + unsigned d1 = d0 + step; + + // Current and next regno of source + signed s0 = d0 - offset; + signed s1 = s0 + step; + + // Must current resp. next regno be CLRed? This applies to the low + // bytes of the destination that have no associated source bytes. + bool clr0 = s0 < (signed) src.regno; + bool clr1 = s1 < (signed) src.regno && d1 >= dest.regno; + + // First gather what code to emit (if any) and additional step to + // apply if a MOVW is in use. xop[2] is destination rtx and xop[3] + // is the source rtx for the current loop iteration. + const char *code = NULL; + int stepw = 0; + + if (clr0) + { + if (AVR_HAVE_MOVW && clr1 && clrw) + { + xop[2] = all_regs_rtx[d0 & ~1]; + xop[3] = clrw; + code = "movw %2,%3"; + stepw = step; + } + else + { + xop[2] = all_regs_rtx[d0]; + code = "clr %2"; + + if (++clr_n >= 2 + && !clrw + && d0 % 2 == (step > 0)) + { + clrw = all_regs_rtx[d0 & ~1]; + } + } + } + else if (offset && s0 <= (signed) src.regno_msb) + { + int movw = AVR_HAVE_MOVW && offset % 2 == 0 + && d0 % 2 == (offset > 0) + && d1 <= dest.regno_msb && d1 >= dest.regno + && s1 <= (signed) src.regno_msb && s1 >= (signed) src.regno; + + xop[2] = all_regs_rtx[d0 & ~movw]; + xop[3] = all_regs_rtx[s0 & ~movw]; + code = movw ? "movw %2,%3" : "mov %2,%3"; + stepw = step * movw; + } + + if (code) + { + if (sign_extend && shift != ASHIFT && !sign_in_carry + && (d0 == src.regno_msb || d0 + stepw == src.regno_msb)) + { + /* We are going to override the sign bit. If we sign-extend, + store the sign in the Carry flag. This is not needed if + the destination will be ASHIFT is the remainder because + the ASHIFT will set Carry without extra instruction. */ + + avr_asm_len ("lsl %0", &all_regs_rtx[src.regno_msb], plen, 1); + sign_in_carry = true; + } + + unsigned src_msb = dest.regno_msb - sign_bytes - offset + 1; + + if (!sign_extend && shift == ASHIFTRT && !msb_in_carry + && src.ibyte > dest.ibyte + && (d0 == src_msb || d0 + stepw == src_msb)) + { + /* We are going to override the MSB. If we shift right, + store the MSB in the Carry flag. This is only needed if + we don't sign-extend becaue with sign-extension the MSB + (the sign) will be produced by the sign extension. */ + + avr_asm_len ("lsr %0", &all_regs_rtx[src_msb], plen, 1); + msb_in_carry = true; + } + + unsigned src_lsb = dest.regno - offset -1; + + if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry + && !lsb_in_tmp_reg + && (d0 == src_lsb || d0 + stepw == src_lsb)) + { + /* We are going to override the new LSB; store it into carry. */ + + avr_asm_len ("lsl %0", &all_regs_rtx[src_lsb], plen, 1); + code_ashift = "rol %0"; + lsb_in_carry = true; + } + + avr_asm_len (code, xop, plen, 1); + d0 += stepw; + } + } + + /* Step 2: Shift destination left by 1 bit position. This might be needed + ====== for signed input and unsigned output. */ + + if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry) + { + unsigned s0 = dest.regno - offset -1; + + /* n1169 4.1.4 says: + "Conversions from a fixed-point to an integer type round toward zero." + Hence, converting a fract type to integer only gives a non-zero result + for -1. */ + if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_FRACT_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC) + { + gcc_assert (s0 == src.regno_msb); + /* Check if the input is -1. We do that by checking if negating + the input causes an integer overflow. 
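+ For an N-bit signed fract, -1.0 is the most negative two's complement
+ pattern (0x80...00), and the 0 - X computed by the CP/CPC chain below
+ overflows (sets V) exactly for that pattern, so the BRVS singles out -1
+ while every other input falls through to the CLC.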
*/ + unsigned sn = src.regno; + avr_asm_len ("cp __zero_reg__,%0", &all_regs_rtx[sn++], plen, 1); + while (sn <= s0) + avr_asm_len ("cpc __zero_reg__,%0", &all_regs_rtx[sn++], plen, 1); + + /* Overflow goes with set carry. Clear carry otherwise. */ + avr_asm_len ("brvs 0f" CR_TAB "clc\n0:", NULL, plen, 2); + } + /* Likewise, when converting from accumulator types to integer, we + need to round up negative values. */ + else if (SCALAR_INT_MODE_P (GET_MODE (xop[0])) + && SCALAR_ACCUM_MODE_P (GET_MODE (xop[1])) + && !TARGET_FRACT_CONV_TRUNC + && !frac_rounded) + { + bool have_carry = false; + + xop[2] = all_regs_rtx[s0]; + if (!lsb_in_tmp_reg && !MAY_CLOBBER (s0)) + avr_asm_len ("mov __tmp_reg__,%2", xop, plen, 1); + avr_asm_len ("tst %0" CR_TAB "brpl 0f", + &all_regs_rtx[src.regno_msb], plen, 2); + if (!lsb_in_tmp_reg) + { + unsigned sn = src.regno; + if (sn < s0) + { + avr_asm_len ("cp __zero_reg__,%0", &all_regs_rtx[sn], + plen, 1); + have_carry = true; + } + while (++sn < s0) + avr_asm_len ("cpc __zero_reg__,%0", &all_regs_rtx[sn], plen, 1); + lsb_in_tmp_reg = !MAY_CLOBBER (s0); + } + /* Add in C and the rounding value 127. */ + /* If the destination msb is a sign byte, and in LD_REGS, + grab it as a temporary. */ + if (sign_bytes + && TEST_HARD_REG_BIT (reg_class_contents[LD_REGS], + dest.regno_msb)) + { + xop[3] = all_regs_rtx[dest.regno_msb]; + avr_asm_len ("ldi %3,127", xop, plen, 1); + avr_asm_len ((have_carry && lsb_in_tmp_reg ? "adc __tmp_reg__,%3" + : have_carry ? "adc %2,%3" + : lsb_in_tmp_reg ? "add __tmp_reg__,%3" + : "add %2,%3"), + xop, plen, 1); + } + else + { + /* Fall back to use __zero_reg__ as a temporary. */ + avr_asm_len ("dec __zero_reg__", NULL, plen, 1); + if (have_carry) + avr_asm_len ("clt" CR_TAB "bld __zero_reg__,7", NULL, plen, 2); + else + avr_asm_len ("lsr __zero_reg__", NULL, plen, 1); + avr_asm_len ((have_carry && lsb_in_tmp_reg + ? "adc __tmp_reg__,__zero_reg__" + : have_carry ? "adc %2,__zero_reg__" + : lsb_in_tmp_reg ? "add __tmp_reg__,__zero_reg__" + : "add %2,__zero_reg__"), + xop, plen, 1); + avr_asm_len ("eor __zero_reg__,__zero_reg__", NULL, plen, 1); + } + for (d0 = dest.regno + zero_bytes; + d0 <= dest.regno_msb - sign_bytes; d0++) + avr_asm_len ("adc %0,__zero_reg__", &all_regs_rtx[d0], plen, 1); + avr_asm_len (lsb_in_tmp_reg + ? "\n0:\t" "lsl __tmp_reg__" : "\n0:\t" "lsl %2", + xop, plen, 1); + } + else if (MAY_CLOBBER (s0)) + avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("mov __tmp_reg__,%0" CR_TAB + "lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2); + + code_ashift = "rol %0"; + lsb_in_carry = true; + } + + if (shift == ASHIFT) + { + for (d0 = dest.regno + zero_bytes; + d0 <= dest.regno_msb - sign_bytes; d0++) + { + avr_asm_len (code_ashift, &all_regs_rtx[d0], plen, 1); + code_ashift = "rol %0"; + } + + lsb_in_carry = false; + sign_in_carry = true; + } + + /* Step 4a: Store MSB in carry if we don't already have it or will produce + ======= it in sign-extension below. */ + + if (!sign_extend && shift == ASHIFTRT && !msb_in_carry + && src.ibyte > dest.ibyte) + { + unsigned s0 = dest.regno_msb - sign_bytes - offset + 1; + + if (MAY_CLOBBER (s0)) + avr_asm_len ("lsr %0", &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("mov __tmp_reg__,%0" CR_TAB + "lsr __tmp_reg__", &all_regs_rtx[s0], plen, 2); + + msb_in_carry = true; + } + + /* Step 3: Sign-extend or zero-extend the destination as needed. 
+ ====== */ + + if (sign_extend && !sign_in_carry) + { + unsigned s0 = src.regno_msb; + + if (MAY_CLOBBER (s0)) + avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1); + else + avr_asm_len ("mov __tmp_reg__,%0" CR_TAB + "lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2); + + sign_in_carry = true; + } + + gcc_assert (sign_in_carry + msb_in_carry + lsb_in_carry <= 1); + + unsigned copies = 0; + rtx movw = sign_extend ? NULL_RTX : clrw; + + for (d0 = dest.regno_msb - sign_bytes + 1; d0 <= dest.regno_msb; d0++) + { + if (AVR_HAVE_MOVW && movw + && d0 % 2 == 0 && d0 + 1 <= dest.regno_msb) + { + xop[2] = all_regs_rtx[d0]; + xop[3] = movw; + avr_asm_len ("movw %2,%3", xop, plen, 1); + d0++; + } + else + { + avr_asm_len (sign_extend ? "sbc %0,%0" : "clr %0", + &all_regs_rtx[d0], plen, 1); + + if (++copies >= 2 && !movw && d0 % 2 == 1) + movw = all_regs_rtx[d0-1]; + } + } /* for */ + + + /* Step 4: Right shift the destination. This might be needed for + ====== conversions from unsigned to signed. */ + + if (shift == ASHIFTRT) + { + const char *code_ashiftrt = "lsr %0"; + + if (sign_extend || msb_in_carry) + code_ashiftrt = "ror %0"; + + if (src.sbit && src.ibyte == dest.ibyte) + code_ashiftrt = "asr %0"; + + for (d0 = dest.regno_msb - sign_bytes; + d0 >= dest.regno + zero_bytes - 1 && d0 >= dest.regno; d0--) + { + avr_asm_len (code_ashiftrt, &all_regs_rtx[d0], plen, 1); + code_ashiftrt = "ror %0"; + } + } + +#undef MAY_CLOBBER + + return ""; +} + + +/* Output fixed-point rounding. XOP[0] = XOP[1] is the operand to round. + XOP[2] is the rounding point, a CONST_INT. The function prints the + instruction sequence if PLEN = NULL and computes the length in words + of the sequence if PLEN != NULL. Most of this function deals with + preparing operands for calls to `avr_out_plus' and `avr_out_bitop'. */ + +const char* +avr_out_round (rtx insn ATTRIBUTE_UNUSED, rtx *xop, int *plen) +{ + enum machine_mode mode = GET_MODE (xop[0]); + enum machine_mode imode = int_mode_for_mode (mode); + // The smallest fractional bit not cleared by the rounding is 2^(-RP). + int fbit = (int) GET_MODE_FBIT (mode); + double_int i_add = double_int_zero.set_bit (fbit-1 - INTVAL (xop[2])); + // Lengths of PLUS and AND parts. + int len_add = 0, *plen_add = plen ? &len_add : NULL; + int len_and = 0, *plen_and = plen ? &len_and : NULL; + + // Add-Saturate 1/2 * 2^(-RP). Don't print the label "0:" when printing + // the saturated addition so that we can emit the "rjmp 1f" before the + // "0:" below. + + rtx xadd = const_fixed_from_double_int (i_add, mode); + rtx xpattern, xsrc, op[4]; + + xsrc = SIGNED_FIXED_POINT_MODE_P (mode) + ? gen_rtx_SS_PLUS (mode, xop[1], xadd) + : gen_rtx_US_PLUS (mode, xop[1], xadd); + xpattern = gen_rtx_SET (VOIDmode, xop[0], xsrc); + + op[0] = xop[0]; + op[1] = xop[1]; + op[2] = xadd; + avr_out_plus (xpattern, op, plen_add, NULL, false /* Don't print "0:" */); + + avr_asm_len ("rjmp 1f" CR_TAB + "0:", NULL, plen_add, 1); + + // Keep all bits from RP and higher: ... 2^(-RP) + // Clear all bits from RP+1 and lower: 2^(-RP-1) ... 
+ // Rounding point ^^^^^^^ + // Added above ^^^^^^^^^ + rtx xreg = simplify_gen_subreg (imode, xop[0], mode, 0); + rtx xmask = immed_double_int_const (-i_add - i_add, imode); + + xpattern = gen_rtx_SET (VOIDmode, xreg, gen_rtx_AND (imode, xreg, xmask)); + + op[0] = xreg; + op[1] = xreg; + op[2] = xmask; + op[3] = gen_rtx_SCRATCH (QImode); + avr_out_bitop (xpattern, op, plen_and); + avr_asm_len ("1:", NULL, plen, 0); + + if (plen) + *plen = len_add + len_and; + + return ""; +} + + +/* Create RTL split patterns for byte sized rotate expressions. This + produces a series of move instructions and considers overlap situations. + Overlapping non-HImode operands need a scratch register. */ + +bool +avr_rotate_bytes (rtx operands[]) +{ + int i, j; + enum machine_mode mode = GET_MODE (operands[0]); + bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]); + bool same_reg = rtx_equal_p (operands[0], operands[1]); + int num = INTVAL (operands[2]); + rtx scratch = operands[3]; + /* Work out if byte or word move is needed. Odd byte rotates need QImode. + Word move if no scratch is needed, otherwise use size of scratch. */ + enum machine_mode move_mode = QImode; + int move_size, offset, size; + + if (num & 0xf) + move_mode = QImode; + else if ((mode == SImode && !same_reg) || !overlapped) + move_mode = HImode; + else + move_mode = GET_MODE (scratch); + + /* Force DI rotate to use QI moves since other DI moves are currently split + into QI moves so forward propagation works better. */ + if (mode == DImode) + move_mode = QImode; + /* Make scratch smaller if needed. */ + if (SCRATCH != GET_CODE (scratch) + && HImode == GET_MODE (scratch) + && QImode == move_mode) + scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0); + + move_size = GET_MODE_SIZE (move_mode); + /* Number of bytes/words to rotate. */ + offset = (num >> 3) / move_size; + /* Number of moves needed. */ + size = GET_MODE_SIZE (mode) / move_size; + /* Himode byte swap is special case to avoid a scratch register. */ + if (mode == HImode && same_reg) + { + /* HImode byte swap, using xor. This is as quick as using scratch. */ + rtx src, dst; + src = simplify_gen_subreg (move_mode, operands[1], mode, 0); + dst = simplify_gen_subreg (move_mode, operands[0], mode, 1); + if (!rtx_equal_p (dst, src)) + { + emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); + emit_move_insn (src, gen_rtx_XOR (QImode, src, dst)); + emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); + } + } + else + { +#define MAX_SIZE 8 /* GET_MODE_SIZE (DImode) / GET_MODE_SIZE (QImode) */ + /* Create linked list of moves to determine move order. */ + struct { + rtx src, dst; + int links; + } move[MAX_SIZE + 8]; + int blocked, moves; + + gcc_assert (size <= MAX_SIZE); + /* Generate list of subreg moves. */ + for (i = 0; i < size; i++) + { + int from = i; + int to = (from + offset) % size; + move[i].src = simplify_gen_subreg (move_mode, operands[1], + mode, from * move_size); + move[i].dst = simplify_gen_subreg (move_mode, operands[0], + mode, to * move_size); + move[i].links = -1; + } + /* Mark dependence where a dst of one move is the src of another move. + The first move is a conflict as it must wait until second is + performed. We ignore moves to self - we catch this later. */ + if (overlapped) + for (i = 0; i < size; i++) + if (reg_overlap_mentioned_p (move[i].dst, operands[1])) + for (j = 0; j < size; j++) + if (j != i && rtx_equal_p (move[j].src, move[i].dst)) + { + /* The dst of move i is the src of move j. 
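+ For instance, rotating a 4-byte register in place by 8 bits gives the
+ cycle b0->b1->b2->b3->b0, where every move's destination is some other
+ move's source; such a cycle can only be resolved by the scratch-register
+ handling further below.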
*/ + move[i].links = j; + break; + } + + blocked = -1; + moves = 0; + /* Go through move list and perform non-conflicting moves. As each + non-overlapping move is made, it may remove other conflicts + so the process is repeated until no conflicts remain. */ + do + { + blocked = -1; + moves = 0; + /* Emit move where dst is not also a src or we have used that + src already. */ + for (i = 0; i < size; i++) + if (move[i].src != NULL_RTX) + { + if (move[i].links == -1 + || move[move[i].links].src == NULL_RTX) + { + moves++; + /* Ignore NOP moves to self. */ + if (!rtx_equal_p (move[i].dst, move[i].src)) + emit_move_insn (move[i].dst, move[i].src); + + /* Remove conflict from list. */ + move[i].src = NULL_RTX; + } + else + blocked = i; + } + + /* Check for deadlock. This is when no moves occurred and we have + at least one blocked move. */ + if (moves == 0 && blocked != -1) + { + /* Need to use scratch register to break deadlock. + Add move to put dst of blocked move into scratch. + When this move occurs, it will break chain deadlock. + The scratch register is substituted for real move. */ + + gcc_assert (SCRATCH != GET_CODE (scratch)); + + move[size].src = move[blocked].dst; + move[size].dst = scratch; + /* Scratch move is never blocked. */ + move[size].links = -1; + /* Make sure we have valid link. */ + gcc_assert (move[blocked].links != -1); + /* Replace src of blocking move with scratch reg. */ + move[move[blocked].links].src = scratch; + /* Make dependent on scratch move occurring. */ + move[blocked].links = size; + size=size+1; + } + } + while (blocked != -1); + } + return true; +} + + +/* Worker function for `ADJUST_INSN_LENGTH'. */ +/* Modifies the length assigned to instruction INSN + LEN is the initially computed length of the insn. */ + +int +avr_adjust_insn_length (rtx insn, int len) +{ + rtx *op = recog_data.operand; + enum attr_adjust_len adjust_len; + + /* Some complex insns don't need length adjustment and therefore + the length need not/must not be adjusted for these insns. + It is easier to state this in an insn attribute "adjust_len" than + to clutter up code here... */ + + if (-1 == recog_memoized (insn)) + { + return len; + } + + /* Read from insn attribute "adjust_len" if/how length is to be adjusted. */ + + adjust_len = get_attr_adjust_len (insn); + + if (adjust_len == ADJUST_LEN_NO) + { + /* Nothing to adjust: The length from attribute "length" is fine. + This is the default. */ + + return len; + } + + /* Extract insn's operands. */ + + extract_constrain_insn_cached (insn); + + /* Dispatch to right function. 
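+ The cases below call the matching output worker with a non-NULL length
+ pointer (&len), so the worker only computes the sequence length in words
+ instead of printing assembly; see the PLEN convention documented above
+ avr_out_bitop.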
*/ + + switch (adjust_len) + { + case ADJUST_LEN_RELOAD_IN16: output_reload_inhi (op, op[2], &len); break; + case ADJUST_LEN_RELOAD_IN24: avr_out_reload_inpsi (op, op[2], &len); break; + case ADJUST_LEN_RELOAD_IN32: output_reload_insisf (op, op[2], &len); break; + + case ADJUST_LEN_OUT_BITOP: avr_out_bitop (insn, op, &len); break; + + case ADJUST_LEN_PLUS: avr_out_plus (insn, op, &len); break; + case ADJUST_LEN_ADDTO_SP: avr_out_addto_sp (op, &len); break; + + case ADJUST_LEN_MOV8: output_movqi (insn, op, &len); break; + case ADJUST_LEN_MOV16: output_movhi (insn, op, &len); break; + case ADJUST_LEN_MOV24: avr_out_movpsi (insn, op, &len); break; + case ADJUST_LEN_MOV32: output_movsisf (insn, op, &len); break; + case ADJUST_LEN_MOVMEM: avr_out_movmem (insn, op, &len); break; + case ADJUST_LEN_XLOAD: avr_out_xload (insn, op, &len); break; + case ADJUST_LEN_LPM: avr_out_lpm (insn, op, &len); break; + + case ADJUST_LEN_SFRACT: avr_out_fract (insn, op, true, &len); break; + case ADJUST_LEN_UFRACT: avr_out_fract (insn, op, false, &len); break; + case ADJUST_LEN_ROUND: avr_out_round (insn, op, &len); break; + + case ADJUST_LEN_TSTHI: avr_out_tsthi (insn, op, &len); break; + case ADJUST_LEN_TSTPSI: avr_out_tstpsi (insn, op, &len); break; + case ADJUST_LEN_TSTSI: avr_out_tstsi (insn, op, &len); break; + case ADJUST_LEN_COMPARE: avr_out_compare (insn, op, &len); break; + case ADJUST_LEN_COMPARE64: avr_out_compare64 (insn, op, &len); break; + + case ADJUST_LEN_LSHRQI: lshrqi3_out (insn, op, &len); break; + case ADJUST_LEN_LSHRHI: lshrhi3_out (insn, op, &len); break; + case ADJUST_LEN_LSHRSI: lshrsi3_out (insn, op, &len); break; + + case ADJUST_LEN_ASHRQI: ashrqi3_out (insn, op, &len); break; + case ADJUST_LEN_ASHRHI: ashrhi3_out (insn, op, &len); break; + case ADJUST_LEN_ASHRSI: ashrsi3_out (insn, op, &len); break; + + case ADJUST_LEN_ASHLQI: ashlqi3_out (insn, op, &len); break; + case ADJUST_LEN_ASHLHI: ashlhi3_out (insn, op, &len); break; + case ADJUST_LEN_ASHLSI: ashlsi3_out (insn, op, &len); break; + + case ADJUST_LEN_ASHLPSI: avr_out_ashlpsi3 (insn, op, &len); break; + case ADJUST_LEN_ASHRPSI: avr_out_ashrpsi3 (insn, op, &len); break; + case ADJUST_LEN_LSHRPSI: avr_out_lshrpsi3 (insn, op, &len); break; + + case ADJUST_LEN_CALL: len = AVR_HAVE_JMP_CALL ? 2 : 1; break; + + case ADJUST_LEN_INSERT_BITS: avr_out_insert_bits (op, &len); break; + + default: + gcc_unreachable(); + } + + return len; +} + +/* Return nonzero if register REG dead after INSN. */ + +int +reg_unused_after (rtx insn, rtx reg) +{ + return (dead_or_set_p (insn, reg) + || (REG_P(reg) && _reg_unused_after (insn, reg))); +} + +/* Return nonzero if REG is not used after INSN. + We assume REG is a reload reg, and therefore does + not live past labels. It may live past calls or jumps though. */ + +int +_reg_unused_after (rtx insn, rtx reg) +{ + enum rtx_code code; + rtx set; + + /* If the reg is set by this instruction, then it is safe for our + case. Disregard the case where this is a store to memory, since + we are checking a register used in the store address. */ + set = single_set (insn); + if (set && GET_CODE (SET_DEST (set)) != MEM + && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; + + while ((insn = NEXT_INSN (insn))) + { + rtx set; + code = GET_CODE (insn); + +#if 0 + /* If this is a label that existed before reload, then the register + if dead here. However, if this is a label added by reorg, then + the register may still be live here. We can't tell the difference, + so we just ignore labels completely. 
*/ + if (code == CODE_LABEL) + return 1; + /* else */ +#endif + + if (!INSN_P (insn)) + continue; + + if (code == JUMP_INSN) + return 0; + + /* If this is a sequence, we must handle them all at once. + We could have for instance a call that sets the target register, + and an insn in a delay slot that uses the register. In this case, + we must return 0. */ + else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) + { + int i; + int retval = 0; + + for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) + { + rtx this_insn = XVECEXP (PATTERN (insn), 0, i); + rtx set = single_set (this_insn); + + if (CALL_P (this_insn)) + code = CALL_INSN; + else if (JUMP_P (this_insn)) + { + if (INSN_ANNULLED_BRANCH_P (this_insn)) + return 0; + code = JUMP_INSN; + } + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + { + if (GET_CODE (SET_DEST (set)) != MEM) + retval = 1; + else + return 0; + } + if (set == 0 + && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) + return 0; + } + if (retval == 1) + return 1; + else if (code == JUMP_INSN) + return 0; + } + + if (code == CALL_INSN) + { + rtx tem; + for (tem = CALL_INSN_FUNCTION_USAGE (insn); tem; tem = XEXP (tem, 1)) + if (GET_CODE (XEXP (tem, 0)) == USE + && REG_P (XEXP (XEXP (tem, 0), 0)) + && reg_overlap_mentioned_p (reg, XEXP (XEXP (tem, 0), 0))) + return 0; + if (call_used_regs[REGNO (reg)]) + return 1; + } + + set = single_set (insn); + + if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) + return 0; + if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return GET_CODE (SET_DEST (set)) != MEM; + if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return 0; + } + return 1; +} + + +/* Implement `TARGET_ASM_INTEGER'. */ +/* Target hook for assembling integer objects. The AVR version needs + special handling for references to certain labels. */ + +static bool +avr_assemble_integer (rtx x, unsigned int size, int aligned_p) +{ + if (size == POINTER_SIZE / BITS_PER_UNIT && aligned_p + && text_segment_operand (x, VOIDmode)) + { + fputs ("\t.word\tgs(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")\n", asm_out_file); + + return true; + } + else if (GET_MODE (x) == PSImode) + { + /* This needs binutils 2.23+, see PR binutils/13503 */ + + fputs ("\t.byte\tlo8(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")" ASM_COMMENT_START "need binutils PR13503\n", asm_out_file); + + fputs ("\t.byte\thi8(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")" ASM_COMMENT_START "need binutils PR13503\n", asm_out_file); + + fputs ("\t.byte\thh8(", asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")" ASM_COMMENT_START "need binutils PR13503\n", asm_out_file); + + return true; + } + else if (CONST_FIXED_P (x)) + { + unsigned n; + + /* varasm fails to handle big fixed modes that don't fit in hwi. */ + + for (n = 0; n < size; n++) + { + rtx xn = simplify_gen_subreg (QImode, x, GET_MODE (x), n); + default_assemble_integer (xn, 1, aligned_p); + } + + return true; + } + + return default_assemble_integer (x, size, aligned_p); +} + + +/* Implement `TARGET_CLASS_LIKELY_SPILLED_P'. */ +/* Return value is nonzero if pseudos that have been + assigned to registers of class CLASS would likely be spilled + because registers of CLASS are needed for spill registers. 
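+ With only 32 byte-sized registers and several small dedicated classes
+ such as LD_REGS and the pointer classes, AVR answers "likely spilled"
+ for every class except ALL_REGS and ADDW_REGS, as the implementation
+ below shows.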
*/ + +static bool +avr_class_likely_spilled_p (reg_class_t c) +{ + return (c != ALL_REGS && c != ADDW_REGS); +} + + +/* Valid attributes: + progmem - Put data to program memory. + signal - Make a function to be hardware interrupt. + After function prologue interrupts remain disabled. + interrupt - Make a function to be hardware interrupt. Before function + prologue interrupts are enabled by means of SEI. + naked - Don't generate function prologue/epilogue and RET + instruction. */ + +/* Handle a "progmem" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +avr_handle_progmem_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (DECL_P (*node)) + { + if (TREE_CODE (*node) == TYPE_DECL) + { + /* This is really a decl attribute, not a type attribute, + but try to handle it for GCC 3.0 backwards compatibility. */ + + tree type = TREE_TYPE (*node); + tree attr = tree_cons (name, args, TYPE_ATTRIBUTES (type)); + tree newtype = build_type_attribute_variant (type, attr); + + TYPE_MAIN_VARIANT (newtype) = TYPE_MAIN_VARIANT (type); + TREE_TYPE (*node) = newtype; + *no_add_attrs = true; + } + else if (TREE_STATIC (*node) || DECL_EXTERNAL (*node)) + { + *no_add_attrs = false; + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + } + + return NULL_TREE; +} + +/* Handle an attribute requiring a FUNCTION_DECL; arguments as in + struct attribute_spec.handler. */ + +static tree +avr_handle_fndecl_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +static tree +avr_handle_fntype_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + + +/* AVR attributes. */ +static const struct attribute_spec +avr_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "progmem", 0, 0, false, false, false, avr_handle_progmem_attribute, + false }, + { "signal", 0, 0, true, false, false, avr_handle_fndecl_attribute, + false }, + { "interrupt", 0, 0, true, false, false, avr_handle_fndecl_attribute, + false }, + { "naked", 0, 0, false, true, true, avr_handle_fntype_attribute, + false }, + { "OS_task", 0, 0, false, true, true, avr_handle_fntype_attribute, + false }, + { "OS_main", 0, 0, false, true, true, avr_handle_fntype_attribute, + false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + + +/* Look if DECL shall be placed in program memory space by + means of attribute `progmem' or some address-space qualifier. + Return non-zero if DECL is data that must end up in Flash and + zero if the data lives in RAM (.bss, .data, .rodata, ...). 
+ + Return 2 if DECL is located in 24-bit flash address-space + Return 1 if DECL is located in 16-bit flash address-space + Return -1 if attribute `progmem' occurs in DECL or ATTRIBUTES + Return 0 otherwise */ + +int +avr_progmem_p (tree decl, tree attributes) +{ + tree a; + + if (TREE_CODE (decl) != VAR_DECL) + return 0; + + if (avr_decl_memx_p (decl)) + return 2; + + if (avr_decl_flash_p (decl)) + return 1; + + if (NULL_TREE + != lookup_attribute ("progmem", attributes)) + return -1; + + a = decl; + + do + a = TREE_TYPE(a); + while (TREE_CODE (a) == ARRAY_TYPE); + + if (a == error_mark_node) + return 0; + + if (NULL_TREE != lookup_attribute ("progmem", TYPE_ATTRIBUTES (a))) + return -1; + + return 0; +} + + +/* Scan type TYP for pointer references to address space ASn. + Return ADDR_SPACE_GENERIC (i.e. 0) if all pointers targeting + the AS are also declared to be CONST. + Otherwise, return the respective address space, i.e. a value != 0. */ + +static addr_space_t +avr_nonconst_pointer_addrspace (tree typ) +{ + while (ARRAY_TYPE == TREE_CODE (typ)) + typ = TREE_TYPE (typ); + + if (POINTER_TYPE_P (typ)) + { + addr_space_t as; + tree target = TREE_TYPE (typ); + + /* Pointer to function: Test the function's return type. */ + + if (FUNCTION_TYPE == TREE_CODE (target)) + return avr_nonconst_pointer_addrspace (TREE_TYPE (target)); + + /* "Ordinary" pointers... */ + + while (TREE_CODE (target) == ARRAY_TYPE) + target = TREE_TYPE (target); + + /* Pointers to non-generic address space must be const. + Refuse address spaces outside the device's flash. */ + + as = TYPE_ADDR_SPACE (target); + + if (!ADDR_SPACE_GENERIC_P (as) + && (!TYPE_READONLY (target) + || avr_addrspace[as].segment >= avr_current_device->n_flash)) + { + return as; + } + + /* Scan pointer's target type. */ + + return avr_nonconst_pointer_addrspace (target); + } + + return ADDR_SPACE_GENERIC; +} + + +/* Sanity check NODE so that all pointers targeting non-generic address spaces + go along with CONST qualifier. Writing to these address spaces should + be detected and complained about as early as possible. 
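+ For instance, "const __flash char *p" is accepted, while
+ "__flash char *p" (target type not const) gets the "must be const"
+ diagnostic emitted below, and an address space that does not fit into
+ the device's flash gets the "beyond flash" diagnostic instead.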
*/ + +static bool +avr_pgm_check_var_decl (tree node) +{ + const char *reason = NULL; + + addr_space_t as = ADDR_SPACE_GENERIC; + + gcc_assert (as == 0); + + if (avr_log.progmem) + avr_edump ("%?: %t\n", node); + + switch (TREE_CODE (node)) + { + default: + break; + + case VAR_DECL: + if (as = avr_nonconst_pointer_addrspace (TREE_TYPE (node)), as) + reason = "variable"; + break; + + case PARM_DECL: + if (as = avr_nonconst_pointer_addrspace (TREE_TYPE (node)), as) + reason = "function parameter"; + break; + + case FIELD_DECL: + if (as = avr_nonconst_pointer_addrspace (TREE_TYPE (node)), as) + reason = "structure field"; + break; + + case FUNCTION_DECL: + if (as = avr_nonconst_pointer_addrspace (TREE_TYPE (TREE_TYPE (node))), + as) + reason = "return type of function"; + break; + + case POINTER_TYPE: + if (as = avr_nonconst_pointer_addrspace (node), as) + reason = "pointer"; + break; + } + + if (reason) + { + if (avr_addrspace[as].segment >= avr_current_device->n_flash) + { + if (TYPE_P (node)) + error ("%qT uses address space %qs beyond flash of %qs", + node, avr_addrspace[as].name, avr_current_device->name); + else + error ("%s %q+D uses address space %qs beyond flash of %qs", + reason, node, avr_addrspace[as].name, + avr_current_device->name); + } + else + { + if (TYPE_P (node)) + error ("pointer targeting address space %qs must be const in %qT", + avr_addrspace[as].name, node); + else + error ("pointer targeting address space %qs must be const" + " in %s %q+D", + avr_addrspace[as].name, reason, node); + } + } + + return reason == NULL; +} + + +/* Add the section attribute if the variable is in progmem. */ + +static void +avr_insert_attributes (tree node, tree *attributes) +{ + avr_pgm_check_var_decl (node); + + if (TREE_CODE (node) == VAR_DECL + && (TREE_STATIC (node) || DECL_EXTERNAL (node)) + && avr_progmem_p (node, *attributes)) + { + addr_space_t as; + tree node0 = node; + + /* For C++, we have to peel arrays in order to get correct + determination of readonlyness. */ + + do + node0 = TREE_TYPE (node0); + while (TREE_CODE (node0) == ARRAY_TYPE); + + if (error_mark_node == node0) + return; + + as = TYPE_ADDR_SPACE (TREE_TYPE (node)); + + if (avr_addrspace[as].segment >= avr_current_device->n_flash) + { + error ("variable %q+D located in address space %qs" + " beyond flash of %qs", + node, avr_addrspace[as].name, avr_current_device->name); + } + + if (!TYPE_READONLY (node0) + && !TREE_READONLY (node)) + { + const char *reason = "__attribute__((progmem))"; + + if (!ADDR_SPACE_GENERIC_P (as)) + reason = avr_addrspace[as].name; + + if (avr_log.progmem) + avr_edump ("\n%?: %t\n%t\n", node, node0); + + error ("variable %q+D must be const in order to be put into" + " read-only section by means of %qs", node, reason); + } + } +} + + +/* Implement `ASM_OUTPUT_ALIGNED_DECL_LOCAL'. */ +/* Implement `ASM_OUTPUT_ALIGNED_DECL_COMMON'. */ +/* Track need of __do_clear_bss. */ + +void +avr_asm_output_aligned_decl_common (FILE * stream, + const_tree decl ATTRIBUTE_UNUSED, + const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align, bool local_p) +{ + /* __gnu_lto_v1 etc. are just markers for the linker injected by toplev.c. + There is no need to trigger __do_clear_bss code for them. */ + + if (!STR_PREFIX_P (name, "__gnu_lto")) + avr_need_clear_bss_p = true; + + if (local_p) + ASM_OUTPUT_ALIGNED_LOCAL (stream, name, size, align); + else + ASM_OUTPUT_ALIGNED_COMMON (stream, name, size, align); +} + + +/* Unnamed section callback for data_section + to track need of __do_copy_data. 
*/ + +static void +avr_output_data_section_asm_op (const void *data) +{ + avr_need_copy_data_p = true; + + /* Dispatch to default. */ + output_section_asm_op (data); +} + + +/* Unnamed section callback for bss_section + to track need of __do_clear_bss. */ + +static void +avr_output_bss_section_asm_op (const void *data) +{ + avr_need_clear_bss_p = true; + + /* Dispatch to default. */ + output_section_asm_op (data); +} + + +/* Unnamed section callback for progmem*.data sections. */ + +static void +avr_output_progmem_section_asm_op (const void *data) +{ + fprintf (asm_out_file, "\t.section\t%s,\"a\",@progbits\n", + (const char*) data); +} + + +/* Implement `TARGET_ASM_INIT_SECTIONS'. */ + +static void +avr_asm_init_sections (void) +{ + /* Set up a section for jump tables. Alignment is handled by + ASM_OUTPUT_BEFORE_CASE_LABEL. */ + + if (AVR_HAVE_JMP_CALL) + { + progmem_swtable_section + = get_unnamed_section (0, output_section_asm_op, + "\t.section\t.progmem.gcc_sw_table" + ",\"a\",@progbits"); + } + else + { + progmem_swtable_section + = get_unnamed_section (SECTION_CODE, output_section_asm_op, + "\t.section\t.progmem.gcc_sw_table" + ",\"ax\",@progbits"); + } + + /* Override section callbacks to keep track of `avr_need_clear_bss_p' + resp. `avr_need_copy_data_p'. */ + + readonly_data_section->unnamed.callback = avr_output_data_section_asm_op; + data_section->unnamed.callback = avr_output_data_section_asm_op; + bss_section->unnamed.callback = avr_output_bss_section_asm_op; +} + + +/* Implement `TARGET_ASM_FUNCTION_RODATA_SECTION'. */ + +static section* +avr_asm_function_rodata_section (tree decl) +{ + /* If a function is unused and optimized out by -ffunction-sections + and --gc-sections, ensure that the same will happen for its jump + tables by putting them into individual sections. */ + + unsigned int flags; + section * frodata; + + /* Get the frodata section from the default function in varasm.c + but treat function-associated data-like jump tables as code + rather than as user defined data. AVR has no constant pools. */ + { + int fdata = flag_data_sections; + + flag_data_sections = flag_function_sections; + frodata = default_function_rodata_section (decl); + flag_data_sections = fdata; + flags = frodata->common.flags; + } + + if (frodata != readonly_data_section + && flags & SECTION_NAMED) + { + /* Adjust section flags and replace section name prefix. */ + + unsigned int i; + + static const char* const prefix[] = + { + ".rodata", ".progmem.gcc_sw_table", + ".gnu.linkonce.r.", ".gnu.linkonce.t." + }; + + for (i = 0; i < sizeof (prefix) / sizeof (*prefix); i += 2) + { + const char * old_prefix = prefix[i]; + const char * new_prefix = prefix[i+1]; + const char * name = frodata->named.name; + + if (STR_PREFIX_P (name, old_prefix)) + { + const char *rname = ACONCAT ((new_prefix, + name + strlen (old_prefix), NULL)); + flags &= ~SECTION_CODE; + flags |= AVR_HAVE_JMP_CALL ? 0 : SECTION_CODE; + + return get_section (rname, flags, frodata->named.decl); + } + } + } + + return progmem_swtable_section; +} + + +/* Implement `TARGET_ASM_NAMED_SECTION'. */ +/* Track need of __do_clear_bss, __do_copy_data for named sections. 
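+ That is, a user-named section starting with ".data", ".rodata" or
+ ".gnu.linkonce.d" pulls in __do_copy_data, and one starting with ".bss"
+ pulls in __do_clear_bss, mirroring the prefix checks below.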
*/ + +static void +avr_asm_named_section (const char *name, unsigned int flags, tree decl) +{ + if (flags & AVR_SECTION_PROGMEM) + { + addr_space_t as = (flags & AVR_SECTION_PROGMEM) / SECTION_MACH_DEP; + const char *old_prefix = ".rodata"; + const char *new_prefix = avr_addrspace[as].section_name; + + if (STR_PREFIX_P (name, old_prefix)) + { + const char *sname = ACONCAT ((new_prefix, + name + strlen (old_prefix), NULL)); + default_elf_asm_named_section (sname, flags, decl); + return; + } + + default_elf_asm_named_section (new_prefix, flags, decl); + return; + } + + if (!avr_need_copy_data_p) + avr_need_copy_data_p = (STR_PREFIX_P (name, ".data") + || STR_PREFIX_P (name, ".rodata") + || STR_PREFIX_P (name, ".gnu.linkonce.d")); + + if (!avr_need_clear_bss_p) + avr_need_clear_bss_p = STR_PREFIX_P (name, ".bss"); + + default_elf_asm_named_section (name, flags, decl); +} + + +/* Implement `TARGET_SECTION_TYPE_FLAGS'. */ + +static unsigned int +avr_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = default_section_type_flags (decl, name, reloc); + + if (STR_PREFIX_P (name, ".noinit")) + { + if (decl && TREE_CODE (decl) == VAR_DECL + && DECL_INITIAL (decl) == NULL_TREE) + flags |= SECTION_BSS; /* @nobits */ + else + warning (0, "only uninitialized variables can be placed in the " + ".noinit section"); + } + + if (decl && DECL_P (decl) + && avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + { + addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (decl)); + + /* Attribute progmem puts data in generic address space. + Set section flags as if it was in __flash to get the right + section prefix in the remainder. */ + + if (ADDR_SPACE_GENERIC_P (as)) + as = ADDR_SPACE_FLASH; + + flags |= as * SECTION_MACH_DEP; + flags &= ~SECTION_WRITE; + flags &= ~SECTION_BSS; + } + + return flags; +} + + +/* Implement `TARGET_ENCODE_SECTION_INFO'. */ + +static void +avr_encode_section_info (tree decl, rtx rtl, int new_decl_p) +{ + /* In avr_handle_progmem_attribute, DECL_INITIAL is not yet + readily available, see PR34734. So we postpone the warning + about uninitialized data in program memory section until here. */ + + if (new_decl_p + && decl && DECL_P (decl) + && NULL_TREE == DECL_INITIAL (decl) + && !DECL_EXTERNAL (decl) + && avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + { + warning (OPT_Wuninitialized, + "uninitialized variable %q+D put into " + "program memory area", decl); + } + + default_encode_section_info (decl, rtl, new_decl_p); + + if (decl && DECL_P (decl) + && TREE_CODE (decl) != FUNCTION_DECL + && MEM_P (rtl) + && SYMBOL_REF == GET_CODE (XEXP (rtl, 0))) + { + rtx sym = XEXP (rtl, 0); + tree type = TREE_TYPE (decl); + if (type == error_mark_node) + return; + addr_space_t as = TYPE_ADDR_SPACE (type); + + /* PSTR strings are in generic space but located in flash: + patch address space. 
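+ (avr-libc's PSTR, for example, creates a static const char[] carrying
+ the progmem attribute; avr_progmem_p returns -1 for it, so the symbol
+ is retagged as ADDR_SPACE_FLASH here although its C type lives in the
+ generic space.)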
*/ + + if (-1 == avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + as = ADDR_SPACE_FLASH; + + AVR_SYMBOL_SET_ADDR_SPACE (sym, as); + } +} + + +/* Implement `TARGET_ASM_SELECT_SECTION' */ + +static section * +avr_asm_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) +{ + section * sect = default_elf_select_section (decl, reloc, align); + + if (decl && DECL_P (decl) + && avr_progmem_p (decl, DECL_ATTRIBUTES (decl))) + { + addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (decl)); + + /* __progmem__ goes in generic space but shall be allocated to + .progmem.data */ + + if (ADDR_SPACE_GENERIC_P (as)) + as = ADDR_SPACE_FLASH; + + if (sect->common.flags & SECTION_NAMED) + { + const char * name = sect->named.name; + const char * old_prefix = ".rodata"; + const char * new_prefix = avr_addrspace[as].section_name; + + if (STR_PREFIX_P (name, old_prefix)) + { + const char *sname = ACONCAT ((new_prefix, + name + strlen (old_prefix), NULL)); + return get_section (sname, sect->common.flags, sect->named.decl); + } + } + + if (!progmem_section[as]) + { + progmem_section[as] + = get_unnamed_section (0, avr_output_progmem_section_asm_op, + avr_addrspace[as].section_name); + } + + return progmem_section[as]; + } + + return sect; +} + +/* Implement `TARGET_ASM_FILE_START'. */ +/* Outputs some text at the start of each assembler file. */ + +static void +avr_file_start (void) +{ + int sfr_offset = avr_current_arch->sfr_offset; + + if (avr_current_arch->asm_only) + error ("MCU %qs supported for assembler only", avr_current_device->name); + + default_file_start (); + + /* Print I/O addresses of some SFRs used with IN and OUT. */ + + if (AVR_HAVE_SPH) + fprintf (asm_out_file, "__SP_H__ = 0x%02x\n", avr_addr.sp_h - sfr_offset); + + fprintf (asm_out_file, "__SP_L__ = 0x%02x\n", avr_addr.sp_l - sfr_offset); + fprintf (asm_out_file, "__SREG__ = 0x%02x\n", avr_addr.sreg - sfr_offset); + if (AVR_HAVE_RAMPZ) + fprintf (asm_out_file, "__RAMPZ__ = 0x%02x\n", avr_addr.rampz - sfr_offset); + if (AVR_HAVE_RAMPY) + fprintf (asm_out_file, "__RAMPY__ = 0x%02x\n", avr_addr.rampy - sfr_offset); + if (AVR_HAVE_RAMPX) + fprintf (asm_out_file, "__RAMPX__ = 0x%02x\n", avr_addr.rampx - sfr_offset); + if (AVR_HAVE_RAMPD) + fprintf (asm_out_file, "__RAMPD__ = 0x%02x\n", avr_addr.rampd - sfr_offset); + if (AVR_XMEGA) + fprintf (asm_out_file, "__CCP__ = 0x%02x\n", avr_addr.ccp - sfr_offset); + fprintf (asm_out_file, "__tmp_reg__ = %d\n", TMP_REGNO); + fprintf (asm_out_file, "__zero_reg__ = %d\n", ZERO_REGNO); +} + + +/* Implement `TARGET_ASM_FILE_END'. */ +/* Outputs to the stdio stream FILE some + appropriate text to go at the end of an assembler file. */ + +static void +avr_file_end (void) +{ + /* Output these only if there is anything in the + .data* / .rodata* / .gnu.linkonce.* resp. .bss* or COMMON + input section(s) - some code size can be saved by not + linking in the initialization code from libgcc if resp. + sections are empty, see PR18145. */ + + if (avr_need_copy_data_p) + fputs (".global __do_copy_data\n", asm_out_file); + + if (avr_need_clear_bss_p) + fputs (".global __do_clear_bss\n", asm_out_file); +} + + +/* Worker function for `ADJUST_REG_ALLOC_ORDER'. */ +/* Choose the order in which to allocate hard registers for + pseudo-registers local to a basic block. + + Store the desired register order in the array `reg_alloc_order'. + Element 0 should be the register to allocate first; element 1, the + next register; and so on. 
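+ The three tables below differ only in how the upper, immediate-capable
+ registers are tried first: order_0 prefers r24/r25 before r18..r23,
+ order_1 walks r18..r25 upwards, order_2 walks r25 down to r18.  All of
+ them then fall back to the pointer registers Z, X and Y, work down
+ through r17..r2, and keep the tmp/zero registers and the special
+ entries 32..35 for last.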
*/ + +void +avr_adjust_reg_alloc_order (void) +{ + unsigned int i; + static const int order_0[] = + { + 24, 25, + 18, 19, 20, 21, 22, 23, + 30, 31, + 26, 27, 28, 29, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, + 0, 1, + 32, 33, 34, 35 + }; + static const int order_1[] = + { + 18, 19, 20, 21, 22, 23, 24, 25, + 30, 31, + 26, 27, 28, 29, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, + 0, 1, + 32, 33, 34, 35 + }; + static const int order_2[] = + { + 25, 24, 23, 22, 21, 20, 19, 18, + 30, 31, + 26, 27, 28, 29, + 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, + 1, 0, + 32, 33, 34, 35 + }; + + const int *order = (TARGET_ORDER_1 ? order_1 : + TARGET_ORDER_2 ? order_2 : + order_0); + for (i = 0; i < ARRAY_SIZE (order_0); ++i) + reg_alloc_order[i] = order[i]; +} + + +/* Implement `TARGET_REGISTER_MOVE_COST' */ + +static int +avr_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + return (from == STACK_REG ? 6 + : to == STACK_REG ? 12 + : 2); +} + + +/* Implement `TARGET_MEMORY_MOVE_COST' */ + +static int +avr_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return (mode == QImode ? 2 + : mode == HImode ? 4 + : mode == SImode ? 8 + : mode == SFmode ? 8 + : 16); +} + + +/* Mutually recursive subroutine of avr_rtx_cost for calculating the + cost of an RTX operand given its context. X is the rtx of the + operand, MODE is its mode, and OUTER is the rtx_code of this + operand's parent operator. */ + +static int +avr_operand_rtx_cost (rtx x, enum machine_mode mode, enum rtx_code outer, + int opno, bool speed) +{ + enum rtx_code code = GET_CODE (x); + int total; + + switch (code) + { + case REG: + case SUBREG: + return 0; + + case CONST_INT: + case CONST_FIXED: + case CONST_DOUBLE: + return COSTS_N_INSNS (GET_MODE_SIZE (mode)); + + default: + break; + } + + total = 0; + avr_rtx_costs (x, code, outer, opno, &total, speed); + return total; +} + +/* Worker function for AVR backend's rtx_cost function. + X is rtx expression whose cost is to be calculated. + Return true if the complete cost has been computed. + Return false if subexpressions should be scanned. + In either case, *TOTAL contains the cost result. */ + +static bool +avr_rtx_costs_1 (rtx x, int codearg, int outer_code ATTRIBUTE_UNUSED, + int opno ATTRIBUTE_UNUSED, int *total, bool speed) +{ + enum rtx_code code = (enum rtx_code) codearg; + enum machine_mode mode = GET_MODE (x); + HOST_WIDE_INT val; + + switch (code) + { + case CONST_INT: + case CONST_FIXED: + case CONST_DOUBLE: + case SYMBOL_REF: + case CONST: + case LABEL_REF: + /* Immediate constants are as cheap as registers. 
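+ They are costed as zero here when they form the expression being priced;
+ avr_operand_rtx_cost above, by contrast, charges one instruction per
+ byte for a constant used as an operand, since it must be loaded into a
+ register first.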
*/ + *total = 0; + return true; + + case MEM: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + return true; + + case NEG: + switch (mode) + { + case QImode: + case SFmode: + *total = COSTS_N_INSNS (1); + break; + + case HImode: + case PSImode: + case SImode: + *total = COSTS_N_INSNS (2 * GET_MODE_SIZE (mode) - 1); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case ABS: + switch (mode) + { + case QImode: + case SFmode: + *total = COSTS_N_INSNS (1); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case NOT: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case ZERO_EXTEND: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) + - GET_MODE_SIZE (GET_MODE (XEXP (x, 0)))); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case SIGN_EXTEND: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) + 2 + - GET_MODE_SIZE (GET_MODE (XEXP (x, 0)))); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case PLUS: + switch (mode) + { + case QImode: + if (AVR_HAVE_MUL + && MULT == GET_CODE (XEXP (x, 0)) + && register_operand (XEXP (x, 1), QImode)) + { + /* multiply-add */ + *total = COSTS_N_INSNS (speed ? 4 : 3); + /* multiply-add with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 0), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + break; + + case HImode: + if (AVR_HAVE_MUL + && (MULT == GET_CODE (XEXP (x, 0)) + || ASHIFT == GET_CODE (XEXP (x, 0))) + && register_operand (XEXP (x, 1), HImode) + && (ZERO_EXTEND == GET_CODE (XEXP (XEXP (x, 0), 0)) + || SIGN_EXTEND == GET_CODE (XEXP (XEXP (x, 0), 0)))) + { + /* multiply-add */ + *total = COSTS_N_INSNS (speed ? 5 : 4); + /* multiply-add with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 0), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (2); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (2); + break; + + case PSImode: + if (!CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (3); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63) + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (3); + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (4); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (4); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case MINUS: + if (AVR_HAVE_MUL + && QImode == mode + && register_operand (XEXP (x, 0), QImode) + && MULT == GET_CODE (XEXP (x, 1))) + { + /* multiply-sub */ + *total = COSTS_N_INSNS (speed ? 
4 : 3); + /* multiply-sub with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + if (AVR_HAVE_MUL + && HImode == mode + && register_operand (XEXP (x, 0), HImode) + && (MULT == GET_CODE (XEXP (x, 1)) + || ASHIFT == GET_CODE (XEXP (x, 1))) + && (ZERO_EXTEND == GET_CODE (XEXP (XEXP (x, 1), 0)) + || SIGN_EXTEND == GET_CODE (XEXP (XEXP (x, 1), 0)))) + { + /* multiply-sub */ + *total = COSTS_N_INSNS (speed ? 5 : 4); + /* multiply-sub with constant: will be split and load constant. */ + if (CONST_INT_P (XEXP (XEXP (x, 1), 1))) + *total = COSTS_N_INSNS (1) + *total; + return true; + } + /* FALLTHRU */ + case AND: + case IOR: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + return true; + + case XOR: + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + return true; + + case MULT: + switch (mode) + { + case QImode: + if (AVR_HAVE_MUL) + *total = COSTS_N_INSNS (!speed ? 3 : 4); + else if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + return false; + break; + + case HImode: + if (AVR_HAVE_MUL) + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + enum rtx_code code0 = GET_CODE (op0); + enum rtx_code code1 = GET_CODE (op1); + bool ex0 = SIGN_EXTEND == code0 || ZERO_EXTEND == code0; + bool ex1 = SIGN_EXTEND == code1 || ZERO_EXTEND == code1; + + if (ex0 + && (u8_operand (op1, HImode) + || s8_operand (op1, HImode))) + { + *total = COSTS_N_INSNS (!speed ? 4 : 6); + return true; + } + if (ex0 + && register_operand (op1, HImode)) + { + *total = COSTS_N_INSNS (!speed ? 5 : 8); + return true; + } + else if (ex0 || ex1) + { + *total = COSTS_N_INSNS (!speed ? 3 : 5); + return true; + } + else if (register_operand (op0, HImode) + && (u8_operand (op1, HImode) + || s8_operand (op1, HImode))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 9); + return true; + } + else + *total = COSTS_N_INSNS (!speed ? 7 : 10); + } + else if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + return false; + break; + + case PSImode: + if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + *total = 10; + break; + + case SImode: + if (AVR_HAVE_MUL) + { + if (!speed) + { + /* Add some additional costs besides CALL like moves etc. */ + + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4); + } + else + { + /* Just a rough estimate. Even with -O2 we don't want bulky + code expanded inline. */ + + *total = COSTS_N_INSNS (25); + } + } + else + { + if (speed) + *total = COSTS_N_INSNS (300); + else + /* Add some additional costs besides CALL like moves etc. */ + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4); + } + + return true; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + return true; + + case DIV: + case MOD: + case UDIV: + case UMOD: + if (!speed) + *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 2 : 1); + else + *total = COSTS_N_INSNS (15 * GET_MODE_SIZE (mode)); + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + /* For div/mod with const-int divisor we have at least the cost of + loading the divisor. 
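+ As a worked example, a speed-optimised SImode division is costed as
+ 15 * 4 = 60 instruction equivalents for the library call, plus 4 for
+ loading a 32-bit constant divisor, plus the flat 2-instruction penalty
+ added below for register shuffling.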
*/ + if (CONST_INT_P (XEXP (x, 1))) + *total += COSTS_N_INSNS (GET_MODE_SIZE (mode)); + /* Add some overall penaly for clobbering and moving around registers */ + *total += COSTS_N_INSNS (2); + return true; + + case ROTATE: + switch (mode) + { + case QImode: + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 4) + *total = COSTS_N_INSNS (1); + + break; + + case HImode: + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 8) + *total = COSTS_N_INSNS (3); + + break; + + case SImode: + if (CONST_INT_P (XEXP (x, 1))) + switch (INTVAL (XEXP (x, 1))) + { + case 8: + case 24: + *total = COSTS_N_INSNS (5); + break; + case 16: + *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 6); + break; + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case ASHIFT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 7) + *total = COSTS_N_INSNS (3); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (AVR_HAVE_MUL) + { + if (const_2_to_7_operand (XEXP (x, 1), HImode) + && (SIGN_EXTEND == GET_CODE (XEXP (x, 0)) + || ZERO_EXTEND == GET_CODE (XEXP (x, 0)))) + { + *total = COSTS_N_INSNS (!speed ? 4 : 6); + return true; + } + } + + if (const1_rtx == (XEXP (x, 1)) + && SIGN_EXTEND == GET_CODE (XEXP (x, 0))) + { + *total = COSTS_N_INSNS (2); + return true; + } + + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + *total = COSTS_N_INSNS (2); + break; + case 9: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 3: + case 10: + case 15: + *total = COSTS_N_INSNS (4); + break; + case 7: + case 11: + case 12: + *total = COSTS_N_INSNS (5); + break; + case 4: + *total = COSTS_N_INSNS (!speed ? 5 : 8); + break; + case 6: + *total = COSTS_N_INSNS (!speed ? 5 : 9); + break; + case 5: + *total = COSTS_N_INSNS (!speed ? 5 : 10); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + case PSImode: + if (!CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 73); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + case 16: + *total = COSTS_N_INSNS (3); + break; + case 23: + *total = COSTS_N_INSNS (5); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 3 * INTVAL (XEXP (x, 1))); + break; + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 24: + *total = COSTS_N_INSNS (3); + break; + case 1: + case 8: + case 16: + *total = COSTS_N_INSNS (4); + break; + case 31: + *total = COSTS_N_INSNS (6); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + default: + *total = COSTS_N_INSNS (!speed ? 
7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case ASHIFTRT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 6) + *total = COSTS_N_INSNS (4); + else if (val == 7) + *total = COSTS_N_INSNS (2); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (2); + break; + case 15: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 7: + case 8: + case 9: + *total = COSTS_N_INSNS (4); + break; + case 10: + case 14: + *total = COSTS_N_INSNS (5); + break; + case 11: + *total = COSTS_N_INSNS (!speed ? 5 : 6); + break; + case 12: + *total = COSTS_N_INSNS (!speed ? 5 : 7); + break; + case 6: + case 13: + *total = COSTS_N_INSNS (!speed ? 5 : 8); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + case PSImode: + if (!CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 73); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (3); + break; + case 16: + case 8: + *total = COSTS_N_INSNS (5); + break; + case 23: + *total = COSTS_N_INSNS (4); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 3 * INTVAL (XEXP (x, 1))); + break; + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (4); + break; + case 8: + case 16: + case 24: + *total = COSTS_N_INSNS (6); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + case 31: + *total = COSTS_N_INSNS (AVR_HAVE_MOVW ? 4 : 5); + break; + default: + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case LSHIFTRT: + switch (mode) + { + case QImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 4 : 17); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + { + val = INTVAL (XEXP (x, 1)); + if (val == 7) + *total = COSTS_N_INSNS (3); + else if (val >= 0 && val <= 7) + *total = COSTS_N_INSNS (val); + else + *total = COSTS_N_INSNS (1); + } + break; + + case HImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 
5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + *total = COSTS_N_INSNS (2); + break; + case 9: + *total = COSTS_N_INSNS (3); + break; + case 2: + case 10: + case 15: + *total = COSTS_N_INSNS (4); + break; + case 7: + case 11: + *total = COSTS_N_INSNS (5); + break; + case 3: + case 12: + case 13: + case 14: + *total = COSTS_N_INSNS (!speed ? 5 : 6); + break; + case 4: + *total = COSTS_N_INSNS (!speed ? 5 : 7); + break; + case 5: + case 6: + *total = COSTS_N_INSNS (!speed ? 5 : 9); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 41); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + case PSImode: + if (!CONST_INT_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (!speed ? 6 : 73); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + case 8: + case 16: + *total = COSTS_N_INSNS (3); + break; + case 23: + *total = COSTS_N_INSNS (5); + break; + default: + *total = COSTS_N_INSNS (!speed ? 5 : 3 * INTVAL (XEXP (x, 1))); + break; + } + break; + + case SImode: + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + { + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + else + switch (INTVAL (XEXP (x, 1))) + { + case 0: + *total = 0; + break; + case 1: + *total = COSTS_N_INSNS (4); + break; + case 2: + *total = COSTS_N_INSNS (!speed ? 7 : 8); + break; + case 8: + case 16: + case 24: + *total = COSTS_N_INSNS (4); + break; + case 31: + *total = COSTS_N_INSNS (6); + break; + default: + *total = COSTS_N_INSNS (!speed ? 7 : 113); + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, + speed); + } + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case COMPARE: + switch (GET_MODE (XEXP (x, 0))) + { + case QImode: + *total = COSTS_N_INSNS (1); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + break; + + case HImode: + *total = COSTS_N_INSNS (2); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + else if (INTVAL (XEXP (x, 1)) != 0) + *total += COSTS_N_INSNS (1); + break; + + case PSImode: + *total = COSTS_N_INSNS (3); + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) != 0) + *total += COSTS_N_INSNS (2); + break; + + case SImode: + *total = COSTS_N_INSNS (4); + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1, speed); + else if (INTVAL (XEXP (x, 1)) != 0) + *total += COSTS_N_INSNS (3); + break; + + default: + return false; + } + *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); + return true; + + case TRUNCATE: + if (AVR_HAVE_MUL + && LSHIFTRT == GET_CODE (XEXP (x, 0)) + && MULT == GET_CODE (XEXP (XEXP (x, 0), 0)) + && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + if (QImode == mode || HImode == mode) + { + *total = COSTS_N_INSNS (2); + return true; + } + } + break; + + default: + break; + } + return false; +} + + +/* Implement `TARGET_RTX_COSTS'. */ + +static bool +avr_rtx_costs (rtx x, int codearg, int outer_code, + int opno, int *total, bool speed) +{ + bool done = avr_rtx_costs_1 (x, codearg, outer_code, + opno, total, speed); + + if (avr_log.rtx_costs) + { + avr_edump ("\n%?=%b (%s) total=%d, outer=%C:\n%r\n", + done, speed ? 
"speed" : "size", *total, outer_code, x); + } + + return done; +} + + +/* Implement `TARGET_ADDRESS_COST'. */ + +static int +avr_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + int cost = 4; + + if (GET_CODE (x) == PLUS + && CONST_INT_P (XEXP (x, 1)) + && (REG_P (XEXP (x, 0)) + || GET_CODE (XEXP (x, 0)) == SUBREG)) + { + if (INTVAL (XEXP (x, 1)) >= 61) + cost = 18; + } + else if (CONSTANT_ADDRESS_P (x)) + { + if (optimize > 0 + && io_address_operand (x, QImode)) + cost = 2; + } + + if (avr_log.address_cost) + avr_edump ("\n%?: %d = %r\n", cost, x); + + return cost; +} + +/* Test for extra memory constraint 'Q'. + It's a memory address based on Y or Z pointer with valid displacement. */ + +int +extra_constraint_Q (rtx x) +{ + int ok = 0; + + if (GET_CODE (XEXP (x,0)) == PLUS + && REG_P (XEXP (XEXP (x,0), 0)) + && GET_CODE (XEXP (XEXP (x,0), 1)) == CONST_INT + && (INTVAL (XEXP (XEXP (x,0), 1)) + <= MAX_LD_OFFSET (GET_MODE (x)))) + { + rtx xx = XEXP (XEXP (x,0), 0); + int regno = REGNO (xx); + + ok = (/* allocate pseudos */ + regno >= FIRST_PSEUDO_REGISTER + /* strictly check */ + || regno == REG_Z || regno == REG_Y + /* XXX frame & arg pointer checks */ + || xx == frame_pointer_rtx + || xx == arg_pointer_rtx); + + if (avr_log.constraints) + avr_edump ("\n%?=%d reload_completed=%d reload_in_progress=%d\n %r\n", + ok, reload_completed, reload_in_progress, x); + } + + return ok; +} + +/* Convert condition code CONDITION to the valid AVR condition code. */ + +RTX_CODE +avr_normalize_condition (RTX_CODE condition) +{ + switch (condition) + { + case GT: + return GE; + case GTU: + return GEU; + case LE: + return LT; + case LEU: + return LTU; + default: + gcc_unreachable (); + } +} + +/* Helper function for `avr_reorg'. */ + +static rtx +avr_compare_pattern (rtx insn) +{ + rtx pattern = single_set (insn); + + if (pattern + && NONJUMP_INSN_P (insn) + && SET_DEST (pattern) == cc0_rtx + && GET_CODE (SET_SRC (pattern)) == COMPARE) + { + enum machine_mode mode0 = GET_MODE (XEXP (SET_SRC (pattern), 0)); + enum machine_mode mode1 = GET_MODE (XEXP (SET_SRC (pattern), 1)); + + /* The 64-bit comparisons have fixed operands ACC_A and ACC_B. + They must not be swapped, thus skip them. */ + + if ((mode0 == VOIDmode || GET_MODE_SIZE (mode0) <= 4) + && (mode1 == VOIDmode || GET_MODE_SIZE (mode1) <= 4)) + return pattern; + } + + return NULL_RTX; +} + +/* Helper function for `avr_reorg'. */ + +/* Expansion of switch/case decision trees leads to code like + + cc0 = compare (Reg, Num) + if (cc0 == 0) + goto L1 + + cc0 = compare (Reg, Num) + if (cc0 > 0) + goto L2 + + The second comparison is superfluous and can be deleted. + The second jump condition can be transformed from a + "difficult" one to a "simple" one because "cc0 > 0" and + "cc0 >= 0" will have the same effect here. + + This function relies on the way switch/case is being expaned + as binary decision tree. For example code see PR 49903. + + Return TRUE if optimization performed. + Return FALSE if nothing changed. + + INSN1 is a comparison, i.e. avr_compare_pattern != 0. + + We don't want to do this in text peephole because it is + tedious to work out jump offsets there and the second comparison + might have been transormed by `avr_reorg'. + + RTL peephole won't do because peephole2 does not scan across + basic blocks. 
*/ + +static bool +avr_reorg_remove_redundant_compare (rtx insn1) +{ + rtx comp1, ifelse1, xcond1, branch1; + rtx comp2, ifelse2, xcond2, branch2, insn2; + enum rtx_code code; + rtx jump, target, cond; + + /* Look out for: compare1 - branch1 - compare2 - branch2 */ + + branch1 = next_nonnote_nondebug_insn (insn1); + if (!branch1 || !JUMP_P (branch1)) + return false; + + insn2 = next_nonnote_nondebug_insn (branch1); + if (!insn2 || !avr_compare_pattern (insn2)) + return false; + + branch2 = next_nonnote_nondebug_insn (insn2); + if (!branch2 || !JUMP_P (branch2)) + return false; + + comp1 = avr_compare_pattern (insn1); + comp2 = avr_compare_pattern (insn2); + xcond1 = single_set (branch1); + xcond2 = single_set (branch2); + + if (!comp1 || !comp2 + || !rtx_equal_p (comp1, comp2) + || !xcond1 || SET_DEST (xcond1) != pc_rtx + || !xcond2 || SET_DEST (xcond2) != pc_rtx + || IF_THEN_ELSE != GET_CODE (SET_SRC (xcond1)) + || IF_THEN_ELSE != GET_CODE (SET_SRC (xcond2))) + { + return false; + } + + comp1 = SET_SRC (comp1); + ifelse1 = SET_SRC (xcond1); + ifelse2 = SET_SRC (xcond2); + + /* comp is COMPARE now and ifelse is IF_THEN_ELSE. */ + + if (EQ != GET_CODE (XEXP (ifelse1, 0)) + || !REG_P (XEXP (comp1, 0)) + || !CONST_INT_P (XEXP (comp1, 1)) + || XEXP (ifelse1, 2) != pc_rtx + || XEXP (ifelse2, 2) != pc_rtx + || LABEL_REF != GET_CODE (XEXP (ifelse1, 1)) + || LABEL_REF != GET_CODE (XEXP (ifelse2, 1)) + || !COMPARISON_P (XEXP (ifelse2, 0)) + || cc0_rtx != XEXP (XEXP (ifelse1, 0), 0) + || cc0_rtx != XEXP (XEXP (ifelse2, 0), 0) + || const0_rtx != XEXP (XEXP (ifelse1, 0), 1) + || const0_rtx != XEXP (XEXP (ifelse2, 0), 1)) + { + return false; + } + + /* We filtered the insn sequence to look like + + (set (cc0) + (compare (reg:M N) + (const_int VAL))) + (set (pc) + (if_then_else (eq (cc0) + (const_int 0)) + (label_ref L1) + (pc))) + + (set (cc0) + (compare (reg:M N) + (const_int VAL))) + (set (pc) + (if_then_else (CODE (cc0) + (const_int 0)) + (label_ref L2) + (pc))) + */ + + code = GET_CODE (XEXP (ifelse2, 0)); + + /* Map GT/GTU to GE/GEU which is easier for AVR. + The first two instructions compare/branch on EQ + so we may replace the difficult + + if (x == VAL) goto L1; + if (x > VAL) goto L2; + + with easy + + if (x == VAL) goto L1; + if (x >= VAL) goto L2; + + Similarly, replace LE/LEU by LT/LTU. */ + + switch (code) + { + case EQ: + case LT: case LTU: + case GE: case GEU: + break; + + case LE: case LEU: + case GT: case GTU: + code = avr_normalize_condition (code); + break; + + default: + return false; + } + + /* Wrap the branches into UNSPECs so they won't be changed or + optimized in the remainder. */ + + target = XEXP (XEXP (ifelse1, 1), 0); + cond = XEXP (ifelse1, 0); + jump = emit_jump_insn_after (gen_branch_unspec (target, cond), insn1); + + JUMP_LABEL (jump) = JUMP_LABEL (branch1); + + target = XEXP (XEXP (ifelse2, 1), 0); + cond = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx); + jump = emit_jump_insn_after (gen_branch_unspec (target, cond), insn2); + + JUMP_LABEL (jump) = JUMP_LABEL (branch2); + + /* The comparisons in insn1 and insn2 are exactly the same; + insn2 is superfluous so delete it. */ + + delete_insn (insn2); + delete_insn (branch1); + delete_insn (branch2); + + return true; +} + + +/* Implement `TARGET_MACHINE_DEPENDENT_REORG'. */ +/* Optimize conditional jumps. 
*/ + +static void +avr_reorg (void) +{ + rtx insn = get_insns(); + + for (insn = next_real_insn (insn); insn; insn = next_real_insn (insn)) + { + rtx pattern = avr_compare_pattern (insn); + + if (!pattern) + continue; + + if (optimize + && avr_reorg_remove_redundant_compare (insn)) + { + continue; + } + + if (compare_diff_p (insn)) + { + /* Now we work under compare insn with difficult branch. */ + + rtx next = next_real_insn (insn); + rtx pat = PATTERN (next); + + pattern = SET_SRC (pattern); + + if (true_regnum (XEXP (pattern, 0)) >= 0 + && true_regnum (XEXP (pattern, 1)) >= 0) + { + rtx x = XEXP (pattern, 0); + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + PUT_CODE (t, swap_condition (GET_CODE (t))); + XEXP (pattern, 0) = XEXP (pattern, 1); + XEXP (pattern, 1) = x; + INSN_CODE (next) = -1; + } + else if (true_regnum (XEXP (pattern, 0)) >= 0 + && XEXP (pattern, 1) == const0_rtx) + { + /* This is a tst insn, we can reverse it. */ + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + + PUT_CODE (t, swap_condition (GET_CODE (t))); + XEXP (pattern, 1) = XEXP (pattern, 0); + XEXP (pattern, 0) = const0_rtx; + INSN_CODE (next) = -1; + INSN_CODE (insn) = -1; + } + else if (true_regnum (XEXP (pattern, 0)) >= 0 + && CONST_INT_P (XEXP (pattern, 1))) + { + rtx x = XEXP (pattern, 1); + rtx src = SET_SRC (pat); + rtx t = XEXP (src,0); + enum machine_mode mode = GET_MODE (XEXP (pattern, 0)); + + if (avr_simplify_comparison_p (mode, GET_CODE (t), x)) + { + XEXP (pattern, 1) = gen_int_mode (INTVAL (x) + 1, mode); + PUT_CODE (t, avr_normalize_condition (GET_CODE (t))); + INSN_CODE (next) = -1; + INSN_CODE (insn) = -1; + } + } + } + } +} + +/* Returns register number for function return value.*/ + +static inline unsigned int +avr_ret_register (void) +{ + return 24; +} + + +/* Implement `TARGET_FUNCTION_VALUE_REGNO_P'. */ + +static bool +avr_function_value_regno_p (const unsigned int regno) +{ + return (regno == avr_ret_register ()); +} + + +/* Implement `TARGET_LIBCALL_VALUE'. */ +/* Create an RTX representing the place where a + library function returns a value of mode MODE. */ + +static rtx +avr_libcall_value (enum machine_mode mode, + const_rtx func ATTRIBUTE_UNUSED) +{ + int offs = GET_MODE_SIZE (mode); + + if (offs <= 4) + offs = (offs + 1) & ~1; + + return gen_rtx_REG (mode, avr_ret_register () + 2 - offs); +} + + +/* Implement `TARGET_FUNCTION_VALUE'. */ +/* Create an RTX representing the place where a + function returns a value of data type VALTYPE. */ + +static rtx +avr_function_value (const_tree type, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + unsigned int offs; + + if (TYPE_MODE (type) != BLKmode) + return avr_libcall_value (TYPE_MODE (type), NULL_RTX); + + offs = int_size_in_bytes (type); + if (offs < 2) + offs = 2; + if (offs > 2 && offs < GET_MODE_SIZE (SImode)) + offs = GET_MODE_SIZE (SImode); + else if (offs > GET_MODE_SIZE (SImode) && offs < GET_MODE_SIZE (DImode)) + offs = GET_MODE_SIZE (DImode); + + return gen_rtx_REG (BLKmode, avr_ret_register () + 2 - offs); +} + +int +test_hard_reg_class (enum reg_class rclass, rtx x) +{ + int regno = true_regnum (x); + if (regno < 0) + return 0; + + if (TEST_HARD_REG_CLASS (rclass, regno)) + return 1; + + return 0; +} + + +/* Helper for jump_over_one_insn_p: Test if INSN is a 2-word instruction + and thus is suitable to be skipped by CPSE, SBRC, etc. 
*/
+
+static bool
+avr_2word_insn_p (rtx insn)
+{
+  if (avr_current_device->errata_skip
+      || !insn
+      || 2 != get_attr_length (insn))
+    {
+      return false;
+    }
+
+  switch (INSN_CODE (insn))
+    {
+    default:
+      return false;
+
+    case CODE_FOR_movqi_insn:
+    case CODE_FOR_movuqq_insn:
+    case CODE_FOR_movqq_insn:
+      {
+        rtx set = single_set (insn);
+        rtx src = SET_SRC (set);
+        rtx dest = SET_DEST (set);
+
+        /* Factor out LDS and STS from movqi_insn. */
+
+        if (MEM_P (dest)
+            && (REG_P (src) || src == CONST0_RTX (GET_MODE (dest))))
+          {
+            return CONSTANT_ADDRESS_P (XEXP (dest, 0));
+          }
+        else if (REG_P (dest)
+                 && MEM_P (src))
+          {
+            return CONSTANT_ADDRESS_P (XEXP (src, 0));
+          }
+
+        return false;
+      }
+
+    case CODE_FOR_call_insn:
+    case CODE_FOR_call_value_insn:
+      return true;
+    }
+}
+
+
+int
+jump_over_one_insn_p (rtx insn, rtx dest)
+{
+  int uid = INSN_UID (GET_CODE (dest) == LABEL_REF
+                      ? XEXP (dest, 0)
+                      : dest);
+  int jump_addr = INSN_ADDRESSES (INSN_UID (insn));
+  int dest_addr = INSN_ADDRESSES (uid);
+  int jump_offset = dest_addr - jump_addr - get_attr_length (insn);
+
+  return (jump_offset == 1
+          || (jump_offset == 2
+              && avr_2word_insn_p (next_active_insn (insn))));
+}
+
+
+/* Worker function for `HARD_REGNO_MODE_OK'. */
+/* Returns 1 if a value of mode MODE can be stored starting with hard
+   register number REGNO. On the enhanced core, anything larger than
+   1 byte must start in an even-numbered register for "movw" to work
+   (this way we don't have to check for odd registers everywhere). */
+
+int
+avr_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+  /* NOTE: 8-bit values must not be disallowed for R28 or R29.
+     Disallowing QI et al. in these regs might lead to code like
+     (set (subreg:QI (reg:HI 28) n) ...)
+     which will result in wrong code because reload does not
+     handle SUBREGs of hard registers like this.
+     This could be fixed in reload. However, it appears
+     that fixing reload is not wanted by reload people. */
+
+  /* Any GENERAL_REGS register can hold 8-bit values. */
+
+  if (GET_MODE_SIZE (mode) == 1)
+    return 1;
+
+  /* FIXME: Ideally, the following test is not needed.
+     However, it turned out that it can reduce the number
+     of spill fails. AVR and its poor endowment with
+     address registers is an extreme stress test for reload. */
+
+  if (GET_MODE_SIZE (mode) >= 4
+      && regno >= REG_X)
+    return 0;
+
+  /* All modes larger than 8 bits should start in an even register. */
+
+  return !(regno & 1);
+}
+
+
+/* Implement `HARD_REGNO_CALL_PART_CLOBBERED'. */
+
+int
+avr_hard_regno_call_part_clobbered (unsigned regno, enum machine_mode mode)
+{
+  /* FIXME: This hook gets called with MODE:REGNO combinations that don't
+     represent valid hard registers like, e.g. HI:29. Returning TRUE
+     for such registers can lead to performance degradation as mentioned
+     in PR53595. Thus, report invalid hard registers as FALSE. */
+
+  if (!avr_hard_regno_mode_ok (regno, mode))
+    return 0;
+
+  /* Return true if any of the following boundaries is crossed:
+     17/18, 27/28 and 29/30. */
+
+  return ((regno < 18 && regno + GET_MODE_SIZE (mode) > 18)
+          || (regno < REG_Y && regno + GET_MODE_SIZE (mode) > REG_Y)
+          || (regno < REG_Z && regno + GET_MODE_SIZE (mode) > REG_Z));
+}
+
+
+/* Implement `MODE_CODE_BASE_REG_CLASS'. */
+
+enum reg_class
+avr_mode_code_base_reg_class (enum machine_mode mode ATTRIBUTE_UNUSED,
+                              addr_space_t as, RTX_CODE outer_code,
+                              RTX_CODE index_code ATTRIBUTE_UNUSED)
+{
+  if (!ADDR_SPACE_GENERIC_P (as))
+    {
+      return POINTER_Z_REGS;
+    }
+
+  if (!avr_strict_X)
+    return reload_completed ?
BASE_POINTER_REGS : POINTER_REGS; + + return PLUS == outer_code ? BASE_POINTER_REGS : POINTER_REGS; +} + + +/* Implement `REGNO_MODE_CODE_OK_FOR_BASE_P'. */ + +bool +avr_regno_mode_code_ok_for_base_p (int regno, + enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + RTX_CODE outer_code, + RTX_CODE index_code ATTRIBUTE_UNUSED) +{ + bool ok = false; + + if (!ADDR_SPACE_GENERIC_P (as)) + { + if (regno < FIRST_PSEUDO_REGISTER + && regno == REG_Z) + { + return true; + } + + if (reg_renumber) + { + regno = reg_renumber[regno]; + + if (regno == REG_Z) + { + return true; + } + } + + return false; + } + + if (regno < FIRST_PSEUDO_REGISTER + && (regno == REG_X + || regno == REG_Y + || regno == REG_Z + || regno == ARG_POINTER_REGNUM)) + { + ok = true; + } + else if (reg_renumber) + { + regno = reg_renumber[regno]; + + if (regno == REG_X + || regno == REG_Y + || regno == REG_Z + || regno == ARG_POINTER_REGNUM) + { + ok = true; + } + } + + if (avr_strict_X + && PLUS == outer_code + && regno == REG_X) + { + ok = false; + } + + return ok; +} + + +/* A helper for `output_reload_insisf' and `output_reload_inhi'. */ +/* Set 32-bit register OP[0] to compile-time constant OP[1]. + CLOBBER_REG is a QI clobber register or NULL_RTX. + LEN == NULL: output instructions. + LEN != NULL: set *LEN to the length of the instruction sequence + (in words) printed with LEN = NULL. + If CLEAR_P is true, OP[0] had been cleard to Zero already. + If CLEAR_P is false, nothing is known about OP[0]. + + The effect on cc0 is as follows: + + Load 0 to any register except ZERO_REG : NONE + Load ld register with any value : NONE + Anything else: : CLOBBER */ + +static void +output_reload_in_const (rtx *op, rtx clobber_reg, int *len, bool clear_p) +{ + rtx src = op[1]; + rtx dest = op[0]; + rtx xval, xdest[4]; + int ival[4]; + int clobber_val = 1234; + bool cooked_clobber_p = false; + bool set_p = false; + enum machine_mode mode = GET_MODE (dest); + int n, n_bytes = GET_MODE_SIZE (mode); + + gcc_assert (REG_P (dest) + && CONSTANT_P (src)); + + if (len) + *len = 0; + + /* (REG:SI 14) is special: It's neither in LD_REGS nor in NO_LD_REGS + but has some subregs that are in LD_REGS. Use the MSB (REG:QI 17). */ + + if (REGNO (dest) < 16 + && REGNO (dest) + GET_MODE_SIZE (mode) > 16) + { + clobber_reg = all_regs_rtx[REGNO (dest) + n_bytes - 1]; + } + + /* We might need a clobber reg but don't have one. Look at the value to + be loaded more closely. A clobber is only needed if it is a symbol + or contains a byte that is neither 0, -1 or a power of 2. */ + + if (NULL_RTX == clobber_reg + && !test_hard_reg_class (LD_REGS, dest) + && (! (CONST_INT_P (src) || CONST_FIXED_P (src) || CONST_DOUBLE_P (src)) + || !avr_popcount_each_byte (src, n_bytes, + (1 << 0) | (1 << 1) | (1 << 8)))) + { + /* We have no clobber register but need one. Cook one up. + That's cheaper than loading from constant pool. */ + + cooked_clobber_p = true; + clobber_reg = all_regs_rtx[REG_Z + 1]; + avr_asm_len ("mov __tmp_reg__,%0", &clobber_reg, len, 1); + } + + /* Now start filling DEST from LSB to MSB. */ + + for (n = 0; n < n_bytes; n++) + { + int ldreg_p; + bool done_byte = false; + int j; + rtx xop[3]; + + /* Crop the n-th destination byte. 
*/ + + xdest[n] = simplify_gen_subreg (QImode, dest, mode, n); + ldreg_p = test_hard_reg_class (LD_REGS, xdest[n]); + + if (!CONST_INT_P (src) + && !CONST_FIXED_P (src) + && !CONST_DOUBLE_P (src)) + { + static const char* const asm_code[][2] = + { + { "ldi %2,lo8(%1)" CR_TAB "mov %0,%2", "ldi %0,lo8(%1)" }, + { "ldi %2,hi8(%1)" CR_TAB "mov %0,%2", "ldi %0,hi8(%1)" }, + { "ldi %2,hlo8(%1)" CR_TAB "mov %0,%2", "ldi %0,hlo8(%1)" }, + { "ldi %2,hhi8(%1)" CR_TAB "mov %0,%2", "ldi %0,hhi8(%1)" } + }; + + xop[0] = xdest[n]; + xop[1] = src; + xop[2] = clobber_reg; + + avr_asm_len (asm_code[n][ldreg_p], xop, len, ldreg_p ? 1 : 2); + + continue; + } + + /* Crop the n-th source byte. */ + + xval = simplify_gen_subreg (QImode, src, mode, n); + ival[n] = INTVAL (xval); + + /* Look if we can reuse the low word by means of MOVW. */ + + if (n == 2 + && n_bytes >= 4 + && AVR_HAVE_MOVW) + { + rtx lo16 = simplify_gen_subreg (HImode, src, mode, 0); + rtx hi16 = simplify_gen_subreg (HImode, src, mode, 2); + + if (INTVAL (lo16) == INTVAL (hi16)) + { + if (0 != INTVAL (lo16) + || !clear_p) + { + avr_asm_len ("movw %C0,%A0", &op[0], len, 1); + } + + break; + } + } + + /* Don't use CLR so that cc0 is set as expected. */ + + if (ival[n] == 0) + { + if (!clear_p) + avr_asm_len (ldreg_p ? "ldi %0,0" + : ZERO_REGNO == REGNO (xdest[n]) ? "clr %0" + : "mov %0,__zero_reg__", + &xdest[n], len, 1); + continue; + } + + if (clobber_val == ival[n] + && REGNO (clobber_reg) == REGNO (xdest[n])) + { + continue; + } + + /* LD_REGS can use LDI to move a constant value */ + + if (ldreg_p) + { + xop[0] = xdest[n]; + xop[1] = xval; + avr_asm_len ("ldi %0,lo8(%1)", xop, len, 1); + continue; + } + + /* Try to reuse value already loaded in some lower byte. */ + + for (j = 0; j < n; j++) + if (ival[j] == ival[n]) + { + xop[0] = xdest[n]; + xop[1] = xdest[j]; + + avr_asm_len ("mov %0,%1", xop, len, 1); + done_byte = true; + break; + } + + if (done_byte) + continue; + + /* Need no clobber reg for -1: Use CLR/DEC */ + + if (-1 == ival[n]) + { + if (!clear_p) + avr_asm_len ("clr %0", &xdest[n], len, 1); + + avr_asm_len ("dec %0", &xdest[n], len, 1); + continue; + } + else if (1 == ival[n]) + { + if (!clear_p) + avr_asm_len ("clr %0", &xdest[n], len, 1); + + avr_asm_len ("inc %0", &xdest[n], len, 1); + continue; + } + + /* Use T flag or INC to manage powers of 2 if we have + no clobber reg. */ + + if (NULL_RTX == clobber_reg + && single_one_operand (xval, QImode)) + { + xop[0] = xdest[n]; + xop[1] = GEN_INT (exact_log2 (ival[n] & GET_MODE_MASK (QImode))); + + gcc_assert (constm1_rtx != xop[1]); + + if (!set_p) + { + set_p = true; + avr_asm_len ("set", xop, len, 1); + } + + if (!clear_p) + avr_asm_len ("clr %0", xop, len, 1); + + avr_asm_len ("bld %0,%1", xop, len, 1); + continue; + } + + /* We actually need the LD_REGS clobber reg. */ + + gcc_assert (NULL_RTX != clobber_reg); + + xop[0] = xdest[n]; + xop[1] = xval; + xop[2] = clobber_reg; + clobber_val = ival[n]; + + avr_asm_len ("ldi %2,lo8(%1)" CR_TAB + "mov %0,%2", xop, len, 2); + } + + /* If we cooked up a clobber reg above, restore it. */ + + if (cooked_clobber_p) + { + avr_asm_len ("mov %0,__tmp_reg__", &clobber_reg, len, 1); + } +} + + +/* Reload the constant OP[1] into the HI register OP[0]. + CLOBBER_REG is a QI clobber reg needed to move vast majority of consts + into a NO_LD_REGS register. If CLOBBER_REG is NULL_RTX we either don't + need a clobber reg or have to cook one up. + + PLEN == NULL: Output instructions. + PLEN != NULL: Output nothing. 
Set *PLEN to number of words occupied + by the insns printed. + + Return "". */ + +const char* +output_reload_inhi (rtx *op, rtx clobber_reg, int *plen) +{ + output_reload_in_const (op, clobber_reg, plen, false); + return ""; +} + + +/* Reload a SI or SF compile time constant OP[1] into the register OP[0]. + CLOBBER_REG is a QI clobber reg needed to move vast majority of consts + into a NO_LD_REGS register. If CLOBBER_REG is NULL_RTX we either don't + need a clobber reg or have to cook one up. + + LEN == NULL: Output instructions. + + LEN != NULL: Output nothing. Set *LEN to number of words occupied + by the insns printed. + + Return "". */ + +const char * +output_reload_insisf (rtx *op, rtx clobber_reg, int *len) +{ + if (AVR_HAVE_MOVW + && !test_hard_reg_class (LD_REGS, op[0]) + && (CONST_INT_P (op[1]) + || CONST_FIXED_P (op[1]) + || CONST_DOUBLE_P (op[1]))) + { + int len_clr, len_noclr; + + /* In some cases it is better to clear the destination beforehand, e.g. + + CLR R2 CLR R3 MOVW R4,R2 INC R2 + + is shorther than + + CLR R2 INC R2 CLR R3 CLR R4 CLR R5 + + We find it too tedious to work that out in the print function. + Instead, we call the print function twice to get the lengths of + both methods and use the shortest one. */ + + output_reload_in_const (op, clobber_reg, &len_clr, true); + output_reload_in_const (op, clobber_reg, &len_noclr, false); + + if (len_noclr - len_clr == 4) + { + /* Default needs 4 CLR instructions: clear register beforehand. */ + + avr_asm_len ("mov %A0,__zero_reg__" CR_TAB + "mov %B0,__zero_reg__" CR_TAB + "movw %C0,%A0", &op[0], len, 3); + + output_reload_in_const (op, clobber_reg, len, true); + + if (len) + *len += 3; + + return ""; + } + } + + /* Default: destination not pre-cleared. */ + + output_reload_in_const (op, clobber_reg, len, false); + return ""; +} + +const char* +avr_out_reload_inpsi (rtx *op, rtx clobber_reg, int *len) +{ + output_reload_in_const (op, clobber_reg, len, false); + return ""; +} + + +/* Worker function for `ASM_OUTPUT_ADDR_VEC_ELT'. */ + +void +avr_output_addr_vec_elt (FILE *stream, int value) +{ + if (AVR_HAVE_JMP_CALL) + fprintf (stream, "\t.word gs(.L%d)\n", value); + else + fprintf (stream, "\trjmp .L%d\n", value); +} + + +/* Implement `TARGET_HARD_REGNO_SCRATCH_OK'. */ +/* Returns true if SCRATCH are safe to be allocated as a scratch + registers (for a define_peephole2) in the current function. */ + +static bool +avr_hard_regno_scratch_ok (unsigned int regno) +{ + /* Interrupt functions can only use registers that have already been saved + by the prologue, even if they would normally be call-clobbered. */ + + if ((cfun->machine->is_interrupt || cfun->machine->is_signal) + && !df_regs_ever_live_p (regno)) + return false; + + /* Don't allow hard registers that might be part of the frame pointer. + Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM + and don't care for a frame pointer that spans more than one register. */ + + if ((!reload_completed || frame_pointer_needed) + && (regno == REG_Y || regno == REG_Y + 1)) + { + return false; + } + + return true; +} + + +/* Worker function for `HARD_REGNO_RENAME_OK'. */ +/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ + +int +avr_hard_regno_rename_ok (unsigned int old_reg, + unsigned int new_reg) +{ + /* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. 
*/ + + if ((cfun->machine->is_interrupt || cfun->machine->is_signal) + && !df_regs_ever_live_p (new_reg)) + return 0; + + /* Don't allow hard registers that might be part of the frame pointer. + Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM + and don't care for a frame pointer that spans more than one register. */ + + if ((!reload_completed || frame_pointer_needed) + && (old_reg == REG_Y || old_reg == REG_Y + 1 + || new_reg == REG_Y || new_reg == REG_Y + 1)) + { + return 0; + } + + return 1; +} + +/* Output a branch that tests a single bit of a register (QI, HI, SI or DImode) + or memory location in the I/O space (QImode only). + + Operand 0: comparison operator (must be EQ or NE, compare bit to zero). + Operand 1: register operand to test, or CONST_INT memory address. + Operand 2: bit number. + Operand 3: label to jump to if the test is true. */ + +const char* +avr_out_sbxx_branch (rtx insn, rtx operands[]) +{ + enum rtx_code comp = GET_CODE (operands[0]); + bool long_jump = get_attr_length (insn) >= 4; + bool reverse = long_jump || jump_over_one_insn_p (insn, operands[3]); + + if (comp == GE) + comp = EQ; + else if (comp == LT) + comp = NE; + + if (reverse) + comp = reverse_condition (comp); + + switch (GET_CODE (operands[1])) + { + default: + gcc_unreachable(); + + case CONST_INT: + + if (low_io_address_operand (operands[1], QImode)) + { + if (comp == EQ) + output_asm_insn ("sbis %i1,%2", operands); + else + output_asm_insn ("sbic %i1,%2", operands); + } + else + { + output_asm_insn ("in __tmp_reg__,%i1", operands); + if (comp == EQ) + output_asm_insn ("sbrs __tmp_reg__,%2", operands); + else + output_asm_insn ("sbrc __tmp_reg__,%2", operands); + } + + break; /* CONST_INT */ + + case REG: + + if (comp == EQ) + output_asm_insn ("sbrs %T1%T2", operands); + else + output_asm_insn ("sbrc %T1%T2", operands); + + break; /* REG */ + } /* switch */ + + if (long_jump) + return ("rjmp .+4" CR_TAB + "jmp %x3"); + + if (!reverse) + return "rjmp %x3"; + + return ""; +} + +/* Worker function for `TARGET_ASM_CONSTRUCTOR'. */ + +static void +avr_asm_out_ctor (rtx symbol, int priority) +{ + fputs ("\t.global __do_global_ctors\n", asm_out_file); + default_ctor_section_asm_out_constructor (symbol, priority); +} + + +/* Worker function for `TARGET_ASM_DESTRUCTOR'. */ + +static void +avr_asm_out_dtor (rtx symbol, int priority) +{ + fputs ("\t.global __do_global_dtors\n", asm_out_file); + default_dtor_section_asm_out_destructor (symbol, priority); +} + + +/* Worker function for `TARGET_RETURN_IN_MEMORY'. */ + +static bool +avr_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + if (TYPE_MODE (type) == BLKmode) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + return (size == -1 || size > 8); + } + else + return false; +} + + +/* Implement `CASE_VALUES_THRESHOLD'. */ +/* Supply the default for --param case-values-threshold=0 */ + +static unsigned int +avr_case_values_threshold (void) +{ + /* The exact break-even point between a jump table and an if-else tree + depends on several factors not available here like, e.g. if 8-bit + comparisons can be used in the if-else tree or not, on the + range of the case values, if the case value can be reused, on the + register allocation, etc. '7' appears to be a good choice. */ + + return 7; +} + + +/* Implement `TARGET_ADDR_SPACE_ADDRESS_MODE'. */ + +static enum machine_mode +avr_addr_space_address_mode (addr_space_t as) +{ + return avr_addrspace[as].pointer_size == 3 ? 
PSImode : HImode; +} + + +/* Implement `TARGET_ADDR_SPACE_POINTER_MODE'. */ + +static enum machine_mode +avr_addr_space_pointer_mode (addr_space_t as) +{ + return avr_addr_space_address_mode (as); +} + + +/* Helper for following function. */ + +static bool +avr_reg_ok_for_pgm_addr (rtx reg, bool strict) +{ + gcc_assert (REG_P (reg)); + + if (strict) + { + return REGNO (reg) == REG_Z; + } + + /* Avoid combine to propagate hard regs. */ + + if (can_create_pseudo_p() + && REGNO (reg) < REG_Z) + { + return false; + } + + return true; +} + + +/* Implement `TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P'. */ + +static bool +avr_addr_space_legitimate_address_p (enum machine_mode mode, rtx x, + bool strict, addr_space_t as) +{ + bool ok = false; + + switch (as) + { + default: + gcc_unreachable(); + + case ADDR_SPACE_GENERIC: + return avr_legitimate_address_p (mode, x, strict); + + case ADDR_SPACE_FLASH: + case ADDR_SPACE_FLASH1: + case ADDR_SPACE_FLASH2: + case ADDR_SPACE_FLASH3: + case ADDR_SPACE_FLASH4: + case ADDR_SPACE_FLASH5: + + switch (GET_CODE (x)) + { + case REG: + ok = avr_reg_ok_for_pgm_addr (x, strict); + break; + + case POST_INC: + ok = avr_reg_ok_for_pgm_addr (XEXP (x, 0), strict); + break; + + default: + break; + } + + break; /* FLASH */ + + case ADDR_SPACE_MEMX: + if (REG_P (x)) + ok = (!strict + && can_create_pseudo_p()); + + if (LO_SUM == GET_CODE (x)) + { + rtx hi = XEXP (x, 0); + rtx lo = XEXP (x, 1); + + ok = (REG_P (hi) + && (!strict || REGNO (hi) < FIRST_PSEUDO_REGISTER) + && REG_P (lo) + && REGNO (lo) == REG_Z); + } + + break; /* MEMX */ + } + + if (avr_log.legitimate_address_p) + { + avr_edump ("\n%?: ret=%b, mode=%m strict=%d " + "reload_completed=%d reload_in_progress=%d %s:", + ok, mode, strict, reload_completed, reload_in_progress, + reg_renumber ? "(reg_renumber)" : ""); + + if (GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1)) + && IN_RANGE (INTVAL (XEXP (x, 1)), 0, MAX_LD_OFFSET (mode)) + && reg_renumber) + { + avr_edump ("(r%d ---> r%d)", REGNO (XEXP (x, 0)), + true_regnum (XEXP (x, 0))); + } + + avr_edump ("\n%r\n", x); + } + + return ok; +} + + +/* Implement `TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS'. */ + +static rtx +avr_addr_space_legitimize_address (rtx x, rtx old_x, + enum machine_mode mode, addr_space_t as) +{ + if (ADDR_SPACE_GENERIC_P (as)) + return avr_legitimize_address (x, old_x, mode); + + if (avr_log.legitimize_address) + { + avr_edump ("\n%?: mode=%m\n %r\n", mode, old_x); + } + + return old_x; +} + + +/* Implement `TARGET_ADDR_SPACE_CONVERT'. */ + +static rtx +avr_addr_space_convert (rtx src, tree type_from, tree type_to) +{ + addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (type_from)); + addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (type_to)); + + if (avr_log.progmem) + avr_edump ("\n%!: op = %r\nfrom = %t\nto = %t\n", + src, type_from, type_to); + + /* Up-casting from 16-bit to 24-bit pointer. */ + + if (as_from != ADDR_SPACE_MEMX + && as_to == ADDR_SPACE_MEMX) + { + int msb; + rtx sym = src; + rtx reg = gen_reg_rtx (PSImode); + + while (CONST == GET_CODE (sym) || PLUS == GET_CODE (sym)) + sym = XEXP (sym, 0); + + /* Look at symbol flags: avr_encode_section_info set the flags + also if attribute progmem was seen so that we get the right + promotion for, e.g. PSTR-like strings that reside in generic space + but are located in flash. In that case we patch the incoming + address space. 
*/
+
+      if (SYMBOL_REF == GET_CODE (sym)
+          && ADDR_SPACE_FLASH == AVR_SYMBOL_GET_ADDR_SPACE (sym))
+        {
+          as_from = ADDR_SPACE_FLASH;
+        }
+
+      /* Linearize memory: RAM has bit 23 set. */
+
+      msb = ADDR_SPACE_GENERIC_P (as_from)
+        ? 0x80
+        : avr_addrspace[as_from].segment;
+
+      src = force_reg (Pmode, src);
+
+      emit_insn (msb == 0
+                 ? gen_zero_extendhipsi2 (reg, src)
+                 : gen_n_extendhipsi2 (reg, gen_int_mode (msb, QImode), src));
+
+      return reg;
+    }
+
+  /* Down-casting from 24-bit to 16-bit throws away the high byte. */
+
+  if (as_from == ADDR_SPACE_MEMX
+      && as_to != ADDR_SPACE_MEMX)
+    {
+      rtx new_src = gen_reg_rtx (Pmode);
+
+      src = force_reg (PSImode, src);
+
+      emit_move_insn (new_src,
+                      simplify_gen_subreg (Pmode, src, PSImode, 0));
+      return new_src;
+    }
+
+  return src;
+}
+
+
+/* Implement `TARGET_ADDR_SPACE_SUBSET_P'. */
+
+static bool
+avr_addr_space_subset_p (addr_space_t subset ATTRIBUTE_UNUSED,
+                         addr_space_t superset ATTRIBUTE_UNUSED)
+{
+  /* Allow any kind of pointer mess. */
+
+  return true;
+}
+
+
+/* Implement `TARGET_CONVERT_TO_TYPE'. */
+
+static tree
+avr_convert_to_type (tree type, tree expr)
+{
+  /* Print a diagnostic for a pointer conversion that changes the address
+     space of the pointer target to a non-enclosing address space,
+     provided -Waddr-space-convert is on.
+
+     FIXME: Filter out cases where the target object is known to
+     be located in the right memory, like in
+
+     (const __flash*) PSTR ("text")
+
+     Also try to distinguish between explicit casts requested by
+     the user and implicit casts like
+
+     void f (const __flash char*);
+
+     void g (const char *p)
+     {
+       f ((const __flash*) p);
+     }
+
+     under the assumption that an explicit cast means that the user
+     knows what he is doing, e.g. interface with PSTR or old style
+     code with progmem and pgm_read_xxx.
+  */
+
+  if (avr_warn_addr_space_convert
+      && expr != error_mark_node
+      && POINTER_TYPE_P (type)
+      && POINTER_TYPE_P (TREE_TYPE (expr)))
+    {
+      addr_space_t as_old = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (expr)));
+      addr_space_t as_new = TYPE_ADDR_SPACE (TREE_TYPE (type));
+
+      if (avr_log.progmem)
+        avr_edump ("%?: type = %t\nexpr = %t\n\n", type, expr);
+
+      if (as_new != ADDR_SPACE_MEMX
+          && as_new != as_old)
+        {
+          location_t loc = EXPR_LOCATION (expr);
+          const char *name_old = avr_addrspace[as_old].name;
+          const char *name_new = avr_addrspace[as_new].name;
+
+          warning (OPT_Waddr_space_convert,
+                   "conversion from address space %qs to address space %qs",
+                   ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old,
+                   ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new);
+
+          return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, expr);
+        }
+    }
+
+  return NULL_TREE;
+}
+
+
+/* Worker function for movmemhi expander.
+   XOP[0] Destination as MEM:BLK
+   XOP[1] Source " "
+   XOP[2] # Bytes to copy
+
+   Return TRUE if the expansion is accomplished.
+   Return FALSE if the operand combination is not supported. */
+
+bool
+avr_emit_movmemhi (rtx *xop)
+{
+  HOST_WIDE_INT count;
+  enum machine_mode loop_mode;
+  addr_space_t as = MEM_ADDR_SPACE (xop[1]);
+  rtx loop_reg, addr1, a_src, a_dest, insn, xas;
+  rtx a_hi8 = NULL_RTX;
+
+  if (avr_mem_flash_p (xop[0]))
+    return false;
+
+  if (!CONST_INT_P (xop[2]))
+    return false;
+
+  count = INTVAL (xop[2]);
+  if (count <= 0)
+    return false;
+
+  a_src = XEXP (xop[1], 0);
+  a_dest = XEXP (xop[0], 0);
+
+  if (PSImode == GET_MODE (a_src))
+    {
+      gcc_assert (as == ADDR_SPACE_MEMX);
+
+      loop_mode = (count < 0x100) ?
QImode : HImode; + loop_reg = gen_rtx_REG (loop_mode, 24); + emit_move_insn (loop_reg, gen_int_mode (count, loop_mode)); + + addr1 = simplify_gen_subreg (HImode, a_src, PSImode, 0); + a_hi8 = simplify_gen_subreg (QImode, a_src, PSImode, 2); + } + else + { + int segment = avr_addrspace[as].segment; + + if (segment + && avr_current_device->n_flash > 1) + { + a_hi8 = GEN_INT (segment); + emit_move_insn (rampz_rtx, a_hi8 = copy_to_mode_reg (QImode, a_hi8)); + } + else if (!ADDR_SPACE_GENERIC_P (as)) + { + as = ADDR_SPACE_FLASH; + } + + addr1 = a_src; + + loop_mode = (count <= 0x100) ? QImode : HImode; + loop_reg = copy_to_mode_reg (loop_mode, gen_int_mode (count, loop_mode)); + } + + xas = GEN_INT (as); + + /* FIXME: Register allocator might come up with spill fails if it is left + on its own. Thus, we allocate the pointer registers by hand: + Z = source address + X = destination address */ + + emit_move_insn (lpm_addr_reg_rtx, addr1); + emit_move_insn (gen_rtx_REG (HImode, REG_X), a_dest); + + /* FIXME: Register allocator does a bad job and might spill address + register(s) inside the loop leading to additional move instruction + to/from stack which could clobber tmp_reg. Thus, do *not* emit + load and store as separate insns. Instead, we perform the copy + by means of one monolithic insn. */ + + gcc_assert (TMP_REGNO == LPM_REGNO); + + if (as != ADDR_SPACE_MEMX) + { + /* Load instruction ([E]LPM or LD) is known at compile time: + Do the copy-loop inline. */ + + rtx (*fun) (rtx, rtx, rtx) + = QImode == loop_mode ? gen_movmem_qi : gen_movmem_hi; + + insn = fun (xas, loop_reg, loop_reg); + } + else + { + rtx (*fun) (rtx, rtx) + = QImode == loop_mode ? gen_movmemx_qi : gen_movmemx_hi; + + emit_move_insn (gen_rtx_REG (QImode, 23), a_hi8); + + insn = fun (xas, GEN_INT (avr_addr.rampz)); + } + + set_mem_addr_space (SET_SRC (XVECEXP (insn, 0, 0)), as); + emit_insn (insn); + + return true; +} + + +/* Print assembler for movmem_qi, movmem_hi insns... 
+ $0 : Address Space + $1, $2 : Loop register + Z : Source address + X : Destination address +*/ + +const char* +avr_out_movmem (rtx insn ATTRIBUTE_UNUSED, rtx *op, int *plen) +{ + addr_space_t as = (addr_space_t) INTVAL (op[0]); + enum machine_mode loop_mode = GET_MODE (op[1]); + bool sbiw_p = test_hard_reg_class (ADDW_REGS, op[1]); + rtx xop[3]; + + if (plen) + *plen = 0; + + xop[0] = op[0]; + xop[1] = op[1]; + xop[2] = tmp_reg_rtx; + + /* Loop label */ + + avr_asm_len ("0:", xop, plen, 0); + + /* Load with post-increment */ + + switch (as) + { + default: + gcc_unreachable(); + + case ADDR_SPACE_GENERIC: + + avr_asm_len ("ld %2,Z+", xop, plen, 1); + break; + + case ADDR_SPACE_FLASH: + + if (AVR_HAVE_LPMX) + avr_asm_len ("lpm %2,Z+", xop, plen, 1); + else + avr_asm_len ("lpm" CR_TAB + "adiw r30,1", xop, plen, 2); + break; + + case ADDR_SPACE_FLASH1: + case ADDR_SPACE_FLASH2: + case ADDR_SPACE_FLASH3: + case ADDR_SPACE_FLASH4: + case ADDR_SPACE_FLASH5: + + if (AVR_HAVE_ELPMX) + avr_asm_len ("elpm %2,Z+", xop, plen, 1); + else + avr_asm_len ("elpm" CR_TAB + "adiw r30,1", xop, plen, 2); + break; + } + + /* Store with post-increment */ + + avr_asm_len ("st X+,%2", xop, plen, 1); + + /* Decrement loop-counter and set Z-flag */ + + if (QImode == loop_mode) + { + avr_asm_len ("dec %1", xop, plen, 1); + } + else if (sbiw_p) + { + avr_asm_len ("sbiw %1,1", xop, plen, 1); + } + else + { + avr_asm_len ("subi %A1,1" CR_TAB + "sbci %B1,0", xop, plen, 2); + } + + /* Loop until zero */ + + return avr_asm_len ("brne 0b", xop, plen, 1); +} + + + +/* Helper for __builtin_avr_delay_cycles */ + +static rtx +avr_mem_clobber (void) +{ + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + return mem; +} + +static void +avr_expand_delay_cycles (rtx operands0) +{ + unsigned HOST_WIDE_INT cycles = UINTVAL (operands0) & GET_MODE_MASK (SImode); + unsigned HOST_WIDE_INT cycles_used; + unsigned HOST_WIDE_INT loop_count; + + if (IN_RANGE (cycles, 83886082, 0xFFFFFFFF)) + { + loop_count = ((cycles - 9) / 6) + 1; + cycles_used = ((loop_count - 1) * 6) + 9; + emit_insn (gen_delay_cycles_4 (gen_int_mode (loop_count, SImode), + avr_mem_clobber())); + cycles -= cycles_used; + } + + if (IN_RANGE (cycles, 262145, 83886081)) + { + loop_count = ((cycles - 7) / 5) + 1; + if (loop_count > 0xFFFFFF) + loop_count = 0xFFFFFF; + cycles_used = ((loop_count - 1) * 5) + 7; + emit_insn (gen_delay_cycles_3 (gen_int_mode (loop_count, SImode), + avr_mem_clobber())); + cycles -= cycles_used; + } + + if (IN_RANGE (cycles, 768, 262144)) + { + loop_count = ((cycles - 5) / 4) + 1; + if (loop_count > 0xFFFF) + loop_count = 0xFFFF; + cycles_used = ((loop_count - 1) * 4) + 5; + emit_insn (gen_delay_cycles_2 (gen_int_mode (loop_count, HImode), + avr_mem_clobber())); + cycles -= cycles_used; + } + + if (IN_RANGE (cycles, 6, 767)) + { + loop_count = cycles / 3; + if (loop_count > 255) + loop_count = 255; + cycles_used = loop_count * 3; + emit_insn (gen_delay_cycles_1 (gen_int_mode (loop_count, QImode), + avr_mem_clobber())); + cycles -= cycles_used; + } + + while (cycles >= 2) + { + emit_insn (gen_nopv (GEN_INT(2))); + cycles -= 2; + } + + if (cycles == 1) + { + emit_insn (gen_nopv (GEN_INT(1))); + cycles--; + } +} + + +/* Compute the image of x under f, i.e. perform x --> f(x) */ + +static int +avr_map (unsigned int f, int x) +{ + return x < 8 ? (f >> (4 * x)) & 0xf : 0; +} + + +/* Return some metrics of map A. */ + +enum + { + /* Number of fixed points in { 0 ... 
7 } */ + MAP_FIXED_0_7, + + /* Size of preimage of non-fixed points in { 0 ... 7 } */ + MAP_NONFIXED_0_7, + + /* Mask representing the fixed points in { 0 ... 7 } */ + MAP_MASK_FIXED_0_7, + + /* Size of the preimage of { 0 ... 7 } */ + MAP_PREIMAGE_0_7, + + /* Mask that represents the preimage of { f } */ + MAP_MASK_PREIMAGE_F + }; + +static unsigned +avr_map_metric (unsigned int a, int mode) +{ + unsigned i, metric = 0; + + for (i = 0; i < 8; i++) + { + unsigned ai = avr_map (a, i); + + if (mode == MAP_FIXED_0_7) + metric += ai == i; + else if (mode == MAP_NONFIXED_0_7) + metric += ai < 8 && ai != i; + else if (mode == MAP_MASK_FIXED_0_7) + metric |= ((unsigned) (ai == i)) << i; + else if (mode == MAP_PREIMAGE_0_7) + metric += ai < 8; + else if (mode == MAP_MASK_PREIMAGE_F) + metric |= ((unsigned) (ai == 0xf)) << i; + else + gcc_unreachable(); + } + + return metric; +} + + +/* Return true if IVAL has a 0xf in its hexadecimal representation + and false, otherwise. Only nibbles 0..7 are taken into account. + Used as constraint helper for C0f and Cxf. */ + +bool +avr_has_nibble_0xf (rtx ival) +{ + unsigned int map = UINTVAL (ival) & GET_MODE_MASK (SImode); + return 0 != avr_map_metric (map, MAP_MASK_PREIMAGE_F); +} + + +/* We have a set of bits that are mapped by a function F. + Try to decompose F by means of a second function G so that + + F = F o G^-1 o G + + and + + cost (F o G^-1) + cost (G) < cost (F) + + Example: Suppose builtin insert_bits supplies us with the map + F = 0x3210ffff. Instead of doing 4 bit insertions to get the high + nibble of the result, we can just as well rotate the bits before inserting + them and use the map 0x7654ffff which is cheaper than the original map. + For this example G = G^-1 = 0x32107654 and F o G^-1 = 0x7654ffff. */ + +typedef struct +{ + /* tree code of binary function G */ + enum tree_code code; + + /* The constant second argument of G */ + int arg; + + /* G^-1, the inverse of G (*, arg) */ + unsigned ginv; + + /* The cost of appplying G (*, arg) */ + int cost; + + /* The composition F o G^-1 (*, arg) for some function F */ + unsigned int map; + + /* For debug purpose only */ + const char *str; +} avr_map_op_t; + +static const avr_map_op_t avr_map_op[] = + { + { LROTATE_EXPR, 0, 0x76543210, 0, 0, "id" }, + { LROTATE_EXPR, 1, 0x07654321, 2, 0, "<<<" }, + { LROTATE_EXPR, 2, 0x10765432, 4, 0, "<<<" }, + { LROTATE_EXPR, 3, 0x21076543, 4, 0, "<<<" }, + { LROTATE_EXPR, 4, 0x32107654, 1, 0, "<<<" }, + { LROTATE_EXPR, 5, 0x43210765, 3, 0, "<<<" }, + { LROTATE_EXPR, 6, 0x54321076, 5, 0, "<<<" }, + { LROTATE_EXPR, 7, 0x65432107, 3, 0, "<<<" }, + { RSHIFT_EXPR, 1, 0x6543210c, 1, 0, ">>" }, + { RSHIFT_EXPR, 1, 0x7543210c, 1, 0, ">>" }, + { RSHIFT_EXPR, 2, 0x543210cc, 2, 0, ">>" }, + { RSHIFT_EXPR, 2, 0x643210cc, 2, 0, ">>" }, + { RSHIFT_EXPR, 2, 0x743210cc, 2, 0, ">>" }, + { LSHIFT_EXPR, 1, 0xc7654321, 1, 0, "<<" }, + { LSHIFT_EXPR, 2, 0xcc765432, 2, 0, "<<" } + }; + + +/* Try to decompose F as F = (F o G^-1) o G as described above. + The result is a struct representing F o G^-1 and G. + If result.cost < 0 then such a decomposition does not exist. 
*/ + +static avr_map_op_t +avr_map_decompose (unsigned int f, const avr_map_op_t *g, bool val_const_p) +{ + int i; + bool val_used_p = 0 != avr_map_metric (f, MAP_MASK_PREIMAGE_F); + avr_map_op_t f_ginv = *g; + unsigned int ginv = g->ginv; + + f_ginv.cost = -1; + + /* Step 1: Computing F o G^-1 */ + + for (i = 7; i >= 0; i--) + { + int x = avr_map (f, i); + + if (x <= 7) + { + x = avr_map (ginv, x); + + /* The bit is no element of the image of G: no avail (cost = -1) */ + + if (x > 7) + return f_ginv; + } + + f_ginv.map = (f_ginv.map << 4) + x; + } + + /* Step 2: Compute the cost of the operations. + The overall cost of doing an operation prior to the insertion is + the cost of the insertion plus the cost of the operation. */ + + /* Step 2a: Compute cost of F o G^-1 */ + + if (0 == avr_map_metric (f_ginv.map, MAP_NONFIXED_0_7)) + { + /* The mapping consists only of fixed points and can be folded + to AND/OR logic in the remainder. Reasonable cost is 3. */ + + f_ginv.cost = 2 + (val_used_p && !val_const_p); + } + else + { + rtx xop[4]; + + /* Get the cost of the insn by calling the output worker with some + fake values. Mimic effect of reloading xop[3]: Unused operands + are mapped to 0 and used operands are reloaded to xop[0]. */ + + xop[0] = all_regs_rtx[24]; + xop[1] = gen_int_mode (f_ginv.map, SImode); + xop[2] = all_regs_rtx[25]; + xop[3] = val_used_p ? xop[0] : const0_rtx; + + avr_out_insert_bits (xop, &f_ginv.cost); + + f_ginv.cost += val_const_p && val_used_p ? 1 : 0; + } + + /* Step 2b: Add cost of G */ + + f_ginv.cost += g->cost; + + if (avr_log.builtin) + avr_edump (" %s%d=%d", g->str, g->arg, f_ginv.cost); + + return f_ginv; +} + + +/* Insert bits from XOP[1] into XOP[0] according to MAP. + XOP[0] and XOP[1] don't overlap. + If FIXP_P = true: Move all bits according to MAP using BLD/BST sequences. + If FIXP_P = false: Just move the bit if its position in the destination + is different to its source position. */ + +static void +avr_move_bits (rtx *xop, unsigned int map, bool fixp_p, int *plen) +{ + int bit_dest, b; + + /* T-flag contains this bit of the source, i.e. of XOP[1] */ + int t_bit_src = -1; + + /* We order the operations according to the requested source bit b. */ + + for (b = 0; b < 8; b++) + for (bit_dest = 0; bit_dest < 8; bit_dest++) + { + int bit_src = avr_map (map, bit_dest); + + if (b != bit_src + || bit_src >= 8 + /* Same position: No need to copy as requested by FIXP_P. */ + || (bit_dest == bit_src && !fixp_p)) + continue; + + if (t_bit_src != bit_src) + { + /* Source bit is not yet in T: Store it to T. */ + + t_bit_src = bit_src; + + xop[3] = GEN_INT (bit_src); + avr_asm_len ("bst %T1%T3", xop, plen, 1); + } + + /* Load destination bit with T. */ + + xop[3] = GEN_INT (bit_dest); + avr_asm_len ("bld %T0%T3", xop, plen, 1); + } +} + + +/* PLEN == 0: Print assembler code for `insert_bits'. + PLEN != 0: Compute code length in bytes. + + OP[0]: Result + OP[1]: The mapping composed of nibbles. If nibble no. N is + 0: Bit N of result is copied from bit OP[2].0 + ... ... 
+ 7: Bit N of result is copied from bit OP[2].7 + 0xf: Bit N of result is copied from bit OP[3].N + OP[2]: Bits to be inserted + OP[3]: Target value */ + +const char* +avr_out_insert_bits (rtx *op, int *plen) +{ + unsigned int map = UINTVAL (op[1]) & GET_MODE_MASK (SImode); + unsigned mask_fixed; + bool fixp_p = true; + rtx xop[4]; + + xop[0] = op[0]; + xop[1] = op[2]; + xop[2] = op[3]; + + gcc_assert (REG_P (xop[2]) || CONST_INT_P (xop[2])); + + if (plen) + *plen = 0; + else if (flag_print_asm_name) + fprintf (asm_out_file, ASM_COMMENT_START "map = 0x%08x\n", map); + + /* If MAP has fixed points it might be better to initialize the result + with the bits to be inserted instead of moving all bits by hand. */ + + mask_fixed = avr_map_metric (map, MAP_MASK_FIXED_0_7); + + if (REGNO (xop[0]) == REGNO (xop[1])) + { + /* Avoid early-clobber conflicts */ + + avr_asm_len ("mov __tmp_reg__,%1", xop, plen, 1); + xop[1] = tmp_reg_rtx; + fixp_p = false; + } + + if (avr_map_metric (map, MAP_MASK_PREIMAGE_F)) + { + /* XOP[2] is used and reloaded to XOP[0] already */ + + int n_fix = 0, n_nofix = 0; + + gcc_assert (REG_P (xop[2])); + + /* Get the code size of the bit insertions; once with all bits + moved and once with fixed points omitted. */ + + avr_move_bits (xop, map, true, &n_fix); + avr_move_bits (xop, map, false, &n_nofix); + + if (fixp_p && n_fix - n_nofix > 3) + { + xop[3] = gen_int_mode (~mask_fixed, QImode); + + avr_asm_len ("eor %0,%1" CR_TAB + "andi %0,%3" CR_TAB + "eor %0,%1", xop, plen, 3); + fixp_p = false; + } + } + else + { + /* XOP[2] is unused */ + + if (fixp_p && mask_fixed) + { + avr_asm_len ("mov %0,%1", xop, plen, 1); + fixp_p = false; + } + } + + /* Move/insert remaining bits. */ + + avr_move_bits (xop, map, fixp_p, plen); + + return ""; +} + + +/* IDs for all the AVR builtins. */ + +enum avr_builtin_id + { +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, CODE, LIBNAME) \ + AVR_BUILTIN_ ## NAME, +#include "builtins.def" +#undef DEF_BUILTIN + + AVR_BUILTIN_COUNT + }; + +struct GTY(()) avr_builtin_description +{ + enum insn_code icode; + int n_args; + tree fndecl; +}; + + +/* Notice that avr_bdesc[] and avr_builtin_id are initialized in such a way + that a built-in's ID can be used to access the built-in by means of + avr_bdesc[ID] */ + +static GTY(()) struct avr_builtin_description +avr_bdesc[AVR_BUILTIN_COUNT] = + { +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, LIBNAME) \ + { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE }, +#include "builtins.def" +#undef DEF_BUILTIN + }; + + +/* Implement `TARGET_BUILTIN_DECL'. */ + +static tree +avr_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (id < AVR_BUILTIN_COUNT) + return avr_bdesc[id].fndecl; + + return error_mark_node; +} + + +static void +avr_init_builtin_int24 (void) +{ + tree int24_type = make_signed_type (GET_MODE_BITSIZE (PSImode)); + tree uint24_type = make_unsigned_type (GET_MODE_BITSIZE (PSImode)); + + lang_hooks.types.register_builtin_type (int24_type, "__int24"); + lang_hooks.types.register_builtin_type (uint24_type, "__uint24"); +} + + +/* Implement `TARGET_INIT_BUILTINS' */ +/* Set up all builtin functions for this target. 
*/ + +static void +avr_init_builtins (void) +{ + tree void_ftype_void + = build_function_type_list (void_type_node, NULL_TREE); + tree uchar_ftype_uchar + = build_function_type_list (unsigned_char_type_node, + unsigned_char_type_node, + NULL_TREE); + tree uint_ftype_uchar_uchar + = build_function_type_list (unsigned_type_node, + unsigned_char_type_node, + unsigned_char_type_node, + NULL_TREE); + tree int_ftype_char_char + = build_function_type_list (integer_type_node, + char_type_node, + char_type_node, + NULL_TREE); + tree int_ftype_char_uchar + = build_function_type_list (integer_type_node, + char_type_node, + unsigned_char_type_node, + NULL_TREE); + tree void_ftype_ulong + = build_function_type_list (void_type_node, + long_unsigned_type_node, + NULL_TREE); + + tree uchar_ftype_ulong_uchar_uchar + = build_function_type_list (unsigned_char_type_node, + long_unsigned_type_node, + unsigned_char_type_node, + unsigned_char_type_node, + NULL_TREE); + + tree const_memx_void_node + = build_qualified_type (void_type_node, + TYPE_QUAL_CONST + | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_MEMX)); + + tree const_memx_ptr_type_node + = build_pointer_type_for_mode (const_memx_void_node, PSImode, false); + + tree char_ftype_const_memx_ptr + = build_function_type_list (char_type_node, + const_memx_ptr_type_node, + NULL); + +#define ITYP(T) \ + lang_hooks.types.type_for_size (TYPE_PRECISION (T), TYPE_UNSIGNED (T)) + +#define FX_FTYPE_FX(fx) \ + tree fx##r_ftype_##fx##r \ + = build_function_type_list (node_##fx##r, node_##fx##r, NULL); \ + tree fx##k_ftype_##fx##k \ + = build_function_type_list (node_##fx##k, node_##fx##k, NULL) + +#define FX_FTYPE_FX_INT(fx) \ + tree fx##r_ftype_##fx##r_int \ + = build_function_type_list (node_##fx##r, node_##fx##r, \ + integer_type_node, NULL); \ + tree fx##k_ftype_##fx##k_int \ + = build_function_type_list (node_##fx##k, node_##fx##k, \ + integer_type_node, NULL) + +#define INT_FTYPE_FX(fx) \ + tree int_ftype_##fx##r \ + = build_function_type_list (integer_type_node, node_##fx##r, NULL); \ + tree int_ftype_##fx##k \ + = build_function_type_list (integer_type_node, node_##fx##k, NULL) + +#define INTX_FTYPE_FX(fx) \ + tree int##fx##r_ftype_##fx##r \ + = build_function_type_list (ITYP (node_##fx##r), node_##fx##r, NULL); \ + tree int##fx##k_ftype_##fx##k \ + = build_function_type_list (ITYP (node_##fx##k), node_##fx##k, NULL) + +#define FX_FTYPE_INTX(fx) \ + tree fx##r_ftype_int##fx##r \ + = build_function_type_list (node_##fx##r, ITYP (node_##fx##r), NULL); \ + tree fx##k_ftype_int##fx##k \ + = build_function_type_list (node_##fx##k, ITYP (node_##fx##k), NULL) + + tree node_hr = short_fract_type_node; + tree node_nr = fract_type_node; + tree node_lr = long_fract_type_node; + tree node_llr = long_long_fract_type_node; + + tree node_uhr = unsigned_short_fract_type_node; + tree node_unr = unsigned_fract_type_node; + tree node_ulr = unsigned_long_fract_type_node; + tree node_ullr = unsigned_long_long_fract_type_node; + + tree node_hk = short_accum_type_node; + tree node_nk = accum_type_node; + tree node_lk = long_accum_type_node; + tree node_llk = long_long_accum_type_node; + + tree node_uhk = unsigned_short_accum_type_node; + tree node_unk = unsigned_accum_type_node; + tree node_ulk = unsigned_long_accum_type_node; + tree node_ullk = unsigned_long_long_accum_type_node; + + + /* For absfx builtins. */ + + FX_FTYPE_FX (h); + FX_FTYPE_FX (n); + FX_FTYPE_FX (l); + FX_FTYPE_FX (ll); + + /* For roundfx builtins. 
*/ + + FX_FTYPE_FX_INT (h); + FX_FTYPE_FX_INT (n); + FX_FTYPE_FX_INT (l); + FX_FTYPE_FX_INT (ll); + + FX_FTYPE_FX_INT (uh); + FX_FTYPE_FX_INT (un); + FX_FTYPE_FX_INT (ul); + FX_FTYPE_FX_INT (ull); + + /* For countlsfx builtins. */ + + INT_FTYPE_FX (h); + INT_FTYPE_FX (n); + INT_FTYPE_FX (l); + INT_FTYPE_FX (ll); + + INT_FTYPE_FX (uh); + INT_FTYPE_FX (un); + INT_FTYPE_FX (ul); + INT_FTYPE_FX (ull); + + /* For bitsfx builtins. */ + + INTX_FTYPE_FX (h); + INTX_FTYPE_FX (n); + INTX_FTYPE_FX (l); + INTX_FTYPE_FX (ll); + + INTX_FTYPE_FX (uh); + INTX_FTYPE_FX (un); + INTX_FTYPE_FX (ul); + INTX_FTYPE_FX (ull); + + /* For fxbits builtins. */ + + FX_FTYPE_INTX (h); + FX_FTYPE_INTX (n); + FX_FTYPE_INTX (l); + FX_FTYPE_INTX (ll); + + FX_FTYPE_INTX (uh); + FX_FTYPE_INTX (un); + FX_FTYPE_INTX (ul); + FX_FTYPE_INTX (ull); + + +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, CODE, LIBNAME) \ + { \ + int id = AVR_BUILTIN_ ## NAME; \ + const char *Name = "__builtin_avr_" #NAME; \ + char *name = (char*) alloca (1 + strlen (Name)); \ + \ + gcc_assert (id < AVR_BUILTIN_COUNT); \ + avr_bdesc[id].fndecl \ + = add_builtin_function (avr_tolower (name, Name), TYPE, id, \ + BUILT_IN_MD, LIBNAME, NULL_TREE); \ + } +#include "builtins.def" +#undef DEF_BUILTIN + + avr_init_builtin_int24 (); +} + + +/* Subroutine of avr_expand_builtin to expand vanilla builtins + with non-void result and 1 ... 3 arguments. */ + +static rtx +avr_default_expand_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, xop[3]; + int n, n_args = call_expr_nargs (exp); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + + gcc_assert (n_args >= 1 && n_args <= 3); + + if (target == NULL_RTX + || GET_MODE (target) != tmode + || !insn_data[icode].operand[0].predicate (target, tmode)) + { + target = gen_reg_rtx (tmode); + } + + for (n = 0; n < n_args; n++) + { + tree arg = CALL_EXPR_ARG (exp, n); + rtx op = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL); + enum machine_mode opmode = GET_MODE (op); + enum machine_mode mode = insn_data[icode].operand[n+1].mode; + + if ((opmode == SImode || opmode == VOIDmode) && mode == HImode) + { + opmode = HImode; + op = gen_lowpart (HImode, op); + } + + /* In case the insn wants input operands in modes different from + the result, abort. */ + + gcc_assert (opmode == mode || opmode == VOIDmode); + + if (!insn_data[icode].operand[n+1].predicate (op, mode)) + op = copy_to_mode_reg (mode, op); + + xop[n] = op; + } + + switch (n_args) + { + case 1: pat = GEN_FCN (icode) (target, xop[0]); break; + case 2: pat = GEN_FCN (icode) (target, xop[0], xop[1]); break; + case 3: pat = GEN_FCN (icode) (target, xop[0], xop[1], xop[2]); break; + + default: + gcc_unreachable(); + } + + if (pat == NULL_RTX) + return NULL_RTX; + + emit_insn (pat); + + return target; +} + + +/* Implement `TARGET_EXPAND_BUILTIN'. */ +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
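A usage note for the DELAY_CYCLES case handled in avr_expand_builtin below (sketch, not part of the patch; only meaningful when compiling with avr-gcc): the argument has to fold to a compile-time constant.

    void
    wait_1000_cycles (void)
    {
      __builtin_avr_delay_cycles (1000);   /* OK: integer literal */
    }

Passing a run-time value instead is rejected by the expander below with the "expects a compile time integer constant" error.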
*/ + +static rtx +avr_expand_builtin (tree exp, rtx target, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + const char *bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + unsigned int id = DECL_FUNCTION_CODE (fndecl); + const struct avr_builtin_description *d = &avr_bdesc[id]; + tree arg0; + rtx op0; + + gcc_assert (id < AVR_BUILTIN_COUNT); + + switch (id) + { + case AVR_BUILTIN_NOP: + emit_insn (gen_nopv (GEN_INT(1))); + return 0; + + case AVR_BUILTIN_DELAY_CYCLES: + { + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + if (!CONST_INT_P (op0)) + error ("%s expects a compile time integer constant", bname); + else + avr_expand_delay_cycles (op0); + + return NULL_RTX; + } + + case AVR_BUILTIN_INSERT_BITS: + { + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + + if (!CONST_INT_P (op0)) + { + error ("%s expects a compile time long integer constant" + " as first argument", bname); + return target; + } + + break; + } + + case AVR_BUILTIN_ROUNDHR: case AVR_BUILTIN_ROUNDUHR: + case AVR_BUILTIN_ROUNDR: case AVR_BUILTIN_ROUNDUR: + case AVR_BUILTIN_ROUNDLR: case AVR_BUILTIN_ROUNDULR: + case AVR_BUILTIN_ROUNDLLR: case AVR_BUILTIN_ROUNDULLR: + + case AVR_BUILTIN_ROUNDHK: case AVR_BUILTIN_ROUNDUHK: + case AVR_BUILTIN_ROUNDK: case AVR_BUILTIN_ROUNDUK: + case AVR_BUILTIN_ROUNDLK: case AVR_BUILTIN_ROUNDULK: + case AVR_BUILTIN_ROUNDLLK: case AVR_BUILTIN_ROUNDULLK: + + /* Warn about odd rounding. Rounding points >= FBIT will have + no effect. */ + + if (TREE_CODE (CALL_EXPR_ARG (exp, 1)) != INTEGER_CST) + break; + + int rbit = (int) TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 1)); + + if (rbit >= (int) GET_MODE_FBIT (mode)) + { + warning (OPT_Wextra, "rounding to %d bits has no effect for " + "fixed-point value with %d fractional bits", + rbit, GET_MODE_FBIT (mode)); + + return expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, mode, + EXPAND_NORMAL); + } + else if (rbit <= - (int) GET_MODE_IBIT (mode)) + { + warning (0, "rounding result will always be 0"); + return CONST0_RTX (mode); + } + + /* The rounding points RP satisfies now: -IBIT < RP < FBIT. + + TR 18037 only specifies results for RP > 0. However, the + remaining cases of -IBIT < RP <= 0 can easily be supported + without any additional overhead. */ + + break; /* round */ + } + + /* No fold found and no insn: Call support function from libgcc. */ + + if (d->icode == CODE_FOR_nothing + && DECL_ASSEMBLER_NAME (get_callee_fndecl (exp)) != NULL_TREE) + { + return expand_call (exp, target, ignore); + } + + /* No special treatment needed: vanilla expand. */ + + gcc_assert (d->icode != CODE_FOR_nothing); + gcc_assert (d->n_args == call_expr_nargs (exp)); + + if (d->n_args == 0) + { + emit_insn ((GEN_FCN (d->icode)) (target)); + return NULL_RTX; + } + + return avr_default_expand_builtin (d->icode, exp, target); +} + + +/* Helper for `avr_fold_builtin' that folds absfx (FIXED_CST). */ + +static tree +avr_fold_absfx (tree tval) +{ + if (FIXED_CST != TREE_CODE (tval)) + return NULL_TREE; + + /* Our fixed-points have no padding: Use double_int payload directly. */ + + FIXED_VALUE_TYPE fval = TREE_FIXED_CST (tval); + unsigned int bits = GET_MODE_BITSIZE (fval.mode); + double_int ival = fval.data.sext (bits); + + if (!ival.is_negative()) + return tval; + + /* ISO/IEC TR 18037, 7.18a.6.2: The absfx functions are saturating. 
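The saturation rule cited above can be pictured on a raw two's-complement payload; a sketch with a 16-bit payload (the real code below operates on the double_int payload of the FIXED_CST, not on int16_t):

    #include <stdint.h>

    /* abs() on the payload, saturating: |most negative| maps to the
       maximum value instead of overflowing back to itself.  */
    static int16_t
    abs_payload_saturating (int16_t x)
    {
      if (x == INT16_MIN)
        return INT16_MAX;                  /* saturate          */
      return x < 0 ? (int16_t) -x : x;     /* ordinary absolute */
    }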
*/ + + fval.data = (ival == double_int::min_value (bits, false).sext (bits)) + ? double_int::max_value (bits, false) + : -ival; + + return build_fixed (TREE_TYPE (tval), fval); +} + + +/* Implement `TARGET_FOLD_BUILTIN'. */ + +static tree +avr_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *arg, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree val_type = TREE_TYPE (TREE_TYPE (fndecl)); + + if (!optimize) + return NULL_TREE; + + switch (fcode) + { + default: + break; + + case AVR_BUILTIN_SWAP: + { + return fold_build2 (LROTATE_EXPR, val_type, arg[0], + build_int_cst (val_type, 4)); + } + + case AVR_BUILTIN_ABSHR: + case AVR_BUILTIN_ABSR: + case AVR_BUILTIN_ABSLR: + case AVR_BUILTIN_ABSLLR: + + case AVR_BUILTIN_ABSHK: + case AVR_BUILTIN_ABSK: + case AVR_BUILTIN_ABSLK: + case AVR_BUILTIN_ABSLLK: + /* GCC is not good with folding ABS for fixed-point. Do it by hand. */ + + return avr_fold_absfx (arg[0]); + + case AVR_BUILTIN_BITSHR: case AVR_BUILTIN_HRBITS: + case AVR_BUILTIN_BITSHK: case AVR_BUILTIN_HKBITS: + case AVR_BUILTIN_BITSUHR: case AVR_BUILTIN_UHRBITS: + case AVR_BUILTIN_BITSUHK: case AVR_BUILTIN_UHKBITS: + + case AVR_BUILTIN_BITSR: case AVR_BUILTIN_RBITS: + case AVR_BUILTIN_BITSK: case AVR_BUILTIN_KBITS: + case AVR_BUILTIN_BITSUR: case AVR_BUILTIN_URBITS: + case AVR_BUILTIN_BITSUK: case AVR_BUILTIN_UKBITS: + + case AVR_BUILTIN_BITSLR: case AVR_BUILTIN_LRBITS: + case AVR_BUILTIN_BITSLK: case AVR_BUILTIN_LKBITS: + case AVR_BUILTIN_BITSULR: case AVR_BUILTIN_ULRBITS: + case AVR_BUILTIN_BITSULK: case AVR_BUILTIN_ULKBITS: + + case AVR_BUILTIN_BITSLLR: case AVR_BUILTIN_LLRBITS: + case AVR_BUILTIN_BITSLLK: case AVR_BUILTIN_LLKBITS: + case AVR_BUILTIN_BITSULLR: case AVR_BUILTIN_ULLRBITS: + case AVR_BUILTIN_BITSULLK: case AVR_BUILTIN_ULLKBITS: + + gcc_assert (TYPE_PRECISION (val_type) + == TYPE_PRECISION (TREE_TYPE (arg[0]))); + + return build1 (VIEW_CONVERT_EXPR, val_type, arg[0]); + + case AVR_BUILTIN_INSERT_BITS: + { + tree tbits = arg[1]; + tree tval = arg[2]; + tree tmap; + tree map_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + unsigned int map; + bool changed = false; + unsigned i; + avr_map_op_t best_g; + + if (TREE_CODE (arg[0]) != INTEGER_CST) + { + /* No constant as first argument: Don't fold this and run into + error in avr_expand_builtin. */ + + break; + } + + tmap = double_int_to_tree (map_type, tree_to_double_int (arg[0])); + map = TREE_INT_CST_LOW (tmap); + + if (TREE_CODE (tval) != INTEGER_CST + && 0 == avr_map_metric (map, MAP_MASK_PREIMAGE_F)) + { + /* There are no F in the map, i.e. 3rd operand is unused. + Replace that argument with some constant to render + respective input unused. */ + + tval = build_int_cst (val_type, 0); + changed = true; + } + + if (TREE_CODE (tbits) != INTEGER_CST + && 0 == avr_map_metric (map, MAP_PREIMAGE_0_7)) + { + /* Similar for the bits to be inserted. If they are unused, + we can just as well pass 0. */ + + tbits = build_int_cst (val_type, 0); + } + + if (TREE_CODE (tbits) == INTEGER_CST) + { + /* Inserting bits known at compile time is easy and can be + performed by AND and OR with appropriate masks. 
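A worked instance of that folding (illustration only, numbers made up): with map = 0xffffff10 and a constant bits = 0x02, the loop that follows yields mask_ior = 0x02 and mask_and = 0xfe, so the builtin folds to (val | 0x02) & 0xfe.

    /* Standalone sketch mirroring the loop below.  */
    static void
    mask_example (void)
    {
      unsigned long map = 0xffffff10;   /* result bits 0 and 1 come from BITS */
      int bits = 0x02;
      int mask_ior = 0, mask_and = 0xff;

      for (int i = 0; i < 8; i++)
        {
          int mi = (map >> (4 * i)) & 0xf;   /* nibble i of the map */
          if (mi < 8)
            {
              if (bits & (1 << mi)) mask_ior |= 1 << i;    /* constant 1 bit */
              else                  mask_and &= ~(1 << i); /* constant 0 bit */
            }
        }
      /* Here mask_ior == 0x02 and mask_and == 0xfe.  */
    }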
*/ + + int bits = TREE_INT_CST_LOW (tbits); + int mask_ior = 0, mask_and = 0xff; + + for (i = 0; i < 8; i++) + { + int mi = avr_map (map, i); + + if (mi < 8) + { + if (bits & (1 << mi)) mask_ior |= (1 << i); + else mask_and &= ~(1 << i); + } + } + + tval = fold_build2 (BIT_IOR_EXPR, val_type, tval, + build_int_cst (val_type, mask_ior)); + return fold_build2 (BIT_AND_EXPR, val_type, tval, + build_int_cst (val_type, mask_and)); + } + + if (changed) + return build_call_expr (fndecl, 3, tmap, tbits, tval); + + /* If bits don't change their position we can use vanilla logic + to merge the two arguments. */ + + if (0 == avr_map_metric (map, MAP_NONFIXED_0_7)) + { + int mask_f = avr_map_metric (map, MAP_MASK_PREIMAGE_F); + tree tres, tmask = build_int_cst (val_type, mask_f ^ 0xff); + + tres = fold_build2 (BIT_XOR_EXPR, val_type, tbits, tval); + tres = fold_build2 (BIT_AND_EXPR, val_type, tres, tmask); + return fold_build2 (BIT_XOR_EXPR, val_type, tres, tval); + } + + /* Try to decomposing map to reduce overall cost. */ + + if (avr_log.builtin) + avr_edump ("\n%?: %x\n%?: ROL cost: ", map); + + best_g = avr_map_op[0]; + best_g.cost = 1000; + + for (i = 0; i < sizeof (avr_map_op) / sizeof (*avr_map_op); i++) + { + avr_map_op_t g + = avr_map_decompose (map, avr_map_op + i, + TREE_CODE (tval) == INTEGER_CST); + + if (g.cost >= 0 && g.cost < best_g.cost) + best_g = g; + } + + if (avr_log.builtin) + avr_edump ("\n"); + + if (best_g.arg == 0) + /* No optimization found */ + break; + + /* Apply operation G to the 2nd argument. */ + + if (avr_log.builtin) + avr_edump ("%?: using OP(%s%d, %x) cost %d\n", + best_g.str, best_g.arg, best_g.map, best_g.cost); + + /* Do right-shifts arithmetically: They copy the MSB instead of + shifting in a non-usable value (0) as with logic right-shift. */ + + tbits = fold_convert (signed_char_type_node, tbits); + tbits = fold_build2 (best_g.code, signed_char_type_node, tbits, + build_int_cst (val_type, best_g.arg)); + tbits = fold_convert (val_type, tbits); + + /* Use map o G^-1 instead of original map to undo the effect of G. */ + + tmap = double_int_to_tree (map_type, + double_int::from_uhwi (best_g.map)); + + return build_call_expr (fndecl, 3, tmap, tbits, tval); + } /* AVR_BUILTIN_INSERT_BITS */ + } + + return NULL_TREE; +} + + + +/* Initialize the GCC target structure. 
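One note before the target hooks start: the "vanilla logic" used in avr_fold_builtin above is the classic masked-merge identity. A minimal sketch (not part of the patch):

    /* For each bit, take BITS where MASK is 1 and VAL where MASK is 0:
       ((bits ^ val) & mask) ^ val.  This is the BIT_XOR / BIT_AND /
       BIT_XOR sequence built at the tree level above.  */
    static unsigned char
    merge_bits (unsigned char bits, unsigned char val, unsigned char mask)
    {
      return (unsigned char) (((bits ^ val) & mask) ^ val);
    }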
*/ + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.long\t" +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER avr_assemble_integer +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START avr_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END avr_file_end + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE avr_asm_function_end_prologue +#undef TARGET_ASM_FUNCTION_BEGIN_EPILOGUE +#define TARGET_ASM_FUNCTION_BEGIN_EPILOGUE avr_asm_function_begin_epilogue + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE avr_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE avr_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P avr_function_value_regno_p + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE avr_attribute_table +#undef TARGET_INSERT_ATTRIBUTES +#define TARGET_INSERT_ATTRIBUTES avr_insert_attributes +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS avr_section_type_flags + +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION avr_asm_named_section +#undef TARGET_ASM_INIT_SECTIONS +#define TARGET_ASM_INIT_SECTIONS avr_asm_init_sections +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO avr_encode_section_info +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION avr_asm_select_section + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST avr_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST avr_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS avr_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST avr_address_cost +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG avr_reorg +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG avr_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE avr_function_arg_advance + +#undef TARGET_SET_CURRENT_FUNCTION +#define TARGET_SET_CURRENT_FUNCTION avr_set_current_function + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY avr_return_in_memory + +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + +#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE +#define TARGET_BUILTIN_SETJMP_FRAME_VALUE avr_builtin_setjmp_frame_value + +#undef TARGET_HARD_REGNO_SCRATCH_OK +#define TARGET_HARD_REGNO_SCRATCH_OK avr_hard_regno_scratch_ok +#undef TARGET_CASE_VALUES_THRESHOLD +#define TARGET_CASE_VALUES_THRESHOLD avr_case_values_threshold + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED avr_frame_pointer_required_p +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE avr_can_eliminate + +#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS +#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS avr_allocate_stack_slots_for_args + +#undef TARGET_WARN_FUNC_RETURN +#define TARGET_WARN_FUNC_RETURN avr_warn_func_return + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P avr_class_likely_spilled_p + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE avr_option_override + +#undef TARGET_CANNOT_MODIFY_JUMPS_P +#define TARGET_CANNOT_MODIFY_JUMPS_P 
avr_cannot_modify_jumps_p + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL avr_function_ok_for_sibcall + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS avr_init_builtins + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL avr_builtin_decl + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN avr_expand_builtin + +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN avr_fold_builtin + +#undef TARGET_ASM_FUNCTION_RODATA_SECTION +#define TARGET_ASM_FUNCTION_RODATA_SECTION avr_asm_function_rodata_section + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P avr_scalar_mode_supported_p + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST avr_build_builtin_va_list + +#undef TARGET_FIXED_POINT_SUPPORTED_P +#define TARGET_FIXED_POINT_SUPPORTED_P hook_bool_void_true + +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE avr_convert_to_type + +#undef TARGET_ADDR_SPACE_SUBSET_P +#define TARGET_ADDR_SPACE_SUBSET_P avr_addr_space_subset_p + +#undef TARGET_ADDR_SPACE_CONVERT +#define TARGET_ADDR_SPACE_CONVERT avr_addr_space_convert + +#undef TARGET_ADDR_SPACE_ADDRESS_MODE +#define TARGET_ADDR_SPACE_ADDRESS_MODE avr_addr_space_address_mode + +#undef TARGET_ADDR_SPACE_POINTER_MODE +#define TARGET_ADDR_SPACE_POINTER_MODE avr_addr_space_pointer_mode + +#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P +#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \ + avr_addr_space_legitimate_address_p + +#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS +#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS avr_addr_space_legitimize_address + +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P avr_mode_dependent_address_p + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD avr_secondary_reload + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND avr_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS avr_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P avr_print_operand_punct_valid_p + +struct gcc_target targetm = TARGET_INITIALIZER; + + +#include "gt-avr.h" diff --git a/gcc-4.9/gcc/config/avr/avr.h b/gcc-4.9/gcc/config/avr/avr.h new file mode 100644 index 000000000..74be83c8a --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr.h @@ -0,0 +1,606 @@ +/* Definitions of target machine for GNU compiler, + for ATMEL AVR at90s8515, ATmega103/103L, ATmega603/603L microcontrollers. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Denis Chertykov (chertykov@gmail.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +typedef struct +{ + /* Id of the address space as used in c_register_addr_space */ + unsigned char id; + + /* Flavour of memory: 0 = RAM, 1 = Flash */ + int memory_class; + + /* Width of pointer (in bytes) */ + int pointer_size; + + /* Name of the address space as visible to the user */ + const char *name; + + /* Segment (i.e. 64k memory chunk) number. */ + int segment; + + /* Section prefix, e.g. ".progmem1.data" */ + const char *section_name; +} avr_addrspace_t; + +extern const avr_addrspace_t avr_addrspace[]; + +/* Known address spaces */ + +enum + { + ADDR_SPACE_RAM, /* ADDR_SPACE_GENERIC */ + ADDR_SPACE_FLASH, + ADDR_SPACE_FLASH1, + ADDR_SPACE_FLASH2, + ADDR_SPACE_FLASH3, + ADDR_SPACE_FLASH4, + ADDR_SPACE_FLASH5, + ADDR_SPACE_MEMX, + /* Sentinel */ + ADDR_SPACE_COUNT + }; + +#define TARGET_CPU_CPP_BUILTINS() avr_cpu_cpp_builtins (pfile) + +#define AVR_HAVE_JMP_CALL (avr_current_arch->have_jmp_call) +#define AVR_HAVE_MUL (avr_current_arch->have_mul) +#define AVR_HAVE_MOVW (avr_current_arch->have_movw_lpmx) +#define AVR_HAVE_LPMX (avr_current_arch->have_movw_lpmx) +#define AVR_HAVE_ELPM (avr_current_arch->have_elpm) +#define AVR_HAVE_ELPMX (avr_current_arch->have_elpmx) +#define AVR_HAVE_RAMPD (avr_current_arch->have_rampd) +#define AVR_HAVE_RAMPX (avr_current_arch->have_rampd) +#define AVR_HAVE_RAMPY (avr_current_arch->have_rampd) +#define AVR_HAVE_RAMPZ (avr_current_arch->have_elpm \ + || avr_current_arch->have_rampd) +#define AVR_HAVE_EIJMP_EICALL (avr_current_arch->have_eijmp_eicall) + +/* Handling of 8-bit SP versus 16-bit SP is as follows: + + -msp8 is used internally to select the right multilib for targets with + 8-bit SP. -msp8 is set automatically by DRIVER_SELF_SPECS for devices + with 8-bit SP or by multilib generation machinery. If a frame pointer is + needed and SP is only 8 bits wide, SP is zero-extended to get FP. + + TARGET_TINY_STACK is triggered by -mtiny-stack which is a user option. + This option has no effect on multilib selection. It serves to save some + bytes on 16-bit SP devices by only changing SP_L and leaving SP_H alone. + + These two properties are reflected by built-in macros __AVR_SP8__ resp. + __AVR_HAVE_8BIT_SP__ and __AVR_HAVE_16BIT_SP__. During multilib generation + there is always __AVR_SP8__ == __AVR_HAVE_8BIT_SP__. */ + +#define AVR_HAVE_8BIT_SP \ + (avr_current_device->short_sp || TARGET_TINY_STACK || avr_sp8) + +#define AVR_HAVE_SPH (!avr_sp8) + +#define AVR_2_BYTE_PC (!AVR_HAVE_EIJMP_EICALL) +#define AVR_3_BYTE_PC (AVR_HAVE_EIJMP_EICALL) + +#define AVR_XMEGA (avr_current_arch->xmega_p) + +#define BITS_BIG_ENDIAN 0 +#define BYTES_BIG_ENDIAN 0 +#define WORDS_BIG_ENDIAN 0 + +#ifdef IN_LIBGCC2 +/* This is to get correct SI and DI modes in libgcc2.c (32 and 64 bits). */ +#define UNITS_PER_WORD 4 +#else +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 1 +#endif + +#define POINTER_SIZE 16 + + +/* Maximum sized of reasonable data type + DImode or Dfmode ... */ +#define MAX_FIXED_MODE_SIZE 32 + +#define PARM_BOUNDARY 8 + +#define FUNCTION_BOUNDARY 8 + +#define EMPTY_FIELD_BOUNDARY 8 + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 8 + +#define TARGET_VTABLE_ENTRY_ALIGN 8 + +#define STRICT_ALIGNMENT 0 + +#define INT_TYPE_SIZE (TARGET_INT8 ? 8 : 16) +#define SHORT_TYPE_SIZE (INT_TYPE_SIZE == 8 ? INT_TYPE_SIZE : 16) +#define LONG_TYPE_SIZE (INT_TYPE_SIZE == 8 ? 16 : 32) +#define LONG_LONG_TYPE_SIZE (INT_TYPE_SIZE == 8 ? 
32 : 64) +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 32 +#define LONG_DOUBLE_TYPE_SIZE 32 +#define LONG_LONG_ACCUM_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#define SIZE_TYPE (INT_TYPE_SIZE == 8 ? "long unsigned int" : "unsigned int") +#define PTRDIFF_TYPE (INT_TYPE_SIZE == 8 ? "long int" :"int") + +#define WCHAR_TYPE_SIZE 16 + +#define FIRST_PSEUDO_REGISTER 36 + +#define FIXED_REGISTERS {\ + 1,1,/* r0 r1 */\ + 0,0,/* r2 r3 */\ + 0,0,/* r4 r5 */\ + 0,0,/* r6 r7 */\ + 0,0,/* r8 r9 */\ + 0,0,/* r10 r11 */\ + 0,0,/* r12 r13 */\ + 0,0,/* r14 r15 */\ + 0,0,/* r16 r17 */\ + 0,0,/* r18 r19 */\ + 0,0,/* r20 r21 */\ + 0,0,/* r22 r23 */\ + 0,0,/* r24 r25 */\ + 0,0,/* r26 r27 */\ + 0,0,/* r28 r29 */\ + 0,0,/* r30 r31 */\ + 1,1,/* STACK */\ + 1,1 /* arg pointer */ } + +#define CALL_USED_REGISTERS { \ + 1,1,/* r0 r1 */ \ + 0,0,/* r2 r3 */ \ + 0,0,/* r4 r5 */ \ + 0,0,/* r6 r7 */ \ + 0,0,/* r8 r9 */ \ + 0,0,/* r10 r11 */ \ + 0,0,/* r12 r13 */ \ + 0,0,/* r14 r15 */ \ + 0,0,/* r16 r17 */ \ + 1,1,/* r18 r19 */ \ + 1,1,/* r20 r21 */ \ + 1,1,/* r22 r23 */ \ + 1,1,/* r24 r25 */ \ + 1,1,/* r26 r27 */ \ + 0,0,/* r28 r29 */ \ + 1,1,/* r30 r31 */ \ + 1,1,/* STACK */ \ + 1,1 /* arg pointer */ } + +#define REG_ALLOC_ORDER { \ + 24,25, \ + 18,19, \ + 20,21, \ + 22,23, \ + 30,31, \ + 26,27, \ + 28,29, \ + 17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2, \ + 0,1, \ + 32,33,34,35 \ + } + +#define ADJUST_REG_ALLOC_ORDER avr_adjust_reg_alloc_order() + + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +#define HARD_REGNO_MODE_OK(REGNO, MODE) avr_hard_regno_mode_ok(REGNO, MODE) + +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +enum reg_class { + NO_REGS, + R0_REG, /* r0 */ + POINTER_X_REGS, /* r26 - r27 */ + POINTER_Y_REGS, /* r28 - r29 */ + POINTER_Z_REGS, /* r30 - r31 */ + STACK_REG, /* STACK */ + BASE_POINTER_REGS, /* r28 - r31 */ + POINTER_REGS, /* r26 - r31 */ + ADDW_REGS, /* r24 - r31 */ + SIMPLE_LD_REGS, /* r16 - r23 */ + LD_REGS, /* r16 - r31 */ + NO_LD_REGS, /* r0 - r15 */ + GENERAL_REGS, /* r0 - r31 */ + ALL_REGS, LIM_REG_CLASSES +}; + + +#define N_REG_CLASSES (int)LIM_REG_CLASSES + +#define REG_CLASS_NAMES { \ + "NO_REGS", \ + "R0_REG", /* r0 */ \ + "POINTER_X_REGS", /* r26 - r27 */ \ + "POINTER_Y_REGS", /* r28 - r29 */ \ + "POINTER_Z_REGS", /* r30 - r31 */ \ + "STACK_REG", /* STACK */ \ + "BASE_POINTER_REGS", /* r28 - r31 */ \ + "POINTER_REGS", /* r26 - r31 */ \ + "ADDW_REGS", /* r24 - r31 */ \ + "SIMPLE_LD_REGS", /* r16 - r23 */ \ + "LD_REGS", /* r16 - r31 */ \ + "NO_LD_REGS", /* r0 - r15 */ \ + "GENERAL_REGS", /* r0 - r31 */ \ + "ALL_REGS" } + +#define REG_CLASS_CONTENTS { \ + {0x00000000,0x00000000}, /* NO_REGS */ \ + {0x00000001,0x00000000}, /* R0_REG */ \ + {3 << REG_X,0x00000000}, /* POINTER_X_REGS, r26 - r27 */ \ + {3 << REG_Y,0x00000000}, /* POINTER_Y_REGS, r28 - r29 */ \ + {3 << REG_Z,0x00000000}, /* POINTER_Z_REGS, r30 - r31 */ \ + {0x00000000,0x00000003}, /* STACK_REG, STACK */ \ + {(3 << REG_Y) | (3 << REG_Z), \ + 0x00000000}, /* BASE_POINTER_REGS, r28 - r31 */ \ + {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z), \ + 0x00000000}, /* POINTER_REGS, r26 - r31 */ \ + {(3 << REG_X) | (3 << REG_Y) | (3 << REG_Z) | (3 << REG_W), \ + 0x00000000}, /* ADDW_REGS, r24 - r31 */ \ + {0x00ff0000,0x00000000}, /* SIMPLE_LD_REGS r16 - r23 */ \ + {(3 << REG_X)|(3 << REG_Y)|(3 << REG_Z)|(3 << REG_W)|(0xff << 16), \ + 0x00000000}, /* LD_REGS, r16 - r31 */ \ + {0x0000ffff,0x00000000}, /* NO_LD_REGS r0 - r15 */ \ + {0xffffffff,0x00000000}, /* GENERAL_REGS, r0 - 
r31 */ \ + {0xffffffff,0x00000003} /* ALL_REGS */ \ +} + +#define REGNO_REG_CLASS(R) avr_regno_reg_class(R) + +#define MODE_CODE_BASE_REG_CLASS(mode, as, outer_code, index_code) \ + avr_mode_code_base_reg_class (mode, as, outer_code, index_code) + +#define INDEX_REG_CLASS NO_REGS + +#define REGNO_MODE_CODE_OK_FOR_BASE_P(num, mode, as, outer_code, index_code) \ + avr_regno_mode_code_ok_for_base_p (num, mode, as, outer_code, index_code) + +#define REGNO_OK_FOR_INDEX_P(NUM) 0 + +#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \ + avr_hard_regno_call_part_clobbered (REGNO, MODE) + +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +#define STACK_PUSH_CODE POST_DEC + +#define STACK_GROWS_DOWNWARD + +#define STARTING_FRAME_OFFSET avr_starting_frame_offset() + +#define STACK_POINTER_OFFSET 1 + +#define FIRST_PARM_OFFSET(FUNDECL) 0 + +#define STACK_BOUNDARY 8 + +#define STACK_POINTER_REGNUM 32 + +#define FRAME_POINTER_REGNUM REG_Y + +#define ARG_POINTER_REGNUM 34 + +#define STATIC_CHAIN_REGNUM 2 + +#define ELIMINABLE_REGS { \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ + ,{FRAME_POINTER_REGNUM+1,STACK_POINTER_REGNUM+1}} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + OFFSET = avr_initial_elimination_offset (FROM, TO) + +#define RETURN_ADDR_RTX(count, tem) avr_return_addr_rtx (count, tem) + +/* Don't use Push rounding. expr.c: emit_single_push_insn is broken + for POST_DEC targets (PR27386). */ +/*#define PUSH_ROUNDING(NPUSHED) (NPUSHED)*/ + +typedef struct avr_args +{ + /* # Registers available for passing */ + int nregs; + + /* Next available register number */ + int regno; +} CUMULATIVE_ARGS; + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + avr_init_cumulative_args (&(CUM), FNTYPE, LIBNAME, FNDECL) + +#define FUNCTION_ARG_REGNO_P(r) avr_function_arg_regno_p(r) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define EPILOGUE_USES(REGNO) avr_epilogue_uses(REGNO) + +#define HAVE_POST_INCREMENT 1 +#define HAVE_PRE_DECREMENT 1 + +#define MAX_REGS_PER_ADDRESS 1 + +#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \ + do { \ + rtx new_x = avr_legitimize_reload_address (&(X), MODE, OPNUM, TYPE, \ + ADDR_TYPE (TYPE), \ + IND_L, make_memloc); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ + } while (0) + +#define BRANCH_COST(speed_p, predictable_p) avr_branch_cost + +#define SLOW_BYTE_ACCESS 0 + +#define NO_FUNCTION_CSE + +#define REGISTER_TARGET_PRAGMAS() \ + do { \ + avr_register_target_pragmas(); \ + } while (0) + +#define TEXT_SECTION_ASM_OP "\t.text" + +#define DATA_SECTION_ASM_OP "\t.data" + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/* Define the pseudo-ops used to switch to the .ctors and .dtors sections. + There are no shared libraries on this target, and these sections are + placed in the read-only program memory, so they are not writable. 
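Referring back to the REG_CLASS_CONTENTS initializer above: each entry is just an OR of 2-bit groups placed at the even register numbers. A sketch (not part of the patch) that rebuilds two of the masks from the register numbers used there (REG_W = 24, REG_X = 26, REG_Y = 28, REG_Z = 30, as in the avr.md constants further down):

    #define REG_W 24
    #define REG_X 26
    #define REG_Y 28
    #define REG_Z 30

    /* "3 << REG_n" marks the register pair (REG_n, REG_n + 1).  */
    static const unsigned long addw_regs =
      (3UL << REG_X) | (3UL << REG_Y) | (3UL << REG_Z) | (3UL << REG_W);
      /* == 0xff000000, i.e. r24 - r31 */
    static const unsigned long ld_regs =
      (3UL << REG_X) | (3UL << REG_Y) | (3UL << REG_Z) | (3UL << REG_W)
      | (0xffUL << 16);
      /* == 0xffff0000, i.e. r16 - r31 */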
*/ + +#undef CTORS_SECTION_ASM_OP +#define CTORS_SECTION_ASM_OP "\t.section .ctors,\"a\",@progbits" + +#undef DTORS_SECTION_ASM_OP +#define DTORS_SECTION_ASM_OP "\t.section .dtors,\"a\",@progbits" + +#define TARGET_ASM_CONSTRUCTOR avr_asm_out_ctor + +#define TARGET_ASM_DESTRUCTOR avr_asm_out_dtor + +#define SUPPORTS_INIT_PRIORITY 0 + +#define JUMP_TABLES_IN_TEXT_SECTION 0 + +#define ASM_COMMENT_START " ; " + +#define ASM_APP_ON "/* #APP */\n" + +#define ASM_APP_OFF "/* #NOAPP */\n" + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == '\n' || ((C) == '$')) + +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(STREAM, DECL, NAME, SIZE, ALIGN) \ + avr_asm_output_aligned_decl_common (STREAM, DECL, NAME, SIZE, ALIGN, false) + +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN) + +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGN) \ + avr_asm_output_aligned_decl_common (STREAM, DECL, NAME, SIZE, ALIGN, true) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP ".global\t" + +#define SUPPORTS_WEAK 1 + +#define HAS_INIT_SECTION 1 + +#define REGISTER_NAMES { \ + "r0","r1","r2","r3","r4","r5","r6","r7", \ + "r8","r9","r10","r11","r12","r13","r14","r15", \ + "r16","r17","r18","r19","r20","r21","r22","r23", \ + "r24","r25","r26","r27","r28","r29","r30","r31", \ + "__SP_L__","__SP_H__","argL","argH"} + +#define FINAL_PRESCAN_INSN(insn, operand, nop) \ + avr_final_prescan_insn (insn, operand,nop) + +#define ASM_OUTPUT_REG_PUSH(STREAM, REGNO) \ +{ \ + gcc_assert (REGNO < 32); \ + fprintf (STREAM, "\tpush\tr%d", REGNO); \ +} + +#define ASM_OUTPUT_REG_POP(STREAM, REGNO) \ +{ \ + gcc_assert (REGNO < 32); \ + fprintf (STREAM, "\tpop\tr%d", REGNO); \ +} + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + avr_output_addr_vec_elt (STREAM, VALUE) + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + do { \ + if ((POWER) > 0) \ + fprintf (STREAM, "\t.p2align\t%d\n", POWER); \ + } while (0) + +#define CASE_VECTOR_MODE HImode + +#undef WORD_REGISTER_OPERATIONS + +#define MOVE_MAX 4 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define Pmode HImode + +#define FUNCTION_MODE HImode + +#define DOLLARS_IN_IDENTIFIERS 0 + +#define TRAMPOLINE_SIZE 4 + +/* Store in cc_status the expressions + that the condition codes will describe + after execution of an instruction whose pattern is EXP. + Do not alter them if the instruction would not alter the cc's. */ + +#define NOTICE_UPDATE_CC(EXP, INSN) avr_notice_update_cc (EXP, INSN) + +/* The add insns don't set overflow in a usable way. */ +#define CC_OVERFLOW_UNUSABLE 01000 +/* The mov,and,or,xor insns don't set carry. That's ok though as the + Z bit is all we need when doing unsigned comparisons on the result of + these insns (since they're always with 0). However, conditions.h has + CC_NO_OVERFLOW defined for this purpose. Rename it to something more + understandable. */ +#define CC_NO_CARRY CC_NO_OVERFLOW + + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. 
*/ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + fprintf (FILE, "/* profiler %d */", (LABELNO)) + +#define ADJUST_INSN_LENGTH(INSN, LENGTH) \ + (LENGTH = avr_adjust_insn_length (INSN, LENGTH)) + +extern const char *avr_device_to_as (int argc, const char **argv); +extern const char *avr_device_to_ld (int argc, const char **argv); +extern const char *avr_device_to_data_start (int argc, const char **argv); +extern const char *avr_device_to_startfiles (int argc, const char **argv); +extern const char *avr_device_to_devicelib (int argc, const char **argv); +extern const char *avr_device_to_sp8 (int argc, const char **argv); + +#define EXTRA_SPEC_FUNCTIONS \ + { "device_to_as", avr_device_to_as }, \ + { "device_to_ld", avr_device_to_ld }, \ + { "device_to_data_start", avr_device_to_data_start }, \ + { "device_to_startfile", avr_device_to_startfiles }, \ + { "device_to_devicelib", avr_device_to_devicelib }, \ + { "device_to_sp8", avr_device_to_sp8 }, + +#define DRIVER_SELF_SPECS " %:device_to_sp8(%{mmcu=*:%*}) " +#define CPP_SPEC "" + +#define CC1_SPEC "" + +#define CC1PLUS_SPEC "%{!frtti:-fno-rtti} \ + %{!fenforce-eh-specs:-fno-enforce-eh-specs} \ + %{!fexceptions:-fno-exceptions}" + +#define ASM_SPEC "%:device_to_as(%{mmcu=*:%*}) " + +#define LINK_SPEC "\ +%{mrelax:--relax\ + %{mpmem-wrap-around:%{mmcu=at90usb8*:--pmem-wrap-around=8k}\ + %{mmcu=atmega16*:--pmem-wrap-around=16k}\ + %{mmcu=atmega32*|\ + mmcu=at90can32*:--pmem-wrap-around=32k}\ + %{mmcu=atmega64*|\ + mmcu=at90can64*|\ + mmcu=at90usb64*:--pmem-wrap-around=64k}}}\ +%:device_to_ld(%{mmcu=*:%*})\ +%:device_to_data_start(%{mmcu=*:%*})\ +%{shared:%eshared is not supported}" + +#define LIB_SPEC \ + "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lc }}}}}" + +#define LIBSTDCXX "gcc" +/* No libstdc++ for now. Empty string doesn't work. */ + +#define LIBGCC_SPEC \ + "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lgcc }}}}}" + +#define STARTFILE_SPEC "%:device_to_startfile(%{mmcu=*:%*})" + +#define ENDFILE_SPEC "" + +/* This is the default without any -mmcu=* option (AT90S*). */ +#define MULTILIB_DEFAULTS { "mmcu=avr2" } + +#define TEST_HARD_REG_CLASS(CLASS, REGNO) \ + TEST_HARD_REG_BIT (reg_class_contents[ (int) (CLASS)], REGNO) + +#define CR_TAB "\n\t" + +#define DWARF2_ADDR_SIZE 4 + +#define INCOMING_RETURN_ADDR_RTX avr_incoming_return_addr_rtx () +#define INCOMING_FRAME_SP_OFFSET (AVR_3_BYTE_PC ? 3 : 2) + +/* The caller's stack pointer value immediately before the call + is one byte below the first argument. */ +#define ARG_POINTER_CFA_OFFSET(FNDECL) -1 + +#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \ + avr_hard_regno_rename_ok (OLD_REG, NEW_REG) + +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +struct GTY(()) machine_function +{ + /* 'true' - if current function is a naked function. */ + int is_naked; + + /* 'true' - if current function is an interrupt function + as specified by the "interrupt" attribute. */ + int is_interrupt; + + /* 'true' - if current function is a signal function + as specified by the "signal" attribute. */ + int is_signal; + + /* 'true' - if current function is a 'task' function + as specified by the "OS_task" attribute. */ + int is_OS_task; + + /* 'true' - if current function is a 'main' function + as specified by the "OS_main" attribute. */ + int is_OS_main; + + /* Current function stack size. 
*/ + int stack_usage; + + /* 'true' if a callee might be tail called */ + int sibcall_fails; + + /* 'true' if the above is_foo predicates are sanity-checked to avoid + multiple diagnose for the same function. */ + int attributes_checked_p; +}; + +/* AVR does not round pushes, but the existence of this macro is + required in order for pushes to be generated. */ +#define PUSH_ROUNDING(X) (X) + +/* Define prototype here to avoid build warning. Some files using + ACCUMULATE_OUTGOING_ARGS (directly or indirectly) include + tm.h but not tm_p.h. */ +extern int avr_accumulate_outgoing_args (void); +#define ACCUMULATE_OUTGOING_ARGS avr_accumulate_outgoing_args() + +#define INIT_EXPANDERS avr_init_expanders() diff --git a/gcc-4.9/gcc/config/avr/avr.md b/gcc-4.9/gcc/config/avr/avr.md new file mode 100644 index 000000000..f2d8605cd --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr.md @@ -0,0 +1,6358 @@ +;; Machine description for GNU compiler, +;; for ATMEL AVR micro controllers. +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. +;; Contributed by Denis Chertykov (chertykov@gmail.com) + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Special characters after '%': +;; A No effect (add 0). +;; B Add 1 to REG number, MEM address or CONST_INT. +;; C Add 2. +;; D Add 3. +;; j Branch condition. +;; k Reverse branch condition. +;;..m..Constant Direct Data memory address. +;; i Print the SFR address quivalent of a CONST_INT or a CONST_INT +;; RAM address. The resulting address is suitable to be used in IN/OUT. +;; o Displacement for (mem (plus (reg) (const_int))) operands. +;; p POST_INC or PRE_DEC address as a pointer (X, Y, Z) +;; r POST_INC or PRE_DEC address as a register (r26, r28, r30) +;; r Print a REG without the register prefix 'r'. +;; T/T Print operand suitable for BLD/BST instruction, i.e. register and +;; bit number. This gets 2 operands: The first %T gets a REG_P and +;; just cashes the operand for the next %T. The second %T gets +;; a CONST_INT that represents a bit position. +;; Example: With %0 = (reg:HI 18) and %1 = (const_int 13) +;; "%T0%T1" it will print "r19,5". +;; Notice that you must not write a comma between %T0 and %T1. +;; T/t Similar to above, but don't print the comma and the bit number. +;; Example: With %0 = (reg:HI 18) and %1 = (const_int 13) +;; "%T0%t1" it will print "r19". +;;..x..Constant Direct Program memory address. +;; ~ Output 'r' if not AVR_HAVE_JMP_CALL. +;; ! Output 'e' if AVR_HAVE_EIJMP_EICALL. 
+ + +(define_constants + [(REG_X 26) + (REG_Y 28) + (REG_Z 30) + (REG_W 24) + (REG_SP 32) + (LPM_REGNO 0) ; implicit target register of LPM + (TMP_REGNO 0) ; temporary register r0 + (ZERO_REGNO 1) ; zero register r1 + ]) + +(define_c_enum "unspec" + [UNSPEC_STRLEN + UNSPEC_MOVMEM + UNSPEC_INDEX_JMP + UNSPEC_FMUL + UNSPEC_FMULS + UNSPEC_FMULSU + UNSPEC_COPYSIGN + UNSPEC_IDENTITY + UNSPEC_INSERT_BITS + UNSPEC_ROUND + ]) + +(define_c_enum "unspecv" + [UNSPECV_PROLOGUE_SAVES + UNSPECV_EPILOGUE_RESTORES + UNSPECV_WRITE_SP + UNSPECV_GOTO_RECEIVER + UNSPECV_ENABLE_IRQS + UNSPECV_MEMORY_BARRIER + UNSPECV_NOP + UNSPECV_SLEEP + UNSPECV_WDR + UNSPECV_DELAY_CYCLES + ]) + + +(include "predicates.md") +(include "constraints.md") + +;; Condition code settings. +(define_attr "cc" "none,set_czn,set_zn,set_n,compare,clobber, + plus,ldi" + (const_string "none")) + +(define_attr "type" "branch,branch1,arith,xcall" + (const_string "arith")) + +;; The size of instructions in bytes. +;; XXX may depend from "cc" + +(define_attr "length" "" + (cond [(eq_attr "type" "branch") + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -63)) + (le (minus (pc) (match_dup 0)) + (const_int 62))) + (const_int 1) + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -2045)) + (le (minus (pc) (match_dup 0)) + (const_int 2045))) + (const_int 2) + (const_int 3))) + (eq_attr "type" "branch1") + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -62)) + (le (minus (pc) (match_dup 0)) + (const_int 61))) + (const_int 2) + (if_then_else (and (ge (minus (pc) (match_dup 0)) + (const_int -2044)) + (le (minus (pc) (match_dup 0)) + (const_int 2043))) + (const_int 3) + (const_int 4))) + (eq_attr "type" "xcall") + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 1) + (const_int 2))] + (const_int 2))) + +;; Lengths of several insns are adjusted in avr.c:adjust_insn_length(). +;; Following insn attribute tells if and how the adjustment has to be +;; done: +;; no No adjustment needed; attribute "length" is fine. +;; Otherwise do special processing depending on the attribute. 
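The "length" attribute further up encodes the reach of AVR relative branches; a rough C restatement of its "branch" case (illustration only, ranges copied from the attribute, distances in words as used there):

    /* Words needed for a "branch"-type insn given the pc-relative
       distance: a short conditional branch, or (typically) an inverted
       branch around an RJMP, or around a 2-word JMP.  */
    static int
    branch_length_words (int delta)
    {
      if (delta >= -63 && delta <= 62)
        return 1;
      if (delta >= -2045 && delta <= 2045)
        return 2;
      return 3;
    }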
+ +(define_attr "adjust_len" + "out_bitop, plus, addto_sp, + tsthi, tstpsi, tstsi, compare, compare64, call, + mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32, + ufract, sfract, round, + xload, lpm, movmem, + ashlqi, ashrqi, lshrqi, + ashlhi, ashrhi, lshrhi, + ashlsi, ashrsi, lshrsi, + ashlpsi, ashrpsi, lshrpsi, + insert_bits, + no" + (const_string "no")) + +;; Flavours of instruction set architecture (ISA), used in enabled attribute + +;; mov : ISA has no MOVW movw : ISA has MOVW +;; rjmp : ISA has no CALL/JMP jmp : ISA has CALL/JMP +;; ijmp : ISA has no EICALL/EIJMP eijmp : ISA has EICALL/EIJMP +;; lpm : ISA has no LPMX lpmx : ISA has LPMX +;; elpm : ISA has ELPM but no ELPMX elpmx : ISA has ELPMX +;; no_xmega: non-XMEGA core xmega : XMEGA core + +(define_attr "isa" + "mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega, + standard" + (const_string "standard")) + +(define_attr "enabled" "" + (cond [(eq_attr "isa" "standard") + (const_int 1) + + (and (eq_attr "isa" "mov") + (match_test "!AVR_HAVE_MOVW")) + (const_int 1) + + (and (eq_attr "isa" "movw") + (match_test "AVR_HAVE_MOVW")) + (const_int 1) + + (and (eq_attr "isa" "rjmp") + (match_test "!AVR_HAVE_JMP_CALL")) + (const_int 1) + + (and (eq_attr "isa" "jmp") + (match_test "AVR_HAVE_JMP_CALL")) + (const_int 1) + + (and (eq_attr "isa" "ijmp") + (match_test "!AVR_HAVE_EIJMP_EICALL")) + (const_int 1) + + (and (eq_attr "isa" "eijmp") + (match_test "AVR_HAVE_EIJMP_EICALL")) + (const_int 1) + + (and (eq_attr "isa" "lpm") + (match_test "!AVR_HAVE_LPMX")) + (const_int 1) + + (and (eq_attr "isa" "lpmx") + (match_test "AVR_HAVE_LPMX")) + (const_int 1) + + (and (eq_attr "isa" "elpm") + (match_test "AVR_HAVE_ELPM && !AVR_HAVE_ELPMX")) + (const_int 1) + + (and (eq_attr "isa" "elpmx") + (match_test "AVR_HAVE_ELPMX")) + (const_int 1) + + (and (eq_attr "isa" "xmega") + (match_test "AVR_XMEGA")) + (const_int 1) + + (and (eq_attr "isa" "no_xmega") + (match_test "!AVR_XMEGA")) + (const_int 1) + ] (const_int 0))) + + +;; Define mode iterators +(define_mode_iterator QIHI [QI HI]) +(define_mode_iterator QIHI2 [QI HI]) +(define_mode_iterator QISI [QI HI PSI SI]) +(define_mode_iterator QIDI [QI HI PSI SI DI]) +(define_mode_iterator HISI [HI PSI SI]) + +(define_mode_iterator ALL1 [QI QQ UQQ]) +(define_mode_iterator ALL2 [HI HQ UHQ HA UHA]) +(define_mode_iterator ALL4 [SI SQ USQ SA USA]) + +;; All supported move-modes +(define_mode_iterator MOVMODE [QI QQ UQQ + HI HQ UHQ HA UHA + SI SQ USQ SA USA + SF PSI]) + +;; Supported ordered modes that are 2, 3, 4 bytes wide +(define_mode_iterator ORDERED234 [HI SI PSI + HQ UHQ HA UHA + SQ USQ SA USA]) + +;; Define code iterators +;; Define two incarnations so that we can build the cross product. +(define_code_iterator any_extend [sign_extend zero_extend]) +(define_code_iterator any_extend2 [sign_extend zero_extend]) + +(define_code_iterator xior [xor ior]) +(define_code_iterator eqne [eq ne]) + +(define_code_iterator ss_addsub [ss_plus ss_minus]) +(define_code_iterator us_addsub [us_plus us_minus]) +(define_code_iterator ss_abs_neg [ss_abs ss_neg]) + +;; Define code attributes +(define_code_attr extend_su + [(sign_extend "s") + (zero_extend "u")]) + +(define_code_attr extend_u + [(sign_extend "") + (zero_extend "u")]) + +(define_code_attr extend_s + [(sign_extend "s") + (zero_extend "")]) + +;; Constrain input operand of widening multiply, i.e. MUL resp. MULS. 
+(define_code_attr mul_r_d + [(zero_extend "r") + (sign_extend "d")]) + +(define_code_attr abelian + [(ss_minus "") (us_minus "") + (ss_plus "%") (us_plus "%")]) + +;; Map RTX code to its standard insn name +(define_code_attr code_stdname + [(ashift "ashl") + (ashiftrt "ashr") + (lshiftrt "lshr") + (ior "ior") + (xor "xor") + (rotate "rotl") + (ss_plus "ssadd") (ss_minus "sssub") (ss_neg "ssneg") (ss_abs "ssabs") + (us_plus "usadd") (us_minus "ussub") (us_neg "usneg") + ]) + +;;======================================================================== +;; The following is used by nonlocal_goto and setjmp. +;; The receiver pattern will create no instructions since internally +;; virtual_stack_vars = hard_frame_pointer + 1 so the RTL become R28=R28 +;; This avoids creating add/sub offsets in frame_pointer save/resore. +;; The 'null' receiver also avoids problems with optimisation +;; not recognising incoming jmp and removing code that resets frame_pointer. +;; The code derived from builtins.c. + +(define_expand "nonlocal_goto_receiver" + [(set (reg:HI REG_Y) + (unspec_volatile:HI [(const_int 0)] UNSPECV_GOTO_RECEIVER))] + "" + { + emit_move_insn (virtual_stack_vars_rtx, + gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, + gen_int_mode (STARTING_FRAME_OFFSET, + Pmode))); + /* ; This might change the hard frame pointer in ways that aren't + ; apparent to early optimization passes, so force a clobber. */ + emit_clobber (hard_frame_pointer_rtx); + DONE; + }) + + +;; Defining nonlocal_goto_receiver means we must also define this. +;; even though its function is identical to that in builtins.c + +(define_expand "nonlocal_goto" + [(use (match_operand 0 "general_operand")) + (use (match_operand 1 "general_operand")) + (use (match_operand 2 "general_operand")) + (use (match_operand 3 "general_operand"))] + "" + { + rtx r_label = copy_to_reg (operands[1]); + rtx r_fp = operands[3]; + rtx r_sp = operands[2]; + + emit_clobber (gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode))); + + emit_clobber (gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx)); + + emit_move_insn (hard_frame_pointer_rtx, r_fp); + emit_stack_restore (SAVE_NONLOCAL, r_sp); + + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + emit_indirect_jump (r_label); + + DONE; + }) + +;; "pushqi1" +;; "pushqq1" "pushuqq1" +(define_insn "push1" + [(set (mem:ALL1 (post_dec:HI (reg:HI REG_SP))) + (match_operand:ALL1 0 "reg_or_0_operand" "r,Y00"))] + "" + "@ + push %0 + push __zero_reg__" + [(set_attr "length" "1,1")]) + +;; All modes for a multi-byte push. We must include complex modes here too, +;; lest emit_single_push_insn "helpfully" create the auto-inc itself. +(define_mode_iterator MPUSH + [CQI + HI CHI HA UHA HQ UHQ + SI CSI SA USA SQ USQ + DI CDI DA UDA DQ UDQ + TA UTA + SF SC + PSI]) + +(define_expand "push1" + [(match_operand:MPUSH 0 "" "")] + "" + { + int i; + for (i = GET_MODE_SIZE (mode) - 1; i >= 0; --i) + { + rtx part = simplify_gen_subreg (QImode, operands[0], mode, i); + if (part != const0_rtx) + part = force_reg (QImode, part); + emit_insn (gen_pushqi1 (part)); + } + DONE; + }) + +;; Notice a special-case when adding N to SP where N results in a +;; zero REG_ARGS_SIZE. This is equivalent to a move from FP. 
+(define_split + [(set (reg:HI REG_SP) + (match_operand:HI 0 "register_operand" ""))] + "reload_completed + && frame_pointer_needed + && !cfun->calls_alloca + && find_reg_note (insn, REG_ARGS_SIZE, const0_rtx)" + [(set (reg:HI REG_SP) + (reg:HI REG_Y))]) + +;;======================================================================== +;; Move stuff around + +;; Secondary input reload from non-generic 16-bit address spaces +(define_insn "reload_in" + [(set (match_operand:MOVMODE 0 "register_operand" "=r") + (match_operand:MOVMODE 1 "flash_operand" "m")) + (clobber (match_operand:QI 2 "d_register_operand" "=d"))] + ;; Fixme: The insn condition must not test the address space. + ;; Because the gen tools refuse to generate insns for address spaces + ;; and will generate insn-codes.h to look like: + ;; #define CODE_FOR_reload_inhi CODE_FOR_nothing + "reload_completed || reload_in_progress" + { + return avr_out_lpm (insn, operands, NULL); + } + [(set_attr "adjust_len" "lpm") + (set_attr "cc" "clobber")]) + + +;; "loadqi_libgcc" +;; "loadhi_libgcc" +;; "loadpsi_libgcc" +;; "loadsi_libgcc" +;; "loadsf_libgcc" +(define_expand "load_libgcc" + [(set (match_dup 3) + (match_dup 2)) + (set (reg:MOVMODE 22) + (match_operand:MOVMODE 1 "memory_operand" "")) + (set (match_operand:MOVMODE 0 "register_operand" "") + (reg:MOVMODE 22))] + "avr_load_libgcc_p (operands[1])" + { + operands[3] = gen_rtx_REG (HImode, REG_Z); + operands[2] = force_operand (XEXP (operands[1], 0), NULL_RTX); + operands[1] = replace_equiv_address (operands[1], operands[3]); + set_mem_addr_space (operands[1], ADDR_SPACE_FLASH); + }) + +;; "load_qi_libgcc" +;; "load_hi_libgcc" +;; "load_psi_libgcc" +;; "load_si_libgcc" +;; "load_sf_libgcc" +(define_insn "load__libgcc" + [(set (reg:MOVMODE 22) + (match_operand:MOVMODE 0 "memory_operand" "m,m"))] + "avr_load_libgcc_p (operands[0]) + && REG_P (XEXP (operands[0], 0)) + && REG_Z == REGNO (XEXP (operands[0], 0))" + { + operands[0] = GEN_INT (GET_MODE_SIZE (mode)); + return "%~call __load_%0"; + } + [(set_attr "length" "1,2") + (set_attr "isa" "rjmp,jmp") + (set_attr "cc" "clobber")]) + + +;; "xload8qi_A" +;; "xload8qq_A" "xload8uqq_A" +(define_insn_and_split "xload8_A" + [(set (match_operand:ALL1 0 "register_operand" "=r") + (match_operand:ALL1 1 "memory_operand" "m")) + (clobber (reg:HI REG_Z))] + "can_create_pseudo_p() + && !avr_xload_libgcc_p (mode) + && avr_mem_memx_p (operands[1]) + && REG_P (XEXP (operands[1], 0))" + { gcc_unreachable(); } + "&& 1" + [(clobber (const_int 0))] + { + /* ; Split away the high part of the address. GCC's register allocator + ; in not able to allocate segment registers and reload the resulting + ; expressions. Notice that no address register can hold a PSImode. 
*/ + + rtx insn, addr = XEXP (operands[1], 0); + rtx hi8 = gen_reg_rtx (QImode); + rtx reg_z = gen_rtx_REG (HImode, REG_Z); + + emit_move_insn (reg_z, simplify_gen_subreg (HImode, addr, PSImode, 0)); + emit_move_insn (hi8, simplify_gen_subreg (QImode, addr, PSImode, 2)); + + insn = emit_insn (gen_xload_8 (operands[0], hi8)); + set_mem_addr_space (SET_SRC (single_set (insn)), + MEM_ADDR_SPACE (operands[1])); + DONE; + }) + +;; "xloadqi_A" "xloadqq_A" "xloaduqq_A" +;; "xloadhi_A" "xloadhq_A" "xloaduhq_A" "xloadha_A" "xloaduha_A" +;; "xloadsi_A" "xloadsq_A" "xloadusq_A" "xloadsa_A" "xloadusa_A" +;; "xloadpsi_A" +;; "xloadsf_A" +(define_insn_and_split "xload_A" + [(set (match_operand:MOVMODE 0 "register_operand" "=r") + (match_operand:MOVMODE 1 "memory_operand" "m")) + (clobber (reg:MOVMODE 22)) + (clobber (reg:QI 21)) + (clobber (reg:HI REG_Z))] + "can_create_pseudo_p() + && avr_mem_memx_p (operands[1]) + && REG_P (XEXP (operands[1], 0))" + { gcc_unreachable(); } + "&& 1" + [(clobber (const_int 0))] + { + rtx addr = XEXP (operands[1], 0); + rtx reg_z = gen_rtx_REG (HImode, REG_Z); + rtx addr_hi8 = simplify_gen_subreg (QImode, addr, PSImode, 2); + addr_space_t as = MEM_ADDR_SPACE (operands[1]); + rtx insn; + + /* Split the address to R21:Z */ + emit_move_insn (reg_z, simplify_gen_subreg (HImode, addr, PSImode, 0)); + emit_move_insn (gen_rtx_REG (QImode, 21), addr_hi8); + + /* Load with code from libgcc */ + insn = emit_insn (gen_xload__libgcc ()); + set_mem_addr_space (SET_SRC (single_set (insn)), as); + + /* Move to destination */ + emit_move_insn (operands[0], gen_rtx_REG (mode, 22)); + + DONE; + }) + +;; Move value from address space memx to a register +;; These insns must be prior to respective generic move insn. + +;; "xloadqi_8" +;; "xloadqq_8" "xloaduqq_8" +(define_insn "xload_8" + [(set (match_operand:ALL1 0 "register_operand" "=&r,r") + (mem:ALL1 (lo_sum:PSI (match_operand:QI 1 "register_operand" "r,r") + (reg:HI REG_Z))))] + "!avr_xload_libgcc_p (mode)" + { + return avr_out_xload (insn, operands, NULL); + } + [(set_attr "length" "4,4") + (set_attr "adjust_len" "*,xload") + (set_attr "isa" "lpmx,lpm") + (set_attr "cc" "none")]) + +;; R21:Z : 24-bit source address +;; R22 : 1-4 byte output + +;; "xload_qi_libgcc" "xload_qq_libgcc" "xload_uqq_libgcc" +;; "xload_hi_libgcc" "xload_hq_libgcc" "xload_uhq_libgcc" "xload_ha_libgcc" "xload_uha_libgcc" +;; "xload_si_libgcc" "xload_sq_libgcc" "xload_usq_libgcc" "xload_sa_libgcc" "xload_usa_libgcc" +;; "xload_sf_libgcc" +;; "xload_psi_libgcc" +(define_insn "xload__libgcc" + [(set (reg:MOVMODE 22) + (mem:MOVMODE (lo_sum:PSI (reg:QI 21) + (reg:HI REG_Z)))) + (clobber (reg:QI 21)) + (clobber (reg:HI REG_Z))] + "avr_xload_libgcc_p (mode)" + { + rtx x_bytes = GEN_INT (GET_MODE_SIZE (mode)); + + output_asm_insn ("%~call __xload_%0", &x_bytes); + return ""; + } + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; General move expanders + +;; "movqi" "movqq" "movuqq" +;; "movhi" "movhq" "movuhq" "movha" "movuha" +;; "movsi" "movsq" "movusq" "movsa" "movusa" +;; "movsf" +;; "movpsi" +(define_expand "mov" + [(set (match_operand:MOVMODE 0 "nonimmediate_operand" "") + (match_operand:MOVMODE 1 "general_operand" ""))] + "" + { + rtx dest = operands[0]; + rtx src = operands[1]; + + if (avr_mem_flash_p (dest)) + DONE; + + /* One of the operands has to be in a register. 
*/ + if (!register_operand (dest, mode) + && !reg_or_0_operand (src, mode)) + { + operands[1] = src = copy_to_mode_reg (mode, src); + } + + if (avr_mem_memx_p (src)) + { + rtx addr = XEXP (src, 0); + + if (!REG_P (addr)) + src = replace_equiv_address (src, copy_to_mode_reg (PSImode, addr)); + + if (!avr_xload_libgcc_p (mode)) + /* ; No here because gen_xload8_A only iterates over ALL1. + ; insn-emit does not depend on the mode, it's all about operands. */ + emit_insn (gen_xload8qi_A (dest, src)); + else + emit_insn (gen_xload_A (dest, src)); + + DONE; + } + + if (avr_load_libgcc_p (src)) + { + /* For the small devices, do loads per libgcc call. */ + emit_insn (gen_load_libgcc (dest, src)); + DONE; + } + }) + +;;======================================================================== +;; move byte +;; The last alternative (any immediate constant to any register) is +;; very expensive. It should be optimized by peephole2 if a scratch +;; register is available, but then that register could just as well be +;; allocated for the variable we are loading. But, most of NO_LD_REGS +;; are call-saved registers, and most of LD_REGS are call-used registers, +;; so this may still be a win for registers live across function calls. + +;; "movqi_insn" +;; "movqq_insn" "movuqq_insn" +(define_insn "mov_insn" + [(set (match_operand:ALL1 0 "nonimmediate_operand" "=r ,d ,Qm ,r ,q,r,*r") + (match_operand:ALL1 1 "nox_general_operand" "r Y00,n Ynn,r Y00,Qm,r,q,i"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + { + return output_movqi (insn, operands, NULL); + } + [(set_attr "length" "1,1,5,5,1,1,4") + (set_attr "adjust_len" "mov8") + (set_attr "cc" "ldi,none,clobber,clobber,none,none,clobber")]) + +;; This is used in peephole2 to optimize loading immediate constants +;; if a scratch register from LD_REGS happens to be available. + +;; "*reload_inqi" +;; "*reload_inqq" "*reload_inuqq" +(define_insn "*reload_in" + [(set (match_operand:ALL1 0 "register_operand" "=l") + (match_operand:ALL1 1 "const_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + "ldi %2,lo8(%1) + mov %0,%2" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +(define_peephole2 + [(match_scratch:QI 2 "d") + (set (match_operand:ALL1 0 "l_register_operand" "") + (match_operand:ALL1 1 "const_operand" ""))] + ; No need for a clobber reg for 0x0, 0x01 or 0xff + "!satisfies_constraint_Y00 (operands[1]) + && !satisfies_constraint_Y01 (operands[1]) + && !satisfies_constraint_Ym1 (operands[1])" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;;============================================================================ +;; move word (16 bit) + +;; Move register $1 to the Stack Pointer register SP. +;; This insn is emit during function prologue/epilogue generation. 
+;; $2 = 0: We know that IRQs are off +;; $2 = 1: We know that IRQs are on +;; $2 = 2: SP has 8 bits only, IRQ state does not matter +;; $2 = -1: We don't know anything about IRQ on/off +;; Always write SP via unspec, see PR50063 + +(define_insn "movhi_sp_r" + [(set (match_operand:HI 0 "stack_register_operand" "=q,q,q,q,q") + (unspec_volatile:HI [(match_operand:HI 1 "register_operand" "r,r,r,r,r") + (match_operand:HI 2 "const_int_operand" "L,P,N,K,LPN")] + UNSPECV_WRITE_SP))] + "" + "@ + out %B0,%B1\;out %A0,%A1 + cli\;out %B0,%B1\;sei\;out %A0,%A1 + in __tmp_reg__,__SREG__\;cli\;out %B0,%B1\;out __SREG__,__tmp_reg__\;out %A0,%A1 + out %A0,%A1 + out %A0,%A1\;out %B0,%B1" + [(set_attr "length" "2,4,5,1,2") + (set_attr "isa" "no_xmega,no_xmega,no_xmega,*,xmega") + (set_attr "cc" "none")]) + +(define_peephole2 + [(match_scratch:QI 2 "d") + (set (match_operand:ALL2 0 "l_register_operand" "") + (match_operand:ALL2 1 "const_or_immediate_operand" ""))] + "operands[1] != CONST0_RTX (mode)" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;; '*' because it is not used in rtl generation, only in above peephole +;; "*reload_inhi" +;; "*reload_inhq" "*reload_inuhq" +;; "*reload_inha" "*reload_inuha" +(define_insn "*reload_in" + [(set (match_operand:ALL2 0 "l_register_operand" "=l") + (match_operand:ALL2 1 "immediate_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + { + return output_reload_inhi (operands, operands[2], NULL); + } + [(set_attr "length" "4") + (set_attr "adjust_len" "reload_in16") + (set_attr "cc" "clobber")]) + +;; "*movhi" +;; "*movhq" "*movuhq" +;; "*movha" "*movuha" +(define_insn "*mov" + [(set (match_operand:ALL2 0 "nonimmediate_operand" "=r,r ,r,m ,d,*r,q,r") + (match_operand:ALL2 1 "nox_general_operand" "r,Y00,m,r Y00,i,i ,r,q"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + { + return output_movhi (insn, operands, NULL); + } + [(set_attr "length" "2,2,6,7,2,6,5,2") + (set_attr "adjust_len" "mov16") + (set_attr "cc" "none,none,clobber,clobber,none,clobber,none,none")]) + +(define_peephole2 ; movw + [(set (match_operand:ALL1 0 "even_register_operand" "") + (match_operand:ALL1 1 "even_register_operand" "")) + (set (match_operand:ALL1 2 "odd_register_operand" "") + (match_operand:ALL1 3 "odd_register_operand" ""))] + "AVR_HAVE_MOVW + && REGNO (operands[0]) == REGNO (operands[2]) - 1 + && REGNO (operands[1]) == REGNO (operands[3]) - 1" + [(set (match_dup 4) + (match_dup 5))] + { + operands[4] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[5] = gen_rtx_REG (HImode, REGNO (operands[1])); + }) + +(define_peephole2 ; movw_r + [(set (match_operand:ALL1 0 "odd_register_operand" "") + (match_operand:ALL1 1 "odd_register_operand" "")) + (set (match_operand:ALL1 2 "even_register_operand" "") + (match_operand:ALL1 3 "even_register_operand" ""))] + "AVR_HAVE_MOVW + && REGNO (operands[2]) == REGNO (operands[0]) - 1 + && REGNO (operands[3]) == REGNO (operands[1]) - 1" + [(set (match_dup 4) + (match_dup 5))] + { + operands[4] = gen_rtx_REG (HImode, REGNO (operands[2])); + operands[5] = gen_rtx_REG (HImode, REGNO (operands[3])); + }) + +;; For LPM loads from AS1 we split +;; R = *Z +;; to +;; R = *Z++ +;; Z = Z - sizeof (R) +;; +;; so that the second instruction can be optimized out. 
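+;;
+;; A rough illustration (not part of the original sources; names made up):
+;; a read through a pointer into the __flash address space is the kind of
+;; source that ends up as such an LPM load through Z.
+;;
+;;     int read_flash (const __flash int *p)
+;;     {
+;;         return *p;    /* R = *Z, split into R = *Z++ and Z -= sizeof (R) */
+;;     }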
+ +(define_split ; "split-lpmx" + [(set (match_operand:HISI 0 "register_operand" "") + (match_operand:HISI 1 "memory_operand" ""))] + "reload_completed + && AVR_HAVE_LPMX" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 3) + (plus:HI (match_dup 3) + (match_dup 4)))] + { + rtx addr = XEXP (operands[1], 0); + + if (!avr_mem_flash_p (operands[1]) + || !REG_P (addr) + || reg_overlap_mentioned_p (addr, operands[0])) + { + FAIL; + } + + operands[2] = replace_equiv_address (operands[1], + gen_rtx_POST_INC (Pmode, addr)); + operands[3] = addr; + operands[4] = gen_int_mode (-GET_MODE_SIZE (mode), HImode); + }) + +;;========================================================================== +;; xpointer move (24 bit) + +(define_peephole2 ; *reload_inpsi + [(match_scratch:QI 2 "d") + (set (match_operand:PSI 0 "l_register_operand" "") + (match_operand:PSI 1 "immediate_operand" "")) + (match_dup 2)] + "operands[1] != const0_rtx + && operands[1] != constm1_rtx" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;; '*' because it is not used in rtl generation. +(define_insn "*reload_inpsi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (match_operand:PSI 1 "immediate_operand" "i")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + { + return avr_out_reload_inpsi (operands, operands[2], NULL); + } + [(set_attr "length" "6") + (set_attr "adjust_len" "reload_in24") + (set_attr "cc" "clobber")]) + +(define_insn "*movpsi" + [(set (match_operand:PSI 0 "nonimmediate_operand" "=r,r,r ,Qm,!d,r") + (match_operand:PSI 1 "nox_general_operand" "r,L,Qm,rL,i ,i"))] + "register_operand (operands[0], PSImode) + || register_operand (operands[1], PSImode) + || const0_rtx == operands[1]" + { + return avr_out_movpsi (insn, operands, NULL); + } + [(set_attr "length" "3,3,8,9,4,10") + (set_attr "adjust_len" "mov24") + (set_attr "cc" "none,none,clobber,clobber,none,clobber")]) + +;;========================================================================== +;; move double word (32 bit) + +(define_peephole2 ; *reload_insi + [(match_scratch:QI 2 "d") + (set (match_operand:ALL4 0 "l_register_operand" "") + (match_operand:ALL4 1 "immediate_operand" "")) + (match_dup 2)] + "operands[1] != CONST0_RTX (mode)" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;; '*' because it is not used in rtl generation. 
+;; "*reload_insi" +;; "*reload_insq" "*reload_inusq" +;; "*reload_insa" "*reload_inusa" +(define_insn "*reload_insi" + [(set (match_operand:ALL4 0 "register_operand" "=r") + (match_operand:ALL4 1 "immediate_operand" "n Ynn")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + { + return output_reload_insisf (operands, operands[2], NULL); + } + [(set_attr "length" "8") + (set_attr "adjust_len" "reload_in32") + (set_attr "cc" "clobber")]) + + +;; "*movsi" +;; "*movsq" "*movusq" +;; "*movsa" "*movusa" +(define_insn "*mov" + [(set (match_operand:ALL4 0 "nonimmediate_operand" "=r,r ,r ,Qm ,!d,r") + (match_operand:ALL4 1 "nox_general_operand" "r,Y00,Qm,r Y00,i ,i"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + { + return output_movsisf (insn, operands, NULL); + } + [(set_attr "length" "4,4,8,9,4,10") + (set_attr "adjust_len" "mov32") + (set_attr "cc" "none,none,clobber,clobber,none,clobber")]) + +;; fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff +;; move floating point numbers (32 bit) + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r ,Qm,!d,r") + (match_operand:SF 1 "nox_general_operand" "r,G,Qm,rG,F ,F"))] + "register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)" + { + return output_movsisf (insn, operands, NULL); + } + [(set_attr "length" "4,4,8,9,4,10") + (set_attr "adjust_len" "mov32") + (set_attr "cc" "none,none,clobber,clobber,none,clobber")]) + +(define_peephole2 ; *reload_insf + [(match_scratch:QI 2 "d") + (set (match_operand:SF 0 "l_register_operand" "") + (match_operand:SF 1 "const_double_operand" "")) + (match_dup 2)] + "operands[1] != CONST0_RTX (SFmode)" + [(parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (match_dup 2))])]) + +;; '*' because it is not used in rtl generation. 
+(define_insn "*reload_insf" + [(set (match_operand:SF 0 "register_operand" "=r") + (match_operand:SF 1 "const_double_operand" "F")) + (clobber (match_operand:QI 2 "register_operand" "=&d"))] + "reload_completed" + { + return output_reload_insisf (operands, operands[2], NULL); + } + [(set_attr "length" "8") + (set_attr "adjust_len" "reload_in32") + (set_attr "cc" "clobber")]) + +;;========================================================================= +;; move string (like memcpy) + +(define_expand "movmemhi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:HI 2 "const_int_operand" "")) + (use (match_operand:HI 3 "const_int_operand" ""))])] + "" + { + if (avr_emit_movmemhi (operands)) + DONE; + + FAIL; + }) + +(define_mode_attr MOVMEM_r_d [(QI "r") + (HI "wd")]) + +;; $0 : Address Space +;; $1, $2 : Loop register +;; R30 : source address +;; R26 : destination address + +;; "movmem_qi" +;; "movmem_hi" +(define_insn "movmem_" + [(set (mem:BLK (reg:HI REG_X)) + (mem:BLK (reg:HI REG_Z))) + (unspec [(match_operand:QI 0 "const_int_operand" "n")] + UNSPEC_MOVMEM) + (use (match_operand:QIHI 1 "register_operand" "")) + (clobber (reg:HI REG_X)) + (clobber (reg:HI REG_Z)) + (clobber (reg:QI LPM_REGNO)) + (clobber (match_operand:QIHI 2 "register_operand" "=1"))] + "" + { + return avr_out_movmem (insn, operands, NULL); + } + [(set_attr "adjust_len" "movmem") + (set_attr "cc" "clobber")]) + + +;; $0 : Address Space +;; $1 : RAMPZ RAM address +;; R24 : #bytes and loop register +;; R23:Z : 24-bit source address +;; R26 : 16-bit destination address + +;; "movmemx_qi" +;; "movmemx_hi" +(define_insn "movmemx_" + [(set (mem:BLK (reg:HI REG_X)) + (mem:BLK (lo_sum:PSI (reg:QI 23) + (reg:HI REG_Z)))) + (unspec [(match_operand:QI 0 "const_int_operand" "n")] + UNSPEC_MOVMEM) + (use (reg:QIHI 24)) + (clobber (reg:HI REG_X)) + (clobber (reg:HI REG_Z)) + (clobber (reg:QI LPM_REGNO)) + (clobber (reg:HI 24)) + (clobber (reg:QI 23)) + (clobber (mem:QI (match_operand:QI 1 "io_address_operand" "n")))] + "" + "%~call __movmemx_" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 =%2 +;; memset (%0, %2, %1) + +(define_expand "setmemhi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand 2 "const_int_operand" "")) + (use (match_operand:HI 1 "const_int_operand" "")) + (use (match_operand:HI 3 "const_int_operand" "")) + (clobber (match_scratch:HI 4 "")) + (clobber (match_dup 5))])] + "" + { + rtx addr0; + enum machine_mode mode; + + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (!CONST_INT_P (operands[1])) + FAIL; + + mode = u8_operand (operands[1], VOIDmode) ? 
QImode : HImode; + operands[5] = gen_rtx_SCRATCH (mode); + operands[1] = copy_to_mode_reg (mode, + gen_int_mode (INTVAL (operands[1]), mode)); + addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + operands[0] = gen_rtx_MEM (BLKmode, addr0); + }) + + +(define_insn "*clrmemqi" + [(set (mem:BLK (match_operand:HI 0 "register_operand" "e")) + (const_int 0)) + (use (match_operand:QI 1 "register_operand" "r")) + (use (match_operand:QI 2 "const_int_operand" "n")) + (clobber (match_scratch:HI 3 "=0")) + (clobber (match_scratch:QI 4 "=&1"))] + "" + "0:\;st %a0+,__zero_reg__\;dec %1\;brne 0b" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + + +(define_insn "*clrmemhi" + [(set (mem:BLK (match_operand:HI 0 "register_operand" "e,e")) + (const_int 0)) + (use (match_operand:HI 1 "register_operand" "!w,d")) + (use (match_operand:HI 2 "const_int_operand" "n,n")) + (clobber (match_scratch:HI 3 "=0,0")) + (clobber (match_scratch:HI 4 "=&1,&1"))] + "" + "@ + 0:\;st %a0+,__zero_reg__\;sbiw %A1,1\;brne 0b + 0:\;st %a0+,__zero_reg__\;subi %A1,1\;sbci %B1,0\;brne 0b" + [(set_attr "length" "3,4") + (set_attr "cc" "clobber,clobber")]) + +(define_expand "strlenhi" + [(set (match_dup 4) + (unspec:HI [(match_operand:BLK 1 "memory_operand" "") + (match_operand:QI 2 "const_int_operand" "") + (match_operand:HI 3 "immediate_operand" "")] + UNSPEC_STRLEN)) + (set (match_dup 4) + (plus:HI (match_dup 4) + (const_int -1))) + (parallel [(set (match_operand:HI 0 "register_operand" "") + (minus:HI (match_dup 4) + (match_dup 5))) + (clobber (scratch:QI))])] + "" + { + rtx addr; + if (operands[2] != const0_rtx) + FAIL; + addr = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + operands[1] = gen_rtx_MEM (BLKmode, addr); + operands[5] = addr; + operands[4] = gen_reg_rtx (HImode); + }) + +(define_insn "*strlenhi" + [(set (match_operand:HI 0 "register_operand" "=e") + (unspec:HI [(mem:BLK (match_operand:HI 1 "register_operand" "0")) + (const_int 0) + (match_operand:HI 2 "immediate_operand" "i")] + UNSPEC_STRLEN))] + "" + "0:\;ld __tmp_reg__,%a0+\;tst __tmp_reg__\;brne 0b" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +; add bytes + +;; "addqi3" +;; "addqq3" "adduqq3" +(define_insn "add3" + [(set (match_operand:ALL1 0 "register_operand" "=r,d ,r ,r ,r ,r") + (plus:ALL1 (match_operand:ALL1 1 "register_operand" "%0,0 ,0 ,0 ,0 ,0") + (match_operand:ALL1 2 "nonmemory_operand" "r,n Ynn,Y01,Ym1,Y02,Ym2")))] + "" + "@ + add %0,%2 + subi %0,lo8(-(%2)) + inc %0 + dec %0 + inc %0\;inc %0 + dec %0\;dec %0" + [(set_attr "length" "1,1,1,1,2,2") + (set_attr "cc" "set_czn,set_czn,set_zn,set_zn,set_zn,set_zn")]) + +;; "addhi3" +;; "addhq3" "adduhq3" +;; "addha3" "adduha3" +(define_expand "add3" + [(set (match_operand:ALL2 0 "register_operand" "") + (plus:ALL2 (match_operand:ALL2 1 "register_operand" "") + (match_operand:ALL2 2 "nonmemory_or_const_operand" "")))] + "" + { + if (CONST_INT_P (operands[2])) + { + operands[2] = gen_int_mode (INTVAL (operands[2]), HImode); + + if (can_create_pseudo_p() + && !stack_register_operand (operands[0], HImode) + && !stack_register_operand (operands[1], HImode) + && !d_register_operand (operands[0], HImode) + && !d_register_operand (operands[1], HImode)) + { + emit_insn (gen_addhi3_clobber (operands[0], operands[1], operands[2])); + DONE; + } + } + + if (CONST_FIXED_P (operands[2])) + { + emit_insn (gen_add3_clobber (operands[0], operands[1], operands[2])); + DONE; + } + }) + + +(define_insn 
"*addhi3_zero_extend" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "0")))] + "" + "add %A0,%1\;adc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "*addhi3_zero_extend1" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "" + "add %A0,%2\;adc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "*addhi3.sign_extend1" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "0")))] + "" + { + return reg_overlap_mentioned_p (operands[0], operands[1]) + ? "mov __tmp_reg__,%1\;add %A0,%1\;adc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;dec %B0" + : "add %A0,%1\;adc %B0,__zero_reg__\;sbrc %1,7\;dec %B0"; + } + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +(define_insn "*addhi3_sp" + [(set (match_operand:HI 1 "stack_register_operand" "=q") + (plus:HI (match_operand:HI 2 "stack_register_operand" "q") + (match_operand:HI 0 "avr_sp_immediate_operand" "Csp")))] + "" + { + return avr_out_addto_sp (operands, NULL); + } + [(set_attr "length" "6") + (set_attr "adjust_len" "addto_sp")]) + +;; "*addhi3" +;; "*addhq3" "*adduhq3" +;; "*addha3" "*adduha3" +(define_insn "*add3" + [(set (match_operand:ALL2 0 "register_operand" "=??r,d,!w ,d") + (plus:ALL2 (match_operand:ALL2 1 "register_operand" "%0,0,0 ,0") + (match_operand:ALL2 2 "nonmemory_or_const_operand" "r,s,IJ YIJ,n Ynn")))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "length" "2") + (set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +;; Adding a constant to NO_LD_REGS might have lead to a reload of +;; that constant to LD_REGS. We don't add a scratch to *addhi3 +;; itself because that insn is special to reload. 
+ +(define_peephole2 ; addhi3_clobber + [(set (match_operand:ALL2 0 "d_register_operand" "") + (match_operand:ALL2 1 "const_operand" "")) + (set (match_operand:ALL2 2 "l_register_operand" "") + (plus:ALL2 (match_dup 2) + (match_dup 0)))] + "peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (match_dup 2) + (plus:ALL2 (match_dup 2) + (match_dup 1))) + (clobber (match_dup 3))])] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], mode, 0); + }) + +;; Same, but with reload to NO_LD_REGS +;; Combine *reload_inhi with *addhi3 + +(define_peephole2 ; addhi3_clobber + [(parallel [(set (match_operand:ALL2 0 "l_register_operand" "") + (match_operand:ALL2 1 "const_operand" "")) + (clobber (match_operand:QI 2 "d_register_operand" ""))]) + (set (match_operand:ALL2 3 "l_register_operand" "") + (plus:ALL2 (match_dup 3) + (match_dup 0)))] + "peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (match_dup 3) + (plus:ALL2 (match_dup 3) + (match_dup 1))) + (clobber (match_dup 2))])]) + +;; "addhi3_clobber" +;; "addhq3_clobber" "adduhq3_clobber" +;; "addha3_clobber" "adduha3_clobber" +(define_insn "add3_clobber" + [(set (match_operand:ALL2 0 "register_operand" "=!w ,d ,r") + (plus:ALL2 (match_operand:ALL2 1 "register_operand" "%0 ,0 ,0") + (match_operand:ALL2 2 "const_operand" "IJ YIJ,n Ynn,n Ynn"))) + (clobber (match_scratch:QI 3 "=X ,X ,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "length" "4") + (set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + + +;; "addsi3" +;; "addsq3" "addusq3" +;; "addsa3" "addusa3" +(define_insn "add3" + [(set (match_operand:ALL4 0 "register_operand" "=??r,d ,r") + (plus:ALL4 (match_operand:ALL4 1 "register_operand" "%0,0 ,0") + (match_operand:ALL4 2 "nonmemory_operand" "r,i ,n Ynn"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "length" "4") + (set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +(define_insn "*addpsi3_zero_extend.qi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (plus:PSI (zero_extend:PSI (match_operand:QI 1 "register_operand" "r")) + (match_operand:PSI 2 "register_operand" "0")))] + "" + "add %A0,%A1\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "set_n")]) + +(define_insn "*addpsi3_zero_extend.hi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (plus:PSI (zero_extend:PSI (match_operand:HI 1 "register_operand" "r")) + (match_operand:PSI 2 "register_operand" "0")))] + "" + "add %A0,%A1\;adc %B0,%B1\;adc %C0,__zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "set_n")]) + +(define_insn "*addpsi3_sign_extend.hi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (plus:PSI (sign_extend:PSI (match_operand:HI 1 "register_operand" "r")) + (match_operand:PSI 2 "register_operand" "0")))] + "" + "add %A0,%1\;adc %B0,%B1\;adc %C0,__zero_reg__\;sbrc %B1,7\;dec %C0" + [(set_attr "length" "5") + (set_attr "cc" "set_n")]) + +(define_insn "*addsi3_zero_extend" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:QI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "0")))] + "" + "add %A0,%1\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +(define_insn "*addsi3_zero_extend.hi" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "0")))] + "" + 
"add %A0,%1\;adc %B0,%B1\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +(define_insn "addpsi3" + [(set (match_operand:PSI 0 "register_operand" "=??r,d ,d,r") + (plus:PSI (match_operand:PSI 1 "register_operand" "%0,0 ,0,0") + (match_operand:PSI 2 "nonmemory_operand" "r,s ,n,n"))) + (clobber (match_scratch:QI 3 "=X,X ,X,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "length" "3") + (set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +(define_insn "subpsi3" + [(set (match_operand:PSI 0 "register_operand" "=r") + (minus:PSI (match_operand:PSI 1 "register_operand" "0") + (match_operand:PSI 2 "register_operand" "r")))] + "" + "sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2" + [(set_attr "length" "3") + (set_attr "cc" "set_czn")]) + +(define_insn "*subpsi3_zero_extend.qi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (minus:PSI (match_operand:SI 1 "register_operand" "0") + (zero_extend:PSI (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "set_czn")]) + +(define_insn "*subpsi3_zero_extend.hi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (minus:PSI (match_operand:PSI 1 "register_operand" "0") + (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,%B2\;sbc %C0,__zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "set_czn")]) + +(define_insn "*subpsi3_sign_extend.hi" + [(set (match_operand:PSI 0 "register_operand" "=r") + (minus:PSI (match_operand:PSI 1 "register_operand" "0") + (sign_extend:PSI (match_operand:HI 2 "register_operand" "r"))))] + "" + "sub %A0,%A2\;sbc %B0,%B2\;sbc %C0,__zero_reg__\;sbrc %B2,7\;inc %C0" + [(set_attr "length" "5") + (set_attr "cc" "set_czn")]) + +;----------------------------------------------------------------------------- +; sub bytes + +;; "subqi3" +;; "subqq3" "subuqq3" +(define_insn "sub3" + [(set (match_operand:ALL1 0 "register_operand" "=??r,d ,r ,r ,r ,r") + (minus:ALL1 (match_operand:ALL1 1 "register_operand" "0,0 ,0 ,0 ,0 ,0") + (match_operand:ALL1 2 "nonmemory_or_const_operand" "r,n Ynn,Y01,Ym1,Y02,Ym2")))] + "" + "@ + sub %0,%2 + subi %0,lo8(%2) + dec %0 + inc %0 + dec %0\;dec %0 + inc %0\;inc %0" + [(set_attr "length" "1,1,1,1,2,2") + (set_attr "cc" "set_czn,set_czn,set_zn,set_zn,set_zn,set_zn")]) + +;; "subhi3" +;; "subhq3" "subuhq3" +;; "subha3" "subuha3" +(define_insn "sub3" + [(set (match_operand:ALL2 0 "register_operand" "=??r,d ,*r") + (minus:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,0") + (match_operand:ALL2 2 "nonmemory_or_const_operand" "r,i Ynn,Ynn"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +(define_insn "*subhi3_zero_extend1" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,__zero_reg__" + [(set_attr "length" "2") + (set_attr "cc" "set_czn")]) + +(define_insn "*subhi3.sign_extend2" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 1 "register_operand" "0") + (sign_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "" + { + return reg_overlap_mentioned_p (operands[0], operands[2]) + ? 
"mov __tmp_reg__,%2\;sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;inc %B0" + : "sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc %2,7\;inc %B0"; + } + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +;; "subsi3" +;; "subsq3" "subusq3" +;; "subsa3" "subusa3" +(define_insn "sub3" + [(set (match_operand:ALL4 0 "register_operand" "=??r,d ,r") + (minus:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,0") + (match_operand:ALL4 2 "nonmemory_or_const_operand" "r,n Ynn,Ynn"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + return avr_out_plus (insn, operands); + } + [(set_attr "adjust_len" "plus") + (set_attr "cc" "plus")]) + +(define_insn "*subsi3_zero_extend" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (zero_extend:SI (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_czn")]) + +(define_insn "*subsi3_zero_extend.hi" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,%B2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_czn")]) + +;****************************************************************************** +; mul + +(define_expand "mulqi3" + [(set (match_operand:QI 0 "register_operand" "") + (mult:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "register_operand" "")))] + "" + { + if (!AVR_HAVE_MUL) + { + emit_insn (gen_mulqi3_call (operands[0], operands[1], operands[2])); + DONE; + } + }) + +(define_insn "*mulqi3_enh" + [(set (match_operand:QI 0 "register_operand" "=r") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%2 + mov %0,r0 + clr r1" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_expand "mulqi3_call" + [(set (reg:QI 24) (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 22) (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22))]) + (set (match_operand:QI 0 "register_operand" "") (reg:QI 24))]) + +(define_insn "*mulqi3_call" + [(set (reg:QI 24) (mult:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22))] + "!AVR_HAVE_MUL" + "%~call __mulqi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "umulqi3_highpart" +;; "smulqi3_highpart" +(define_insn "mulqi3_highpart" + [(set (match_operand:QI 0 "register_operand" "=r") + (truncate:QI + (lshiftrt:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (any_extend:HI (match_operand:QI 2 "register_operand" ""))) + (const_int 8))))] + "AVR_HAVE_MUL" + "mul %1,%2 + mov %0,r1 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + + +;; Used when expanding div or mod inline for some special values +(define_insn "*subqi3.ashiftrt7" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 1 "register_operand" "0") + (ashiftrt:QI (match_operand:QI 2 "register_operand" "r") + (const_int 7))))] + "" + "sbrc %2,7\;inc %0" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + +(define_insn "*addqi3.lt0" + [(set (match_operand:QI 0 "register_operand" "=r") + (plus:QI (lt:QI (match_operand:QI 1 "register_operand" "r") + (const_int 0)) + (match_operand:QI 2 
"register_operand" "0")))] + "" + "sbrc %1,7\;inc %0" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + +(define_insn "*addhi3.lt0" + [(set (match_operand:HI 0 "register_operand" "=w,r") + (plus:HI (lt:HI (match_operand:QI 1 "register_operand" "r,r") + (const_int 0)) + (match_operand:HI 2 "register_operand" "0,0"))) + (clobber (match_scratch:QI 3 "=X,&1"))] + "" + "@ + sbrc %1,7\;adiw %0,1 + lsl %1\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__" + [(set_attr "length" "2,3") + (set_attr "cc" "clobber")]) + +(define_insn "*addpsi3.lt0" + [(set (match_operand:PSI 0 "register_operand" "=r") + (plus:PSI (lshiftrt:PSI (match_operand:PSI 1 "register_operand" "r") + (const_int 23)) + (match_operand:PSI 2 "register_operand" "0")))] + "" + "mov __tmp_reg__,%C1\;lsl __tmp_reg__ + adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +(define_insn "*addsi3.lt0" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") + (const_int 31)) + (match_operand:SI 2 "register_operand" "0")))] + "" + "mov __tmp_reg__,%D1\;lsl __tmp_reg__ + adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__" + [(set_attr "length" "6") + (set_attr "cc" "clobber")]) + +(define_insn "*umulqihi3.call" + [(set (reg:HI 24) + (mult:HI (zero_extend:HI (reg:QI 22)) + (zero_extend:HI (reg:QI 24)))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22))] + "!AVR_HAVE_MUL" + "%~call __umulqihi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "umulqihi3" +;; "mulqihi3" +(define_insn "mulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (any_extend:HI (match_operand:QI 2 "register_operand" ""))))] + "AVR_HAVE_MUL" + "mul %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "usmulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %2,%1 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;; Above insn is not canonicalized by insn combine, so here is a version with +;; operands swapped. 
+ +(define_insn "*sumulqihi3" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (zero_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +;; One-extend operand 1 + +(define_insn "*osmulqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "a")))) + (sign_extend:HI (match_operand:QI 2 "register_operand" "a"))))] + "AVR_HAVE_MUL" + "mulsu %2,%1 + movw %0,r0 + sub %B0,%2 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn "*oumulqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "r")))) + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %2,%1 + movw %0,r0 + sub %B0,%2 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;****************************************************************************** +; multiply-add/sub QI: $0 = $3 +/- $1*$2 +;****************************************************************************** + +(define_insn "*maddqi4" + [(set (match_operand:QI 0 "register_operand" "=r") + (plus:QI (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")) + (match_operand:QI 3 "register_operand" "0")))] + + "AVR_HAVE_MUL" + "mul %1,%2 + add %A0,r0 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn "*msubqi4" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 3 "register_operand" "0") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %1,%2 + sub %A0,r0 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*maddqi4.const" + [(set (match_operand:QI 0 "register_operand" "=r") + (plus:QI (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand" "n")) + (match_operand:QI 3 "register_operand" "0"))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *maddqi4 + (set (match_dup 0) + (plus:QI (mult:QI (match_dup 1) + (match_dup 4)) + (match_dup 3)))]) + +(define_insn_and_split "*msubqi4.const" + [(set (match_operand:QI 0 "register_operand" "=r") + (minus:QI (match_operand:QI 3 "register_operand" "0") + (mult:QI (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "const_int_operand" "n")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *msubqi4 + (set (match_dup 0) + (minus:QI (match_dup 3) + (mult:QI (match_dup 1) + (match_dup 4))))]) + + +;****************************************************************************** +; multiply-add/sub HI: $0 = $3 +/- $1*$2 with 8-bit values $1, $2 +;****************************************************************************** + +;; We don't use standard insns/expanders as they lead to cumbersome code for, +;; e.g, +;; +;; int foo (unsigned char z) +;; { +;; extern int aInt[]; +;; return aInt[3*z+2]; +;; } +;; +;; because the constant +4 then is added explicitely instead of consuming it +;; with the aInt symbol. 
Therefore, we rely on insn combine which takes costs +;; into account more accurately and doesn't do burte-force multiply-add/sub. +;; The implementational effort is the same so we are fine with that approach. + + +;; "*maddqihi4" +;; "*umaddqihi4" +(define_insn "*maddqihi4" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (any_extend:HI (match_operand:QI 2 "register_operand" ""))) + (match_operand:HI 3 "register_operand" "0")))] + + "AVR_HAVE_MUL" + "mul %1,%2 + add %A0,r0 + adc %B0,r1 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;; "*msubqihi4" +;; "*umsubqihi4" +(define_insn "*msubqihi4" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (any_extend:HI (match_operand:QI 2 "register_operand" "")))))] + "AVR_HAVE_MUL" + "mul %1,%2 + sub %A0,r0 + sbc %B0,r1 + clr __zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;; "*usmaddqihi4" +;; "*sumaddqihi4" +(define_insn "*msubqihi4" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "a")) + (any_extend2:HI (match_operand:QI 2 "register_operand" "a"))) + (match_operand:HI 3 "register_operand" "0")))] + "AVR_HAVE_MUL + && reload_completed + && != " + { + output_asm_insn ( == SIGN_EXTEND + ? "mulsu %1,%2" : "mulsu %2,%1", operands); + + return "add %A0,r0\;adc %B0,r1\;clr __zero_reg__"; + } + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;; "*usmsubqihi4" +;; "*sumsubqihi4" +(define_insn "*msubqihi4" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "a")) + (any_extend2:HI (match_operand:QI 2 "register_operand" "a")))))] + "AVR_HAVE_MUL + && reload_completed + && != " + { + output_asm_insn ( == SIGN_EXTEND + ? "mulsu %1,%2" : "mulsu %2,%1", operands); + + return "sub %A0,r0\;sbc %B0,r1\;clr __zero_reg__"; + } + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +;; Handle small constants + +;; Special case of a += 2*b as frequently seen with accesses to int arrays. +;; This is shorter, faster than MUL and has lower register pressure. + +(define_insn_and_split "*umaddqihi4.2" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (const_int 2)) + (match_operand:HI 2 "register_operand" "r")))] + "!reload_completed + && !reg_overlap_mentioned_p (operands[0], operands[1])" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 0) + (match_dup 2)) + ; *addhi3_zero_extend + (set (match_dup 0) + (plus:HI (zero_extend:HI (match_dup 1)) + (match_dup 0))) + ; *addhi3_zero_extend + (set (match_dup 0) + (plus:HI (zero_extend:HI (match_dup 1)) + (match_dup 0)))]) + +;; "umaddqihi4.uconst" +;; "maddqihi4.sconst" +(define_insn_and_split "*maddqihi4.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (match_operand:HI 2 "8_operand" "n")) + (match_operand:HI 3 "register_operand" "0"))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *umaddqihi4 resp. 
*maddqihi4 + (set (match_dup 0) + (plus:HI (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 4))) + (match_dup 3)))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +;; "*umsubqihi4.uconst" +;; "*msubqihi4.sconst" +(define_insn_and_split "*msubqihi4.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (match_operand:HI 2 "8_operand" "n")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *umsubqihi4 resp. *msubqihi4 + (set (match_dup 0) + (minus:HI (match_dup 3) + (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 4)))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +;; Same as the insn above, but combiner tries versions canonicalized to ASHIFT +;; for MULT with power of 2 and skips trying MULT insn above. + +(define_insn_and_split "*umsubqihi4.uconst.ashift" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "const_2_to_7_operand" "n")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *umsubqihi4 + (set (match_dup 0) + (minus:HI (match_dup 3) + (mult:HI (zero_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 4)))))] + { + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode); + }) + +;; Same as the insn above, but combiner tries versions canonicalized to ASHIFT +;; for MULT with power of 2 and skips trying MULT insn above. We omit 128 +;; because this would require an extra pattern for just one value. + +(define_insn_and_split "*msubqihi4.sconst.ashift" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (match_operand:HI 2 "const_1_to_6_operand" "M")))) + (clobber (match_scratch:QI 4 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *smsubqihi4 + (set (match_dup 0) + (minus:HI (match_dup 3) + (mult:HI (sign_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 4)))))] + { + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode); + }) + +;; For signed/unsigned combinations that require narrow constraint "a" +;; just provide a pattern if signed/unsigned combination is actually needed. 
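+;;
+;; Another informal example (names invented): a multiply-add of a signed
+;; char by a constant such as 200, which fits in 8 bits only when taken as
+;; unsigned, is what the two patterns below cover; their !s8_operand
+;; condition excludes constants that the plain signed variants already
+;; handle.
+;;
+;;     int mac200 (signed char c, int acc)
+;;     {
+;;         return acc + c * 200;
+;;     }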
+ +(define_insn_and_split "*sumaddqihi4.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (plus:HI (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "u8_operand" "M")) + (match_operand:HI 3 "register_operand" "0"))) + (clobber (match_scratch:QI 4 "=&a"))] + "AVR_HAVE_MUL + && !s8_operand (operands[2], VOIDmode)" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *sumaddqihi4 + (set (match_dup 0) + (plus:HI (mult:HI (sign_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 4))) + (match_dup 3)))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +(define_insn_and_split "*sumsubqihi4.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 3 "register_operand" "0") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "u8_operand" "M")))) + (clobber (match_scratch:QI 4 "=&a"))] + "AVR_HAVE_MUL + && !s8_operand (operands[2], VOIDmode)" + "#" + "&& reload_completed" + [(set (match_dup 4) + (match_dup 2)) + ; *sumsubqihi4 + (set (match_dup 0) + (minus:HI (match_dup 3) + (mult:HI (sign_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 4)))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +;****************************************************************************** +; mul HI: $1 = sign/zero-extend, $2 = small constant +;****************************************************************************** + +;; "*muluqihi3.uconst" +;; "*mulsqihi3.sconst" +(define_insn_and_split "*mulqihi3.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (any_extend:HI (match_operand:QI 1 "register_operand" "")) + (match_operand:HI 2 "8_operand" "n"))) + (clobber (match_scratch:QI 3 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; umulqihi3 resp. 
mulqihi3 + (set (match_dup 0) + (mult:HI (any_extend:HI (match_dup 1)) + (any_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +(define_insn_and_split "*muluqihi3.sconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "s8_operand" "n"))) + (clobber (match_scratch:QI 3 "=&a"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +(define_insn_and_split "*mulsqihi3.uconst" + [(set (match_operand:HI 0 "register_operand" "=r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "u8_operand" "M"))) + (clobber (match_scratch:QI 3 "=&a"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 3)) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +(define_insn_and_split "*mulsqihi3.oconst" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "o8_operand" "n"))) + (clobber (match_scratch:QI 3 "=&a"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; *osmulqihi3 + (set (match_dup 0) + (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 3)))) + (sign_extend:HI (match_dup 1))))] + { + operands[2] = gen_int_mode (INTVAL (operands[2]), QImode); + }) + +;; The EXTEND of $1 only appears in combine, we don't see it in expand so that +;; expand decides to use ASHIFT instead of MUL because ASHIFT costs are cheaper +;; at that time. Fix that. 
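+;;
+;; An informal example (names made up): shifting a promoted char left by a
+;; small constant is such a case.  Expand emits an ASHIFT of the extended
+;; QImode value, and the patterns below rewrite it as a widening multiply
+;; by a power of 2 when a hardware multiplier is available.
+;;
+;;     int shift5 (signed char c)
+;;     {
+;;         return c << 5;    /* becomes sign_extend (c) * 32 */
+;;     }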
+ +(define_insn "*ashiftqihi2.signx.1" + [(set (match_operand:HI 0 "register_operand" "=r,*r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0,r")) + (const_int 1)))] + "" + "@ + lsl %A0\;sbc %B0,%B0 + mov %A0,%1\;lsl %A0\;sbc %B0,%B0" + [(set_attr "length" "2,3") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*ashifthi3.signx.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "d")) + (match_operand:HI 2 "const_2_to_6_operand" "I"))) + (clobber (match_scratch:QI 3 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; mulqihi3 + (set (match_dup 0) + (mult:HI (sign_extend:HI (match_dup 1)) + (sign_extend:HI (match_dup 3))))] + { + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + }) + +(define_insn_and_split "*ashifthi3.signx.const7" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (const_int 7))) + (clobber (match_scratch:QI 2 "=&a"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 2) + (match_dup 3)) + ; usmulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 2)) + (sign_extend:HI (match_dup 1))))] + { + operands[3] = gen_int_mode (1 << 7, QImode); + }) + +(define_insn_and_split "*ashifthi3.zerox.const" + [(set (match_operand:HI 0 "register_operand" "=r") + (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "const_2_to_7_operand" "I"))) + (clobber (match_scratch:QI 3 "=&d"))] + "AVR_HAVE_MUL" + "#" + "&& reload_completed" + [(set (match_dup 3) + (match_dup 2)) + ; umulqihi3 + (set (match_dup 0) + (mult:HI (zero_extend:HI (match_dup 1)) + (zero_extend:HI (match_dup 3))))] + { + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode); + }) + +;****************************************************************************** +; mul HI: $1 = sign-/zero-/one-extend, $2 = reg +;****************************************************************************** + +(define_insn "mulsqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "a")) + (match_operand:HI 2 "register_operand" "a")))] + "AVR_HAVE_MUL" + "mulsu %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + clr __zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +(define_insn "muluqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + clr __zero_reg__" + [(set_attr "length" "5") + (set_attr "cc" "clobber")]) + +;; one-extend operand 1 + +(define_insn "muloqihi3" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (not:HI (zero_extend:HI (not:QI (match_operand:QI 1 "register_operand" "r")))) + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %0,r0 + mul %1,%B2 + add %B0,r0 + sub %B0,%A2 + clr __zero_reg__" + [(set_attr "length" "6") + (set_attr "cc" "clobber")]) + +;****************************************************************************** + +(define_expand "mulhi3" + [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "register_or_s9_operand" "")))] + "" + { + if (!AVR_HAVE_MUL) + { + if (!register_operand (operands[2], 
HImode)) + operands[2] = force_reg (HImode, operands[2]); + + emit_insn (gen_mulhi3_call (operands[0], operands[1], operands[2])); + DONE; + } + + /* For small constants we can do better by extending them on the fly. + The constant can be loaded in one instruction and the widening + multiplication is shorter. First try the unsigned variant because it + allows constraint "d" instead of "a" for the signed version. */ + + if (s9_operand (operands[2], HImode)) + { + rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); + + if (u8_operand (operands[2], HImode)) + { + emit_insn (gen_muluqihi3 (operands[0], reg, operands[1])); + } + else if (s8_operand (operands[2], HImode)) + { + emit_insn (gen_mulsqihi3 (operands[0], reg, operands[1])); + } + else + { + emit_insn (gen_muloqihi3 (operands[0], reg, operands[1])); + } + + DONE; + } + + if (!register_operand (operands[2], HImode)) + operands[2] = force_reg (HImode, operands[2]); + }) + +(define_insn "*mulhi3_enh" + [(set (match_operand:HI 0 "register_operand" "=&r") + (mult:HI (match_operand:HI 1 "register_operand" "r") + (match_operand:HI 2 "register_operand" "r")))] + "AVR_HAVE_MUL" + { + return REGNO (operands[1]) == REGNO (operands[2]) + ? "mul %A1,%A1\;movw %0,r0\;mul %A1,%B1\;add %B0,r0\;add %B0,r0\;clr r1" + : "mul %A1,%A2\;movw %0,r0\;mul %A1,%B2\;add %B0,r0\;mul %B1,%A2\;add %B0,r0\;clr r1"; + } + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_expand "mulhi3_call" + [(set (reg:HI 24) (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 22) (match_operand:HI 2 "register_operand" "")) + (parallel [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 22)) + (clobber (reg:QI 21))]) + (set (match_operand:HI 0 "register_operand" "") (reg:HI 24))]) + +(define_insn "*mulhi3_call" + [(set (reg:HI 24) (mult:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 22)) + (clobber (reg:QI 21))] + "!AVR_HAVE_MUL" + "%~call __mulhi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; To support widening multiplication with constant we postpone +;; expanding to the implicit library call until post combine and +;; prior to register allocation. Clobber all hard registers that +;; might be used by the (widening) multiply until it is split and +;; it's final register footprint is worked out. 
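+;;
+;; A sketch of the typical case (not from the original sources; names
+;; invented): a 32-bit multiply by a constant that fits in 16 bits.  On a
+;; device with a hardware multiplier the expander below defers expansion so
+;; that the cheaper widening multiply (here via muluhisi3) can be picked
+;; instead of a full __mulsi3 call.
+;;
+;;     long scale (long x)
+;;     {
+;;         return x * 1000;    /* u16_operand constant */
+;;     }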
+ +(define_expand "mulsi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + if (u16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1])); + DONE; + } + + if (o16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); + DONE; + } + }) + +(define_insn_and_split "*mulsi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r") + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:SI 18) + (match_dup 1)) + (set (reg:SI 22) + (match_dup 2)) + (parallel [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:HI 26))]) + (set (match_dup 0) + (reg:SI 22))] + { + if (u16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1])); + DONE; + } + + if (o16_operand (operands[2], SImode)) + { + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1])); + DONE; + } + }) + +;; "muluqisi3" +;; "muluhisi3" +(define_insn_and_split "mulusi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (zero_extend:SI (reg:HI 26)) + (reg:SI 18))) + (set (match_dup 0) + (reg:SI 22))] + { + /* Do the QI -> HI extension explicitely before the multiplication. */ + /* Do the HI -> SI extension implicitely and after the multiplication. */ + + if (QImode == mode) + operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]); + + if (u16_operand (operands[2], SImode)) + { + operands[1] = force_reg (HImode, operands[1]); + operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2])); + DONE; + } + }) + +;; "mulsqisi3" +;; "mulshisi3" +(define_insn_and_split "mulssi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (sign_extend:SI (reg:HI 26)) + (reg:SI 18))) + (set (match_dup 0) + (reg:SI 22))] + { + /* Do the QI -> HI extension explicitely before the multiplication. */ + /* Do the HI -> SI extension implicitely and after the multiplication. 
*/ + + if (QImode == mode) + operands[1] = gen_rtx_SIGN_EXTEND (HImode, operands[1]); + + if (u16_operand (operands[2], SImode) + || s16_operand (operands[2], SImode)) + { + rtx xop2 = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); + + operands[1] = force_reg (HImode, operands[1]); + + if (u16_operand (operands[2], SImode)) + emit_insn (gen_usmulhisi3 (operands[0], xop2, operands[1])); + else + emit_insn (gen_mulhisi3 (operands[0], operands[1], xop2)); + + DONE; + } + }) + +;; One-extend operand 1 + +(define_insn_and_split "mulohisi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (not:SI (zero_extend:SI + (not:HI (match_operand:HI 1 "pseudo_register_operand" "r")))) + (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 26) + (match_dup 1)) + (set (reg:SI 18) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26)))) + (reg:SI 18))) + (set (match_dup 0) + (reg:SI 22))]) + +;; "mulhisi3" +;; "umulhisi3" +(define_expand "mulhisi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (any_extend:SI (match_operand:HI 1 "register_operand" "")) + (any_extend:SI (match_operand:HI 2 "register_operand" "")))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL") + +(define_expand "usmulhisi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "")))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL") + +;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3" +;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3" +;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3" +;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3" +(define_insn_and_split + "*mulsi3" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r")) + (any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r")))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 18) + (match_dup 1)) + (set (reg:HI 26) + (match_dup 2)) + (set (reg:SI 22) + (mult:SI (match_dup 3) + (match_dup 4))) + (set (match_dup 0) + (reg:SI 22))] + { + rtx xop1 = operands[1]; + rtx xop2 = operands[2]; + + /* Do the QI -> HI extension explicitely before the multiplication. */ + /* Do the HI -> SI extension implicitely and after the multiplication. 
*/ + + if (QImode == mode) + xop1 = gen_rtx_fmt_e (, HImode, xop1); + + if (QImode == mode) + xop2 = gen_rtx_fmt_e (, HImode, xop2); + + if ( == + || == ZERO_EXTEND) + { + operands[1] = xop1; + operands[2] = xop2; + operands[3] = gen_rtx_fmt_e (, SImode, gen_rtx_REG (HImode, 18)); + operands[4] = gen_rtx_fmt_e (, SImode, gen_rtx_REG (HImode, 26)); + } + else + { + /* = SIGN_EXTEND */ + /* = ZERO_EXTEND */ + + operands[1] = xop2; + operands[2] = xop1; + operands[3] = gen_rtx_ZERO_EXTEND (SImode, gen_rtx_REG (HImode, 18)); + operands[4] = gen_rtx_SIGN_EXTEND (SImode, gen_rtx_REG (HImode, 26)); + } + }) + +;; "smulhi3_highpart" +;; "umulhi3_highpart" +(define_expand "mulhi3_highpart" + [(set (reg:HI 18) + (match_operand:HI 1 "nonmemory_operand" "")) + (set (reg:HI 26) + (match_operand:HI 2 "nonmemory_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (lshiftrt:SI (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))) + (const_int 16)))) + (clobber (reg:HI 22))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "AVR_HAVE_MUL") + + +(define_insn "*mulsi3_call" + [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:HI 26))] + "AVR_HAVE_MUL" + "%~call __mulsi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*mulhisi3_call" +;; "*umulhisi3_call" +(define_insn "*mulhisi3_call" + [(set (reg:SI 22) + (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))))] + "AVR_HAVE_MUL" + "%~call __mulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; "*umulhi3_highpart_call" +;; "*smulhi3_highpart_call" +(define_insn "*mulhi3_highpart_call" + [(set (reg:HI 24) + (truncate:HI (lshiftrt:SI (mult:SI (any_extend:SI (reg:HI 18)) + (any_extend:SI (reg:HI 26))) + (const_int 16)))) + (clobber (reg:HI 22))] + "AVR_HAVE_MUL" + "%~call __mulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*usmulhisi3_call" + [(set (reg:SI 22) + (mult:SI (zero_extend:SI (reg:HI 18)) + (sign_extend:SI (reg:HI 26))))] + "AVR_HAVE_MUL" + "%~call __usmulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*mulhisi3_call" + [(set (reg:SI 22) + (mult:SI (any_extend:SI (reg:HI 26)) + (reg:SI 18)))] + "AVR_HAVE_MUL" + "%~call __mulhisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*mulohisi3_call" + [(set (reg:SI 22) + (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26)))) + (reg:SI 18)))] + "AVR_HAVE_MUL" + "%~call __mulohisi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % +; divmod + +;; Generate lib1funcs.S calls ourselves, because: +;; - we know exactly which registers are clobbered (for QI and HI +;; modes, some of the call-used registers are preserved) +;; - we get both the quotient and the remainder at no extra cost +;; - we split the patterns only after the first CSE passes because +;; CSE has problems to operate on hard regs. 
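As a rough C model of the point made above: one helper call computes the quotient and the remainder together, so the second result costs nothing extra. The function name and signature below are illustrative only, not from libgcc; the register convention (dividend in R24, divisor in R22, quotient back in R24, remainder in R25, R22/R23 clobbered) is the one encoded by the RTL patterns that follow.

    #include <stdint.h>

    /* Semantics of a combined QImode divmod helper: truncating division as
       in C, with the remainder falling out of the same computation.
       Assumes den != 0.  */
    static void divmodqi4_model (int8_t num, int8_t den,
                                 int8_t *quo, int8_t *rem)
    {
      *quo = num / den;           /* e.g. -7 / 2        == -3 */
      *rem = num - *quo * den;    /* e.g. -7 - (-3) * 2 == -1 */
    }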
+;; +(define_insn_and_split "divmodqi4" + [(parallel [(set (match_operand:QI 0 "pseudo_register_operand" "") + (div:QI (match_operand:QI 1 "pseudo_register_operand" "") + (match_operand:QI 2 "pseudo_register_operand" ""))) + (set (match_operand:QI 3 "pseudo_register_operand" "") + (mod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23)) + (clobber (reg:QI 24)) + (clobber (reg:QI 25))])] + "" + "this divmodqi4 pattern should have been splitted;" + "" + [(set (reg:QI 24) (match_dup 1)) + (set (reg:QI 22) (match_dup 2)) + (parallel [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23))]) + (set (match_dup 0) (reg:QI 24)) + (set (match_dup 3) (reg:QI 25))]) + +(define_insn "*divmodqi4_call" + [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23))] + "" + "%~call __divmodqi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodqi4" + [(parallel [(set (match_operand:QI 0 "pseudo_register_operand" "") + (udiv:QI (match_operand:QI 1 "pseudo_register_operand" "") + (match_operand:QI 2 "pseudo_register_operand" ""))) + (set (match_operand:QI 3 "pseudo_register_operand" "") + (umod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23)) + (clobber (reg:QI 24)) + (clobber (reg:QI 25))])] + "" + "this udivmodqi4 pattern should have been splitted;" + "" + [(set (reg:QI 24) (match_dup 1)) + (set (reg:QI 22) (match_dup 2)) + (parallel [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 23))]) + (set (match_dup 0) (reg:QI 24)) + (set (match_dup 3) (reg:QI 25))]) + +(define_insn "*udivmodqi4_call" + [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22))) + (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22))) + (clobber (reg:QI 23))] + "" + "%~call __udivmodqi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "divmodhi4" + [(parallel [(set (match_operand:HI 0 "pseudo_register_operand" "") + (div:HI (match_operand:HI 1 "pseudo_register_operand" "") + (match_operand:HI 2 "pseudo_register_operand" ""))) + (set (match_operand:HI 3 "pseudo_register_operand" "") + (mod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22)) + (clobber (reg:HI 24)) + (clobber (reg:HI 26))])] + "" + "this should have been splitted;" + "" + [(set (reg:HI 24) (match_dup 1)) + (set (reg:HI 22) (match_dup 2)) + (parallel [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))]) + (set (match_dup 0) (reg:HI 22)) + (set (match_dup 3) (reg:HI 24))]) + +(define_insn "*divmodhi4_call" + [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))] + "" + "%~call __divmodhi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodhi4" + [(parallel [(set (match_operand:HI 0 "pseudo_register_operand" "") + (udiv:HI (match_operand:HI 1 "pseudo_register_operand" "") + (match_operand:HI 2 "pseudo_register_operand" ""))) + (set (match_operand:HI 3 "pseudo_register_operand" "") + (umod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 21)) + (clobber (reg:HI 22)) + (clobber (reg:HI 24)) + (clobber (reg:HI 
26))])] + "" + "this udivmodhi4 pattern should have been splitted.;" + "" + [(set (reg:HI 24) (match_dup 1)) + (set (reg:HI 22) (match_dup 2)) + (parallel [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))]) + (set (match_dup 0) (reg:HI 22)) + (set (match_dup 3) (reg:HI 24))]) + +(define_insn "*udivmodhi4_call" + [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22))) + (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22))) + (clobber (reg:HI 26)) + (clobber (reg:QI 21))] + "" + "%~call __udivmodhi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 24-bit multiply + +;; To support widening multiplication with constant we postpone +;; expanding to the implicit library call until post combine and +;; prior to register allocation. Clobber all hard registers that +;; might be used by the (widening) multiply until it is split and +;; it's final register footprint is worked out. + +(define_expand "mulpsi3" + [(parallel [(set (match_operand:PSI 0 "register_operand" "") + (mult:PSI (match_operand:PSI 1 "register_operand" "") + (match_operand:PSI 2 "nonmemory_operand" ""))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))])] + "AVR_HAVE_MUL" + { + if (s8_operand (operands[2], PSImode)) + { + rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); + emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); + DONE; + } + }) + +(define_insn "*umulqihipsi3" + [(set (match_operand:PSI 0 "register_operand" "=&r") + (mult:PSI (zero_extend:PSI (match_operand:QI 1 "register_operand" "r")) + (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %A0,r0 + mul %1,%B2 + clr %C0 + add %B0,r0 + adc %C0,r1 + clr __zero_reg__" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_insn "*umulhiqipsi3" + [(set (match_operand:PSI 0 "register_operand" "=&r") + (mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r")) + (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))))] + "AVR_HAVE_MUL" + "mul %1,%A2 + movw %A0,r0 + mul %1,%B2 + add %B0,r0 + mov %C0,r1 + clr __zero_reg__ + adc %C0,__zero_reg__" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "mulsqipsi3" + [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") + (mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" "r")) + (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:QI 25) + (match_dup 1)) + (set (reg:PSI 22) + (match_dup 2)) + (set (reg:PSI 18) + (mult:PSI (sign_extend:PSI (reg:QI 25)) + (reg:PSI 22))) + (set (match_dup 0) + (reg:PSI 18))]) + +(define_insn_and_split "*mulpsi3" + [(set (match_operand:PSI 0 "pseudo_register_operand" "=r") + (mult:PSI (match_operand:PSI 1 "pseudo_register_operand" "r") + (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) + (clobber (reg:HI 26)) + (clobber (reg:DI 18))] + "AVR_HAVE_MUL && !reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:PSI 18) + (match_dup 1)) + (set (reg:PSI 22) + (match_dup 2)) + (parallel [(set (reg:PSI 22) + (mult:PSI (reg:PSI 22) + (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:HI 26))]) + (set (match_dup 0) + (reg:PSI 22))] + { + if (s8_operand 
(operands[2], PSImode)) + { + rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); + emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); + DONE; + } + }) + +(define_insn "*mulsqipsi3.libgcc" + [(set (reg:PSI 18) + (mult:PSI (sign_extend:PSI (reg:QI 25)) + (reg:PSI 22)))] + "AVR_HAVE_MUL" + "%~call __mulsqipsi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*mulpsi3.libgcc" + [(set (reg:PSI 22) + (mult:PSI (reg:PSI 22) + (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:HI 26))] + "AVR_HAVE_MUL" + "%~call __mulpsi3" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; 24-bit signed/unsigned division and modulo. +;; Notice that the libgcc implementation return the quotient in R22 +;; and the remainder in R18 whereas the 32-bit [u]divmodsi4 +;; implementation works the other way round. + +(define_insn_and_split "divmodpsi4" + [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "") + (div:PSI (match_operand:PSI 1 "pseudo_register_operand" "") + (match_operand:PSI 2 "pseudo_register_operand" ""))) + (set (match_operand:PSI 3 "pseudo_register_operand" "") + (mod:PSI (match_dup 1) + (match_dup 2))) + (clobber (reg:DI 18)) + (clobber (reg:QI 26))])] + "" + { gcc_unreachable(); } + "" + [(set (reg:PSI 22) (match_dup 1)) + (set (reg:PSI 18) (match_dup 2)) + (parallel [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18))) + (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:QI 26))]) + (set (match_dup 0) (reg:PSI 22)) + (set (match_dup 3) (reg:PSI 18))]) + +(define_insn "*divmodpsi4_call" + [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18))) + (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:QI 26))] + "" + "%~call __divmodpsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodpsi4" + [(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "") + (udiv:PSI (match_operand:PSI 1 "pseudo_register_operand" "") + (match_operand:PSI 2 "pseudo_register_operand" ""))) + (set (match_operand:PSI 3 "pseudo_register_operand" "") + (umod:PSI (match_dup 1) + (match_dup 2))) + (clobber (reg:DI 18)) + (clobber (reg:QI 26))])] + "" + { gcc_unreachable(); } + "" + [(set (reg:PSI 22) (match_dup 1)) + (set (reg:PSI 18) (match_dup 2)) + (parallel [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18))) + (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:QI 26))]) + (set (match_dup 0) (reg:PSI 22)) + (set (match_dup 3) (reg:PSI 18))]) + +(define_insn "*udivmodpsi4_call" + [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18))) + (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18))) + (clobber (reg:QI 21)) + (clobber (reg:QI 25)) + (clobber (reg:QI 26))] + "" + "%~call __udivmodpsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define_insn_and_split "divmodsi4" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (div:SI (match_operand:SI 1 "pseudo_register_operand" "") + (match_operand:SI 2 "pseudo_register_operand" ""))) + (set (match_operand:SI 3 "pseudo_register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI 18)) + (clobber (reg:SI 22)) + 
(clobber (reg:HI 26)) + (clobber (reg:HI 30))])] + "" + "this divmodsi4 pattern should have been splitted;" + "" + [(set (reg:SI 22) (match_dup 1)) + (set (reg:SI 18) (match_dup 2)) + (parallel [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_dup 0) (reg:SI 18)) + (set (match_dup 3) (reg:SI 22))]) + +(define_insn "*divmodsi4_call" + [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "" + "%~call __divmodsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "udivmodsi4" + [(parallel [(set (match_operand:SI 0 "pseudo_register_operand" "") + (udiv:SI (match_operand:SI 1 "pseudo_register_operand" "") + (match_operand:SI 2 "pseudo_register_operand" ""))) + (set (match_operand:SI 3 "pseudo_register_operand" "") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI 18)) + (clobber (reg:SI 22)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))])] + "" + "this udivmodsi4 pattern should have been splitted;" + "" + [(set (reg:SI 22) (match_dup 1)) + (set (reg:SI 18) (match_dup 2)) + (parallel [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_dup 0) (reg:SI 18)) + (set (match_dup 3) (reg:SI 22))]) + +(define_insn "*udivmodsi4_call" + [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18))) + (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "" + "%~call __udivmodsi4" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& +; and + +(define_insn "andqi3" + [(set (match_operand:QI 0 "register_operand" "=??r,d") + (and:QI (match_operand:QI 1 "register_operand" "%0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i")))] + "" + "@ + and %0,%2 + andi %0,lo8(%2)" + [(set_attr "length" "1,1") + (set_attr "cc" "set_zn,set_zn")]) + +(define_insn "andhi3" + [(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0 ,0") + (match_operand:HI 2 "nonmemory_operand" "r,s,n,Ca2,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "and %A0,%A2\;and %B0,%B2"; + else if (which_alternative == 1) + return "andi %A0,lo8(%2)\;andi %B0,hi8(%2)"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "2,2,2,4,4") + (set_attr "adjust_len" "*,*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,set_n,clobber,clobber,clobber")]) + +(define_insn "andpsi3" + [(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r") + (and:PSI (match_operand:PSI 1 "register_operand" "%0,0,0 ,0") + (match_operand:PSI 2 "nonmemory_operand" "r,n,Ca3,n"))) + (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "and %A0,%A2" CR_TAB + "and %B0,%B2" CR_TAB + "and %C0,%C2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "3,3,6,6") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber,clobber")]) + +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r") + (and:SI (match_operand:SI 1 "register_operand" "%0,0,0 ,0") + (match_operand:SI 2 "nonmemory_operand" "r,n,Ca4,n"))) 
+ (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "and %0,%2" CR_TAB + "and %B0,%B2" CR_TAB + "and %C0,%C2" CR_TAB + "and %D0,%D2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "4,4,8,8") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber,clobber")]) + +(define_peephole2 ; andi + [(set (match_operand:QI 0 "d_register_operand" "") + (and:QI (match_dup 0) + (match_operand:QI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:QI (match_dup 0) + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1)))] + { + operands[1] = GEN_INT (INTVAL (operands[1]) & INTVAL (operands[2])); + }) + +;;||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +;; ior + +(define_insn "iorqi3" + [(set (match_operand:QI 0 "register_operand" "=??r,d") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0") + (match_operand:QI 2 "nonmemory_operand" "r,i")))] + "" + "@ + or %0,%2 + ori %0,lo8(%2)" + [(set_attr "length" "1,1") + (set_attr "cc" "set_zn,set_zn")]) + +(define_insn "iorhi3" + [(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0,0 ,0") + (match_operand:HI 2 "nonmemory_operand" "r,s,n,Co2,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "or %A0,%A2\;or %B0,%B2"; + else if (which_alternative == 1) + return "ori %A0,lo8(%2)\;ori %B0,hi8(%2)"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "2,2,2,4,4") + (set_attr "adjust_len" "*,*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,set_n,clobber,clobber,clobber")]) + +(define_insn "iorpsi3" + [(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r") + (ior:PSI (match_operand:PSI 1 "register_operand" "%0,0,0 ,0") + (match_operand:PSI 2 "nonmemory_operand" "r,n,Co3,n"))) + (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "or %A0,%A2" CR_TAB + "or %B0,%B2" CR_TAB + "or %C0,%C2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "3,3,6,6") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber,clobber")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0 ,0") + (match_operand:SI 2 "nonmemory_operand" "r,n,Co4,n"))) + (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "or %0,%2" CR_TAB + "or %B0,%B2" CR_TAB + "or %C0,%C2" CR_TAB + "or %D0,%D2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "4,4,8,8") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber,clobber")]) + +;;^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +;; xor + +(define_insn "xorqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (xor:QI (match_operand:QI 1 "register_operand" "%0") + (match_operand:QI 2 "register_operand" "r")))] + "" + "eor %0,%2" + [(set_attr "length" "1") + (set_attr "cc" "set_zn")]) + +(define_insn "xorhi3" + [(set (match_operand:HI 0 "register_operand" "=??r,r ,r") + (xor:HI (match_operand:HI 1 "register_operand" "%0,0 ,0") + (match_operand:HI 2 "nonmemory_operand" "r,Cx2,n"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + if (which_alternative == 0) + 
return "eor %A0,%A2\;eor %B0,%B2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "2,2,4") + (set_attr "adjust_len" "*,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber")]) + +(define_insn "xorpsi3" + [(set (match_operand:PSI 0 "register_operand" "=??r,r ,r") + (xor:PSI (match_operand:PSI 1 "register_operand" "%0,0 ,0") + (match_operand:PSI 2 "nonmemory_operand" "r,Cx3,n"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "eor %A0,%A2" CR_TAB + "eor %B0,%B2" CR_TAB + "eor %C0,%C2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "3,6,6") + (set_attr "adjust_len" "*,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=??r,r ,r") + (xor:SI (match_operand:SI 1 "register_operand" "%0,0 ,0") + (match_operand:SI 2 "nonmemory_operand" "r,Cx4,n"))) + (clobber (match_scratch:QI 3 "=X,X ,&d"))] + "" + { + if (which_alternative == 0) + return "eor %0,%2" CR_TAB + "eor %B0,%B2" CR_TAB + "eor %C0,%C2" CR_TAB + "eor %D0,%D2"; + + return avr_out_bitop (insn, operands, NULL); + } + [(set_attr "length" "4,8,8") + (set_attr "adjust_len" "*,out_bitop,out_bitop") + (set_attr "cc" "set_n,clobber,clobber")]) + +;; swap swap swap swap swap swap swap swap swap swap swap swap swap swap swap +;; swap + +(define_expand "rotlqi3" + [(set (match_operand:QI 0 "register_operand" "") + (rotate:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "const_0_to_7_operand" "")))] + "" + { + if (!CONST_INT_P (operands[2])) + FAIL; + + operands[2] = gen_int_mode (INTVAL (operands[2]) & 7, QImode); + }) + +;; Expander used by __builtin_avr_swap +(define_expand "rotlqi3_4" + [(set (match_operand:QI 0 "register_operand" "") + (rotate:QI (match_operand:QI 1 "register_operand" "") + (const_int 4)))]) + +(define_insn "*rotlqi3" + [(set (match_operand:QI 0 "register_operand" "=r,r,r ,r ,r ,r ,r ,r") + (rotate:QI (match_operand:QI 1 "register_operand" "0,0,0 ,0 ,0 ,0 ,0 ,0") + (match_operand:QI 2 "const_0_to_7_operand" "P,K,C03,C04,C05,C06,C07,L")))] + "" + "@ + lsl %0\;adc %0,__zero_reg__ + lsl %0\;adc %0,__zero_reg__\;lsl %0\;adc %0,__zero_reg__ + swap %0\;bst %0,0\;ror %0\;bld %0,7 + swap %0 + swap %0\;lsl %0\;adc %0,__zero_reg__ + swap %0\;lsl %0\;adc %0,__zero_reg__\;lsl %0\;adc %0,__zero_reg__ + bst %0,0\;ror %0\;bld %0,7 + " + [(set_attr "length" "2,4,4,1,3,5,3,0") + (set_attr "cc" "set_n,set_n,clobber,none,set_n,set_n,clobber,none")]) + +;; Split all rotates of HI,SI and PSImode registers where rotation is by +;; a whole number of bytes. The split creates the appropriate moves and +;; considers all overlap situations. + +;; HImode does not need scratch. Use attribute for this constraint. 
+
+(define_mode_attr rotx [(SI "&r,&r,X") (PSI "&r,&r,X") (HI "X,X,X")])
+(define_mode_attr rotsmode [(SI "HI") (PSI "QI") (HI "QI")])
+
+;; "rotlhi3"
+;; "rotlpsi3"
+;; "rotlsi3"
+(define_expand "rotl<mode>3"
+  [(parallel [(set (match_operand:HISI 0 "register_operand" "")
+                   (rotate:HISI (match_operand:HISI 1 "register_operand" "")
+                                (match_operand:VOID 2 "const_int_operand" "")))
+              (clobber (match_dup 3))])]
+  ""
+  {
+    int offset;
+
+    if (!CONST_INT_P (operands[2]))
+      FAIL;
+
+    offset = INTVAL (operands[2]);
+
+    if (0 == offset % 8)
+      {
+        if (AVR_HAVE_MOVW && 0 == offset % 16)
+          operands[3] = gen_rtx_SCRATCH (<rotsmode>mode);
+        else
+          operands[3] = gen_rtx_SCRATCH (QImode);
+      }
+    else if (offset == 1
+             || offset == GET_MODE_BITSIZE (<MODE>mode) - 1)
+      {
+        /* Support rotate left/right by 1.  */
+
+        emit_move_insn (operands[0],
+                        gen_rtx_ROTATE (<MODE>mode, operands[1], operands[2]));
+        DONE;
+      }
+    else
+      FAIL;
+  })
+
+(define_insn "*rotlhi2.1"
+  [(set (match_operand:HI 0 "register_operand"            "=r")
+        (rotate:HI (match_operand:HI 1 "register_operand"  "0")
+                   (const_int 1)))]
+  ""
+  "lsl %A0\;rol %B0\;adc %A0,__zero_reg__"
+  [(set_attr "length" "3")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlhi2.15"
+  [(set (match_operand:HI 0 "register_operand"            "=r")
+        (rotate:HI (match_operand:HI 1 "register_operand"  "0")
+                   (const_int 15)))]
+  ""
+  "bst %A0,0\;ror %B0\;ror %A0\;bld %B0,7"
+  [(set_attr "length" "4")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlpsi2.1"
+  [(set (match_operand:PSI 0 "register_operand"             "=r")
+        (rotate:PSI (match_operand:PSI 1 "register_operand"  "0")
+                    (const_int 1)))]
+  ""
+  "lsl %A0\;rol %B0\;rol %C0\;adc %A0,__zero_reg__"
+  [(set_attr "length" "4")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlpsi2.23"
+  [(set (match_operand:PSI 0 "register_operand"             "=r")
+        (rotate:PSI (match_operand:PSI 1 "register_operand"  "0")
+                    (const_int 23)))]
+  ""
+  "bst %A0,0\;ror %C0\;ror %B0\;ror %A0\;bld %C0,7"
+  [(set_attr "length" "5")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlsi2.1"
+  [(set (match_operand:SI 0 "register_operand"            "=r")
+        (rotate:SI (match_operand:SI 1 "register_operand"  "0")
+                   (const_int 1)))]
+  ""
+  "lsl %A0\;rol %B0\;rol %C0\;rol %D0\;adc %A0,__zero_reg__"
+  [(set_attr "length" "5")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*rotlsi2.31"
+  [(set (match_operand:SI 0 "register_operand"            "=r")
+        (rotate:SI (match_operand:SI 1 "register_operand"  "0")
+                   (const_int 31)))]
+  ""
+  "bst %A0,0\;ror %D0\;ror %C0\;ror %B0\;ror %A0\;bld %D0,7"
+  [(set_attr "length" "6")
+   (set_attr "cc" "clobber")])
+
+;; Overlapping non-HImode registers often (but not always) need a scratch.
+;; The best we can do is use early clobber alternative "#&r" so that
+;; completely non-overlapping operands don't get a scratch but # so register
+;; allocation does not prefer non-overlapping.
+
+
+;; Split word aligned rotates using scratch that is mode dependent.
+
+;; "*rotwhi"
+;; "*rotwsi"
+(define_insn_and_split "*rotw<mode>"
+  [(set (match_operand:HISI 0 "register_operand"              "=r,r,#&r")
+        (rotate:HISI (match_operand:HISI 1 "register_operand"  "0,r,r")
+                     (match_operand 2 "const_int_operand"      "n,n,n")))
+   (clobber (match_scratch:<rotsmode> 3 "=<rotx>"))]
+  "AVR_HAVE_MOVW
+   && CONST_INT_P (operands[2])
+   && GET_MODE_SIZE (<MODE>mode) % 2 == 0
+   && 0 == INTVAL (operands[2]) % 16"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    avr_rotate_bytes (operands);
+    DONE;
+  })
+
+
+;; Split byte aligned rotates using scratch that is always QI mode.
+ +;; "*rotbhi" +;; "*rotbpsi" +;; "*rotbsi" +(define_insn_and_split "*rotb" + [(set (match_operand:HISI 0 "register_operand" "=r,r,#&r") + (rotate:HISI (match_operand:HISI 1 "register_operand" "0,r,r") + (match_operand 2 "const_int_operand" "n,n,n"))) + (clobber (match_scratch:QI 3 "="))] + "CONST_INT_P (operands[2]) + && (8 == INTVAL (operands[2]) % 16 + || ((!AVR_HAVE_MOVW + || GET_MODE_SIZE (mode) % 2 != 0) + && 0 == INTVAL (operands[2]) % 16))" + "#" + "&& reload_completed" + [(const_int 0)] + { + avr_rotate_bytes (operands); + DONE; + }) + + +;;<< << << << << << << << << << << << << << << << << << << << << << << << << << +;; arithmetic shift left + +;; "ashlqi3" +;; "ashlqq3" "ashluqq3" +(define_expand "ashl3" + [(set (match_operand:ALL1 0 "register_operand" "") + (ashift:ALL1 (match_operand:ALL1 1 "register_operand" "") + (match_operand:QI 2 "nop_general_operand" "")))]) + +(define_split ; ashlqi3_const4 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 4)))] + "" + [(set (match_dup 1) + (rotate:QI (match_dup 1) + (const_int 4))) + (set (match_dup 1) + (and:QI (match_dup 1) + (const_int -16)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +(define_split ; ashlqi3_const5 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 5)))] + "" + [(set (match_dup 1) (rotate:QI (match_dup 1) (const_int 4))) + (set (match_dup 1) (ashift:QI (match_dup 1) (const_int 1))) + (set (match_dup 1) (and:QI (match_dup 1) (const_int -32)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +(define_split ; ashlqi3_const6 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 6)))] + "" + [(set (match_dup 1) (rotate:QI (match_dup 1) (const_int 4))) + (set (match_dup 1) (ashift:QI (match_dup 1) (const_int 2))) + (set (match_dup 1) (and:QI (match_dup 1) (const_int -64)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +;; "*ashlqi3" +;; "*ashlqq3" "*ashluqq3" +(define_insn "*ashl3" + [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,!d,r,r") + (ashift:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,0 ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,K,n ,n,Qm")))] + "" + { + return ashlqi3_out (insn, operands, NULL); + } + [(set_attr "length" "5,0,1,2,4,6,9") + (set_attr "adjust_len" "ashlqi") + (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,set_czn,clobber")]) + +(define_insn "ashl3" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r,r,r") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return ashlhi3_out (insn, operands, NULL); + } + [(set_attr "length" "6,0,2,2,4,10,10") + (set_attr "adjust_len" "ashlhi") + (set_attr "cc" "clobber,none,set_n,clobber,set_n,clobber,clobber")]) + + +;; Insns like the following are generated when (implicitly) extending 8-bit shifts +;; like char1 = char2 << char3. Only the low-byte is needed in that situation. + +;; "*ashluqihiqi3" +;; "*ashlsqihiqi3" +(define_insn_and_split "*ashlqihiqi3" + [(set (match_operand:QI 0 "register_operand" "=r") + (subreg:QI (ashift:HI (any_extend:HI (match_operand:QI 1 "register_operand" "0")) + (match_operand:QI 2 "register_operand" "r")) + 0))] + "" + "#" + "" + [(set (match_dup 0) + (ashift:QI (match_dup 1) + (match_dup 2)))]) + +;; ??? 
Combiner does not recognize that it could split the following insn; +;; presumably because he has no register handy? + +;; "*ashluqihiqi3.mem" +;; "*ashlsqihiqi3.mem" +(define_insn_and_split "*ashlqihiqi3.mem" + [(set (match_operand:QI 0 "memory_operand" "=m") + (subreg:QI (ashift:HI (any_extend:HI (match_operand:QI 1 "register_operand" "r")) + (match_operand:QI 2 "register_operand" "r")) + 0))] + "!reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 3) + (ashift:QI (match_dup 1) + (match_dup 2))) + (set (match_dup 0) + (match_dup 3))] + { + operands[3] = gen_reg_rtx (QImode); + }) + +;; Similar. + +(define_insn_and_split "*ashlhiqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r") + (subreg:QI (ashift:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "register_operand" "r")) 0))] + "!reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (match_dup 4) + (ashift:QI (match_dup 3) + (match_dup 2))) + (set (match_dup 0) + (match_dup 4))] + { + operands[3] = simplify_gen_subreg (QImode, operands[1], HImode, 0); + operands[4] = gen_reg_rtx (QImode); + }) + +;; High part of 16-bit shift is unused after the instruction: +;; No need to compute it, map to 8-bit shift. + +(define_peephole2 + [(set (match_operand:HI 0 "register_operand" "") + (ashift:HI (match_dup 0) + (match_operand:QI 1 "register_operand" "")))] + "" + [(set (match_dup 2) + (ashift:QI (match_dup 2) + (match_dup 1))) + (clobber (match_dup 3))] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, 1); + + if (!peep2_reg_dead_p (1, operands[3])) + FAIL; + + operands[2] = simplify_gen_subreg (QImode, operands[0], HImode, 0); + }) + + +;; "ashlsi3" +;; "ashlsq3" "ashlusq3" +;; "ashlsa3" "ashlusa3" +(define_insn "ashl3" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r,r,r,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return ashlsi3_out (insn, operands, NULL); + } + [(set_attr "length" "8,0,4,4,8,10,12") + (set_attr "adjust_len" "ashlsi") + (set_attr "cc" "clobber,none,set_n,clobber,set_n,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. 
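The peepholes below restate, with a scratch d-register available, the constant-shift trick already used by the ashlqi3_const4/5/6 splits earlier: a shift by four is a nibble SWAP followed by an AND that clears the nibble shifted out, and ANDI needs an upper ("d") register for its immediate. A hedged C sketch of the arithmetic being relied on:

    #include <stdint.h>

    /* x << 4 on an 8-bit value: swap the nibbles, then mask with 0xF0 (-16).  */
    static uint8_t shl4_via_swap (uint8_t x)
    {
      uint8_t swapped = (uint8_t) ((x << 4) | (x >> 4));   /* SWAP */
      return (uint8_t) (swapped & 0xF0);                   /* ANDI */
    }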
+ +(define_peephole2 ; ashlqi3_l_const4 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 4))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 1) (const_int -16)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 ; ashlqi3_l_const5 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 5))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 2) (ashift:QI (match_dup 2) (const_int 1))) + (set (match_dup 1) (const_int -32)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 ; ashlqi3_l_const6 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (ashift:ALL1 (match_dup 0) + (const_int 6))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 2) (ashift:QI (match_dup 2) (const_int 2))) + (set (match_dup 1) (const_int -64)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL2 0 "register_operand" "") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (ashift:ALL2 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*ashlhi3_const" +;; "*ashlhq3_const" "*ashluhq3_const" +;; "*ashlha3_const" "*ashluha3_const" +(define_insn "*ashl3_const" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + { + return ashlhi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,2,2,4,10") + (set_attr "adjust_len" "ashlhi") + (set_attr "cc" "none,set_n,clobber,set_n,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL4 0 "register_operand" "") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (ashift:ALL4 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*ashlsi3_const" +;; "*ashlsq3_const" "*ashlusq3_const" +;; "*ashlsa3_const" "*ashlusa3_const" +(define_insn "*ashl3_const" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + { + return ashlsi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,4,4,10") + (set_attr "adjust_len" "ashlsi") + (set_attr "cc" "none,set_n,clobber,clobber")]) + +(define_expand "ashlpsi3" + [(parallel [(set (match_operand:PSI 0 "register_operand" "") + (ashift:PSI (match_operand:PSI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (scratch:QI))])] + "" + { + if (AVR_HAVE_MUL + && CONST_INT_P (operands[2])) + { + if (IN_RANGE (INTVAL (operands[2]), 3, 6)) + { + rtx xoffset = force_reg (QImode, gen_int_mode (1 << INTVAL (operands[2]), QImode)); + emit_insn (gen_mulsqipsi3 (operands[0], xoffset, 
operands[1])); + DONE; + } + else if (optimize_insn_for_speed_p () + && INTVAL (operands[2]) != 16 + && IN_RANGE (INTVAL (operands[2]), 9, 22)) + { + rtx xoffset = force_reg (PSImode, gen_int_mode (1 << INTVAL (operands[2]), PSImode)); + emit_insn (gen_mulpsi3 (operands[0], operands[1], xoffset)); + DONE; + } + } + }) + +(define_insn "*ashlpsi3" + [(set (match_operand:PSI 0 "register_operand" "=r,r,r,r") + (ashift:PSI (match_operand:PSI 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "nonmemory_operand" "r,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "" + { + return avr_out_ashlpsi3 (insn, operands, NULL); + } + [(set_attr "adjust_len" "ashlpsi") + (set_attr "cc" "clobber")]) + +;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> +;; arithmetic shift right + +;; "ashrqi3" +;; "ashrqq3" "ashruqq3" +(define_insn "ashr3" + [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,r ,r ,r") + (ashiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,0 ,0 ,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,K,C03 C04 C05,C06 C07,Qm")))] + "" + { + return ashrqi3_out (insn, operands, NULL); + } + [(set_attr "length" "5,0,1,2,5,4,9") + (set_attr "adjust_len" "ashrqi") + (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,clobber,clobber")]) + +;; "ashrhi3" +;; "ashrhq3" "ashruhq3" +;; "ashrha3" "ashruha3" +(define_insn "ashr3" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r,r,r") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return ashrhi3_out (insn, operands, NULL); + } + [(set_attr "length" "6,0,2,4,4,10,10") + (set_attr "adjust_len" "ashrhi") + (set_attr "cc" "clobber,none,clobber,set_n,clobber,clobber,clobber")]) + +(define_insn "ashrpsi3" + [(set (match_operand:PSI 0 "register_operand" "=r,r,r,r,r") + (ashiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0,0,r,0") + (match_operand:QI 2 "nonmemory_operand" "r,P,K,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "" + { + return avr_out_ashrpsi3 (insn, operands, NULL); + } + [(set_attr "adjust_len" "ashrpsi") + (set_attr "cc" "clobber")]) + +;; "ashrsi3" +;; "ashrsq3" "ashrusq3" +;; "ashrsa3" "ashrusa3" +(define_insn "ashr3" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r,r,r,r") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return ashrsi3_out (insn, operands, NULL); + } + [(set_attr "length" "8,0,4,6,8,10,12") + (set_attr "adjust_len" "ashrsi") + (set_attr "cc" "clobber,none,clobber,set_n,clobber,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. 
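One detail worth spelling out from the ashlpsi3 expander above: with a hardware multiplier, a 24-bit left shift by a small constant is rewritten as a widening multiply by a power of two (via gen_mulsqipsi3 or gen_mulpsi3) rather than a chain of single-bit shifts. A minimal C sketch of the identity being used; int32_t stands in for PSImode since standard C has no 24-bit type, and the low 24 bits of both forms coincide, so the multiply produces the same PSImode result.

    #include <stdint.h>

    /* x << n  has the same low bits as  x * (1 << n),  for 0 <= n < 23.  */
    static int32_t shl_as_mul (int32_t x, unsigned n)
    {
      return x * (int32_t) (1u << n);
    }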
+ +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL2 0 "register_operand" "") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (ashiftrt:ALL2 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*ashrhi3_const" +;; "*ashrhq3_const" "*ashruhq3_const" +;; "*ashrha3_const" "*ashruha3_const" +(define_insn "*ashr3_const" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + { + return ashrhi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,2,4,4,10") + (set_attr "adjust_len" "ashrhi") + (set_attr "cc" "none,clobber,set_n,clobber,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL4 0 "register_operand" "") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (ashiftrt:ALL4 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*ashrsi3_const" +;; "*ashrsq3_const" "*ashrusq3_const" +;; "*ashrsa3_const" "*ashrusa3_const" +(define_insn "*ashr3_const" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + { + return ashrsi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,4,4,10") + (set_attr "adjust_len" "ashrsi") + (set_attr "cc" "none,clobber,set_n,clobber")]) + +;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> +;; logical shift right + +;; "lshrqi3" +;; "lshrqq3 "lshruqq3" +(define_expand "lshr3" + [(set (match_operand:ALL1 0 "register_operand" "") + (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "") + (match_operand:QI 2 "nop_general_operand" "")))]) + +(define_split ; lshrqi3_const4 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 4)))] + "" + [(set (match_dup 1) + (rotate:QI (match_dup 1) + (const_int 4))) + (set (match_dup 1) + (and:QI (match_dup 1) + (const_int 15)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +(define_split ; lshrqi3_const5 + [(set (match_operand:ALL1 0 "d_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 5)))] + "" + [(set (match_dup 1) (rotate:QI (match_dup 1) (const_int 4))) + (set (match_dup 1) (lshiftrt:QI (match_dup 1) (const_int 1))) + (set (match_dup 1) (and:QI (match_dup 1) (const_int 7)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +(define_split ; lshrqi3_const6 + [(set (match_operand:QI 0 "d_register_operand" "") + (lshiftrt:QI (match_dup 0) + (const_int 6)))] + "" + [(set (match_dup 1) (rotate:QI (match_dup 1) (const_int 4))) + (set (match_dup 1) (lshiftrt:QI (match_dup 1) (const_int 2))) + (set (match_dup 1) (and:QI (match_dup 1) (const_int 3)))] + { + operands[1] = avr_to_int_mode (operands[0]); + }) + +;; "*lshrqi3" +;; "*lshrqq3" +;; "*lshruqq3" +(define_insn "*lshr3" + [(set (match_operand:ALL1 0 "register_operand" "=r,r,r,r,!d,r,r") + (lshiftrt:ALL1 (match_operand:ALL1 1 "register_operand" "0,0,0,0,0 ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,K,n ,n,Qm")))] + "" + { + return lshrqi3_out (insn, operands, NULL); 
+ } + [(set_attr "length" "5,0,1,2,4,6,9") + (set_attr "adjust_len" "lshrqi") + (set_attr "cc" "clobber,none,set_czn,set_czn,set_czn,set_czn,clobber")]) + +;; "lshrhi3" +;; "lshrhq3" "lshruhq3" +;; "lshrha3" "lshruha3" +(define_insn "lshr3" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r,r,r") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return lshrhi3_out (insn, operands, NULL); + } + [(set_attr "length" "6,0,2,2,4,10,10") + (set_attr "adjust_len" "lshrhi") + (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber,clobber")]) + +(define_insn "lshrpsi3" + [(set (match_operand:PSI 0 "register_operand" "=r,r,r,r,r") + (lshiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 "nonmemory_operand" "r,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "" + { + return avr_out_lshrpsi3 (insn, operands, NULL); + } + [(set_attr "adjust_len" "lshrpsi") + (set_attr "cc" "clobber")]) + +;; "lshrsi3" +;; "lshrsq3" "lshrusq3" +;; "lshrsa3" "lshrusa3" +(define_insn "lshr3" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r,r,r,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,0,r,0,0,0") + (match_operand:QI 2 "nop_general_operand" "r,L,P,O,K,n,Qm")))] + "" + { + return lshrsi3_out (insn, operands, NULL); + } + [(set_attr "length" "8,0,4,4,8,10,12") + (set_attr "adjust_len" "lshrsi") + (set_attr "cc" "clobber,none,clobber,clobber,clobber,clobber,clobber")]) + +;; Optimize if a scratch register from LD_REGS happens to be available. + +(define_peephole2 ; lshrqi3_l_const4 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 4))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 1) (const_int 15)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 ; lshrqi3_l_const5 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 5))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 2) (lshiftrt:QI (match_dup 2) (const_int 1))) + (set (match_dup 1) (const_int 7)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 ; lshrqi3_l_const6 + [(set (match_operand:ALL1 0 "l_register_operand" "") + (lshiftrt:ALL1 (match_dup 0) + (const_int 6))) + (match_scratch:QI 1 "d")] + "" + [(set (match_dup 2) (rotate:QI (match_dup 2) (const_int 4))) + (set (match_dup 2) (lshiftrt:QI (match_dup 2) (const_int 2))) + (set (match_dup 1) (const_int 3)) + (set (match_dup 2) (and:QI (match_dup 2) (match_dup 1)))] + { + operands[2] = avr_to_int_mode (operands[0]); + }) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL2 0 "register_operand" "") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (lshiftrt:ALL2 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*lshrhi3_const" +;; "*lshrhq3_const" "*lshruhq3_const" +;; "*lshrha3_const" "*lshruha3_const" +(define_insn "*lshr3_const" + [(set (match_operand:ALL2 0 "register_operand" "=r,r,r,r,r") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0,r,0,0") + (match_operand:QI 2 
"const_int_operand" "L,P,O,K,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,X,&d"))] + "reload_completed" + { + return lshrhi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,2,2,4,10") + (set_attr "adjust_len" "lshrhi") + (set_attr "cc" "none,clobber,clobber,clobber,clobber")]) + +(define_peephole2 + [(match_scratch:QI 3 "d") + (set (match_operand:ALL4 0 "register_operand" "") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))] + "" + [(parallel [(set (match_dup 0) + (lshiftrt:ALL4 (match_dup 1) + (match_dup 2))) + (clobber (match_dup 3))])]) + +;; "*lshrsi3_const" +;; "*lshrsq3_const" "*lshrusq3_const" +;; "*lshrsa3_const" "*lshrusa3_const" +(define_insn "*lshr3_const" + [(set (match_operand:ALL4 0 "register_operand" "=r,r,r,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0,r,0") + (match_operand:QI 2 "const_int_operand" "L,P,O,n"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "reload_completed" + { + return lshrsi3_out (insn, operands, NULL); + } + [(set_attr "length" "0,4,4,10") + (set_attr "adjust_len" "lshrsi") + (set_attr "cc" "none,clobber,clobber,clobber")]) + +;; abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) +;; abs + +(define_insn "absqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (abs:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "sbrc %0,7 + neg %0" + [(set_attr "length" "2") + (set_attr "cc" "clobber")]) + + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=d,r") + (abs:SF (match_operand:SF 1 "register_operand" "0,0")))] + "" + "@ + andi %D0,0x7f + clt\;bld %D0,7" + [(set_attr "length" "1,2") + (set_attr "cc" "set_n,clobber")]) + +;; 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x 0 - x +;; neg + +(define_insn "negqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (neg:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "neg %0" + [(set_attr "length" "1") + (set_attr "cc" "set_zn")]) + +(define_insn "*negqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (neg:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0"))))] + "" + "clr %B0\;neg %A0\;brge .+2\;com %B0" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +(define_insn "neghi2" + [(set (match_operand:HI 0 "register_operand" "=r,&r") + (neg:HI (match_operand:HI 1 "register_operand" "0,r")))] + "" + "@ + neg %B0\;neg %A0\;sbc %B0,__zero_reg__ + clr %A0\;clr %B0\;sub %A0,%A1\;sbc %B0,%B1" + [(set_attr "length" "3,4") + (set_attr "cc" "set_czn")]) + +(define_insn "negpsi2" + [(set (match_operand:PSI 0 "register_operand" "=!d,r,&r") + (neg:PSI (match_operand:PSI 1 "register_operand" "0,0,r")))] + "" + "@ + com %C0\;com %B0\;neg %A0\;sbci %B0,-1\;sbci %C0,-1 + com %C0\;com %B0\;com %A0\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__ + clr %A0\;clr %B0\;clr %C0\;sub %A0,%A1\;sbc %B0,%B1\;sbc %C0,%C1" + [(set_attr "length" "5,6,6") + (set_attr "cc" "set_czn,set_n,set_czn")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=!d,r,&r,&r") + (neg:SI (match_operand:SI 1 "register_operand" "0,0,r ,r")))] + "" + "@ + com %D0\;com %C0\;com %B0\;neg %A0\;sbci %B0,lo8(-1)\;sbci %C0,lo8(-1)\;sbci %D0,lo8(-1) + com %D0\;com %C0\;com %B0\;com %A0\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__ + clr %A0\;clr %B0\;clr %C0\;clr %D0\;sub %A0,%A1\;sbc %B0,%B1\;sbc %C0,%C1\;sbc %D0,%D1 + clr %A0\;clr %B0\;movw %C0,%A0\;sub %A0,%A1\;sbc %B0,%B1\;sbc 
%C0,%C1\;sbc %D0,%D1" + [(set_attr "length" "7,8,8,7") + (set_attr "isa" "*,*,mov,movw") + (set_attr "cc" "set_czn,set_n,set_czn,set_czn")]) + +(define_insn "negsf2" + [(set (match_operand:SF 0 "register_operand" "=d,r") + (neg:SF (match_operand:SF 1 "register_operand" "0,0")))] + "" + "@ + subi %D0,0x80 + bst %D0,7\;com %D0\;bld %D0,7\;com %D0" + [(set_attr "length" "1,4") + (set_attr "cc" "set_n,set_n")]) + +;; !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +;; not + +(define_insn "one_cmplqi2" + [(set (match_operand:QI 0 "register_operand" "=r") + (not:QI (match_operand:QI 1 "register_operand" "0")))] + "" + "com %0" + [(set_attr "length" "1") + (set_attr "cc" "set_czn")]) + +(define_insn "one_cmplhi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (not:HI (match_operand:HI 1 "register_operand" "0")))] + "" + "com %0 + com %B0" + [(set_attr "length" "2") + (set_attr "cc" "set_n")]) + +(define_insn "one_cmplpsi2" + [(set (match_operand:PSI 0 "register_operand" "=r") + (not:PSI (match_operand:PSI 1 "register_operand" "0")))] + "" + "com %0\;com %B0\;com %C0" + [(set_attr "length" "3") + (set_attr "cc" "set_n")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "0")))] + "" + "com %0 + com %B0 + com %C0 + com %D0" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x +;; sign extend + +;; We keep combiner from inserting hard registers into the input of sign- and +;; zero-extends. A hard register in the input operand is not wanted because +;; 32-bit multiply patterns clobber some hard registers and extends with a +;; hard register that overlaps these clobbers won't be combined to a widening +;; multiplication. There is no need for combine to propagate hard registers, +;; register allocation can do it just as well. 
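The extendqihi2 alternatives that follow implement ordinary two's-complement sign extension: clear the high byte, then complement it to 0xFF only when bit 7 of the low byte is set (clr / sbrc / com). A small C restatement, purely for illustration:

    #include <stdint.h>

    static int16_t extend_qi_to_hi (int8_t x)
    {
      uint8_t lo = (uint8_t) x;
      uint8_t hi = (lo & 0x80) ? 0xFF : 0x00;   /* clr %B0; sbrc %A0,7; com %B0 */
      return (int16_t) (((uint16_t) hi << 8) | lo);
    }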
+ +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (sign_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))] + "" + "@ + clr %B0\;sbrc %0,7\;com %B0 + mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0" + [(set_attr "length" "3,4") + (set_attr "cc" "set_n,set_n")]) + +(define_insn "extendqipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r,r") + (sign_extend:PSI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))] + "" + "@ + clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0 + mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0" + [(set_attr "length" "4,5") + (set_attr "cc" "set_n,set_n")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))] + "" + "@ + clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0 + mov %A0,%A1\;clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0" + [(set_attr "length" "5,6") + (set_attr "cc" "set_n,set_n")]) + +(define_insn "extendhipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r,r ,r") + (sign_extend:PSI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r,*r")))] + "" + "@ + clr %C0\;sbrc %B0,7\;com %C0 + mov %A0,%A1\;mov %B0,%B1\;clr %C0\;sbrc %B0,7\;com %C0 + movw %A0,%A1\;clr %C0\;sbrc %B0,7\;com %C0" + [(set_attr "length" "3,5,4") + (set_attr "isa" "*,mov,movw") + (set_attr "cc" "set_n")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r ,r") + (sign_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r,*r")))] + "" + "@ + clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0 + mov %A0,%A1\;mov %B0,%B1\;clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0 + movw %A0,%A1\;clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0" + [(set_attr "length" "4,6,5") + (set_attr "isa" "*,mov,movw") + (set_attr "cc" "set_n")]) + +(define_insn "extendpsisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:PSI 1 "combine_pseudo_register_operand" "0")))] + "" + "clr %D0\;sbrc %C0,7\;com %D0" + [(set_attr "length" "3") + (set_attr "cc" "set_n")]) + +;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x +;; zero extend + +(define_insn_and_split "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (QImode, HImode); + unsigned int high_off = subreg_highpart_offset (QImode, HImode); + + operands[2] = simplify_gen_subreg (QImode, operands[0], HImode, low_off); + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, high_off); + }) + +(define_insn_and_split "zero_extendqipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r") + (zero_extend:PSI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0)) + (set (match_dup 4) (const_int 0))] + { + operands[2] = simplify_gen_subreg (QImode, operands[0], PSImode, 0); + operands[3] = simplify_gen_subreg (QImode, operands[0], PSImode, 1); + operands[4] = simplify_gen_subreg (QImode, operands[0], PSImode, 2); + }) + +(define_insn_and_split "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))] + "" + 
"#" + "reload_completed" + [(set (match_dup 2) (zero_extend:HI (match_dup 1))) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (HImode, SImode); + unsigned int high_off = subreg_highpart_offset (HImode, SImode); + + operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, low_off); + operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, high_off); + }) + +(define_insn_and_split "zero_extendhipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r") + (zero_extend:PSI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + operands[2] = simplify_gen_subreg (HImode, operands[0], PSImode, 0); + operands[3] = simplify_gen_subreg (QImode, operands[0], PSImode, 2); + }) + +(define_insn_and_split "n_extendhipsi2" + [(set (match_operand:PSI 0 "register_operand" "=r,r,d,r") + (lo_sum:PSI (match_operand:QI 1 "const_int_operand" "L,P,n,n") + (match_operand:HI 2 "register_operand" "r,r,r,r"))) + (clobber (match_scratch:QI 3 "=X,X,X,&d"))] + "" + "#" + "reload_completed" + [(set (match_dup 4) (match_dup 2)) + (set (match_dup 3) (match_dup 6)) + ; no-op move in the case where no scratch is needed + (set (match_dup 5) (match_dup 3))] + { + operands[4] = simplify_gen_subreg (HImode, operands[0], PSImode, 0); + operands[5] = simplify_gen_subreg (QImode, operands[0], PSImode, 2); + operands[6] = operands[1]; + + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = operands[5]; + }) + +(define_insn_and_split "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (HImode, SImode); + unsigned int high_off = subreg_highpart_offset (HImode, SImode); + + operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, low_off); + operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, high_off); + }) + +(define_insn_and_split "zero_extendpsisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:PSI 1 "combine_pseudo_register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + operands[2] = simplify_gen_subreg (PSImode, operands[0], SImode, 0); + operands[3] = simplify_gen_subreg (QImode, operands[0], SImode, 3); + }) + +(define_insn_and_split "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (zero_extend:SI (match_dup 1))) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); + }) + +(define_insn_and_split "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (zero_extend:SI (match_dup 1))) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = 
subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); + }) + +(define_insn_and_split "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 3) (const_int 0))] + { + unsigned int low_off = subreg_lowpart_offset (SImode, DImode); + unsigned int high_off = subreg_highpart_offset (SImode, DImode); + + operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, low_off); + operands[3] = simplify_gen_subreg (SImode, operands[0], DImode, high_off); + }) + +;;<=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=><=> +;; compare + +; Optimize negated tests into reverse compare if overflow is undefined. +(define_insn "*negated_tstqi" + [(set (cc0) + (compare (neg:QI (match_operand:QI 0 "register_operand" "r")) + (const_int 0)))] + "!flag_wrapv && !flag_trapv && flag_strict_overflow" + "cp __zero_reg__,%0" + [(set_attr "cc" "compare") + (set_attr "length" "1")]) + +(define_insn "*reversed_tstqi" + [(set (cc0) + (compare (const_int 0) + (match_operand:QI 0 "register_operand" "r")))] + "" + "cp __zero_reg__,%0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +(define_insn "*negated_tsthi" + [(set (cc0) + (compare (neg:HI (match_operand:HI 0 "register_operand" "r")) + (const_int 0)))] + "!flag_wrapv && !flag_trapv && flag_strict_overflow" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +;; Leave here the clobber used by the cmphi pattern for simplicity, even +;; though it is unused, because this pattern is synthesized by avr_reorg. 
+(define_insn "*reversed_tsthi" + [(set (cc0) + (compare (const_int 0) + (match_operand:HI 0 "register_operand" "r"))) + (clobber (match_scratch:QI 1 "=X"))] + "" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0" +[(set_attr "cc" "compare") + (set_attr "length" "2")]) + +(define_insn "*negated_tstpsi" + [(set (cc0) + (compare (neg:PSI (match_operand:PSI 0 "register_operand" "r")) + (const_int 0)))] + "!flag_wrapv && !flag_trapv && flag_strict_overflow" + "cp __zero_reg__,%A0\;cpc __zero_reg__,%B0\;cpc __zero_reg__,%C0" + [(set_attr "cc" "compare") + (set_attr "length" "3")]) + +(define_insn "*reversed_tstpsi" + [(set (cc0) + (compare (const_int 0) + (match_operand:PSI 0 "register_operand" "r"))) + (clobber (match_scratch:QI 1 "=X"))] + "" + "cp __zero_reg__,%A0\;cpc __zero_reg__,%B0\;cpc __zero_reg__,%C0" + [(set_attr "cc" "compare") + (set_attr "length" "3")]) + +(define_insn "*negated_tstsi" + [(set (cc0) + (compare (neg:SI (match_operand:SI 0 "register_operand" "r")) + (const_int 0)))] + "!flag_wrapv && !flag_trapv && flag_strict_overflow" + "cp __zero_reg__,%A0 + cpc __zero_reg__,%B0 + cpc __zero_reg__,%C0 + cpc __zero_reg__,%D0" + [(set_attr "cc" "compare") + (set_attr "length" "4")]) + +;; "*reversed_tstsi" +;; "*reversed_tstsq" "*reversed_tstusq" +;; "*reversed_tstsa" "*reversed_tstusa" +(define_insn "*reversed_tst" + [(set (cc0) + (compare (match_operand:ALL4 0 "const0_operand" "Y00") + (match_operand:ALL4 1 "register_operand" "r"))) + (clobber (match_scratch:QI 2 "=X"))] + "" + "cp __zero_reg__,%A1 + cpc __zero_reg__,%B1 + cpc __zero_reg__,%C1 + cpc __zero_reg__,%D1" + [(set_attr "cc" "compare") + (set_attr "length" "4")]) + + +;; "*cmpqi" +;; "*cmpqq" "*cmpuqq" +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:ALL1 0 "register_operand" "r ,r,d") + (match_operand:ALL1 1 "nonmemory_operand" "Y00,r,i")))] + "" + "@ + tst %0 + cp %0,%1 + cpi %0,lo8(%1)" + [(set_attr "cc" "compare,compare,compare") + (set_attr "length" "1,1,1")]) + +(define_insn "*cmpqi_sign_extend" + [(set (cc0) + (compare (sign_extend:HI (match_operand:QI 0 "register_operand" "d")) + (match_operand:HI 1 "s8_operand" "n")))] + "" + "cpi %0,lo8(%1)" + [(set_attr "cc" "compare") + (set_attr "length" "1")]) + +;; "*cmphi" +;; "*cmphq" "*cmpuhq" +;; "*cmpha" "*cmpuha" +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:ALL2 0 "register_operand" "!w ,r ,r,d ,r ,d,r") + (match_operand:ALL2 1 "nonmemory_operand" "Y00,Y00,r,s ,s ,M,n Ynn"))) + (clobber (match_scratch:QI 2 "=X ,X ,X,&d,&d ,X,&d"))] + "" + { + switch (which_alternative) + { + case 0: + case 1: + return avr_out_tsthi (insn, operands, NULL); + + case 2: + return "cp %A0,%A1\;cpc %B0,%B1"; + + case 3: + if (mode != HImode) + break; + return reg_unused_after (insn, operands[0]) + ? 
"subi %A0,lo8(%1)\;sbci %B0,hi8(%1)" + : "ldi %2,hi8(%1)\;cpi %A0,lo8(%1)\;cpc %B0,%2"; + + case 4: + if (mode != HImode) + break; + return "ldi %2,lo8(%1)\;cp %A0,%2\;ldi %2,hi8(%1)\;cpc %B0,%2"; + } + + return avr_out_compare (insn, operands, NULL); + } + [(set_attr "cc" "compare") + (set_attr "length" "1,2,2,3,4,2,4") + (set_attr "adjust_len" "tsthi,tsthi,*,*,*,compare,compare")]) + +(define_insn "*cmppsi" + [(set (cc0) + (compare (match_operand:PSI 0 "register_operand" "r,r,d ,r ,d,r") + (match_operand:PSI 1 "nonmemory_operand" "L,r,s ,s ,M,n"))) + (clobber (match_scratch:QI 2 "=X,X,&d,&d ,X,&d"))] + "" + { + switch (which_alternative) + { + case 0: + return avr_out_tstpsi (insn, operands, NULL); + + case 1: + return "cp %A0,%A1\;cpc %B0,%B1\;cpc %C0,%C1"; + + case 2: + return reg_unused_after (insn, operands[0]) + ? "subi %A0,lo8(%1)\;sbci %B0,hi8(%1)\;sbci %C0,hh8(%1)" + : "cpi %A0,lo8(%1)\;ldi %2,hi8(%1)\;cpc %B0,%2\;ldi %2,hh8(%1)\;cpc %C0,%2"; + + case 3: + return "ldi %2,lo8(%1)\;cp %A0,%2\;ldi %2,hi8(%1)\;cpc %B0,%2\;ldi %2,hh8(%1)\;cpc %C0,%2"; + } + + return avr_out_compare (insn, operands, NULL); + } + [(set_attr "cc" "compare") + (set_attr "length" "3,3,5,6,3,7") + (set_attr "adjust_len" "tstpsi,*,*,*,compare,compare")]) + +;; "*cmpsi" +;; "*cmpsq" "*cmpusq" +;; "*cmpsa" "*cmpusa" +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:ALL4 0 "register_operand" "r ,r ,d,r ,r") + (match_operand:ALL4 1 "nonmemory_operand" "Y00,r ,M,M ,n Ynn"))) + (clobber (match_scratch:QI 2 "=X ,X ,X,&d,&d"))] + "" + { + if (0 == which_alternative) + return avr_out_tstsi (insn, operands, NULL); + else if (1 == which_alternative) + return "cp %A0,%A1\;cpc %B0,%B1\;cpc %C0,%C1\;cpc %D0,%D1"; + + return avr_out_compare (insn, operands, NULL); + } + [(set_attr "cc" "compare") + (set_attr "length" "4,4,4,5,8") + (set_attr "adjust_len" "tstsi,*,compare,compare,compare")]) + + +;; ---------------------------------------------------------------------- +;; JUMP INSTRUCTIONS +;; ---------------------------------------------------------------------- +;; Conditional jump instructions + +;; "cbranchqi4" +;; "cbranchqq4" "cbranchuqq4" +(define_expand "cbranch4" + [(set (cc0) + (compare (match_operand:ALL1 1 "register_operand" "") + (match_operand:ALL1 2 "nonmemory_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))]) + +;; "cbranchhi4" "cbranchhq4" "cbranchuhq4" "cbranchha4" "cbranchuha4" +;; "cbranchsi4" "cbranchsq4" "cbranchusq4" "cbranchsa4" "cbranchusa4" +;; "cbranchpsi4" +(define_expand "cbranch4" + [(parallel [(set (cc0) + (compare (match_operand:ORDERED234 1 "register_operand" "") + (match_operand:ORDERED234 2 "nonmemory_operand" ""))) + (clobber (match_scratch:QI 4 ""))]) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(cc0) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))]) + + +;; Test a single bit in a QI/HI/SImode register. +;; Combine will create zero extract patterns for single bit tests. +;; permit any mode in source pattern by using VOIDmode. 
+ +(define_insn "*sbrx_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:QIDI + (match_operand:VOID 1 "register_operand" "r") + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + { + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Same test based on bitwise AND. Keep this in case gcc changes patterns. +;; or for old peepholes. +;; Fixme - bitwise Mask will not work for DImode + +(define_insn "*sbrx_and_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(and:QISI + (match_operand:QISI 1 "register_operand" "r") + (match_operand:QISI 2 "single_one_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + { + HOST_WIDE_INT bitnumber; + bitnumber = exact_log2 (GET_MODE_MASK (mode) & INTVAL (operands[2])); + operands[2] = GEN_INT (bitnumber); + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Convert sign tests to bit 7/15/31 tests that match the above insns. +(define_peephole2 + [(set (cc0) (compare (match_operand:QI 0 "register_operand" "") + (const_int 0))) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (zero_extract:HI (match_dup 0) + (const_int 1) + (const_int 7)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))]) + +(define_peephole2 + [(set (cc0) (compare (match_operand:QI 0 "register_operand" "") + (const_int 0))) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (zero_extract:HI (match_dup 0) + (const_int 1) + (const_int 7)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))]) + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:HI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:HI 2 ""))]) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (and:HI (match_dup 0) (const_int -32768)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))]) + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:HI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:HI 2 ""))]) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (and:HI (match_dup 0) (const_int -32768)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))]) + +(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:SI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:SI 2 ""))]) + (set (pc) (if_then_else (ge (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (eq (and:SI (match_dup 0) (match_dup 2)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "operands[2] = gen_int_mode (-2147483647 - 1, SImode);") + 
+(define_peephole2 + [(parallel [(set (cc0) (compare (match_operand:SI 0 "register_operand" "") + (const_int 0))) + (clobber (match_operand:SI 2 ""))]) + (set (pc) (if_then_else (lt (cc0) (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) (if_then_else (ne (and:SI (match_dup 0) (match_dup 2)) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "operands[2] = gen_int_mode (-2147483647 - 1, SImode);") + +;; ************************************************************************ +;; Implementation of conditional jumps here. +;; Compare with 0 (test) jumps +;; ************************************************************************ + +(define_insn "branch" + [(set (pc) + (if_then_else (match_operator 1 "simple_comparison_operator" + [(cc0) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 0); + } + [(set_attr "type" "branch") + (set_attr "cc" "clobber")]) + + +;; Same as above but wrap SET_SRC so that this branch won't be transformed +;; or optimized in the remainder. + +(define_insn "branch_unspec" + [(set (pc) + (unspec [(if_then_else (match_operator 1 "simple_comparison_operator" + [(cc0) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)) + ] UNSPEC_IDENTITY))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 0); + } + [(set_attr "type" "branch") + (set_attr "cc" "none")]) + +;; **************************************************************** +;; AVR does not have following conditional jumps: LE,LEU,GT,GTU. +;; Convert them all to proper jumps. +;; ****************************************************************/ + +(define_insn "difficult_branch" + [(set (pc) + (if_then_else (match_operator 1 "difficult_comparison_operator" + [(cc0) + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 0); + } + [(set_attr "type" "branch1") + (set_attr "cc" "clobber")]) + +;; revers branch + +(define_insn "rvbranch" + [(set (pc) + (if_then_else (match_operator 1 "simple_comparison_operator" + [(cc0) + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 1); + } + [(set_attr "type" "branch1") + (set_attr "cc" "clobber")]) + +(define_insn "difficult_rvbranch" + [(set (pc) + (if_then_else (match_operator 1 "difficult_comparison_operator" + [(cc0) + (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + { + return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 1); + } + [(set_attr "type" "branch") + (set_attr "cc" "clobber")]) + +;; ************************************************************************** +;; Unconditional and other jump instructions. + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + { + return AVR_HAVE_JMP_CALL && get_attr_length (insn) != 1 + ? "jmp %x0" + : "rjmp %x0"; + } + [(set (attr "length") + (if_then_else (match_operand 0 "symbol_ref_operand" "") + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 1) + (const_int 2)) + (if_then_else (and (ge (minus (pc) (match_dup 0)) (const_int -2047)) + (le (minus (pc) (match_dup 0)) (const_int 2047))) + (const_int 1) + (const_int 2)))) + (set_attr "cc" "none")]) + +;; call + +;; Operand 1 not used on the AVR. +;; Operand 2 is 1 for tail-call, 0 otherwise. 
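+;; Rough illustration (f and g are placeholders, not part of this port):
+;; with optimization enabled,
+;;
+;;     extern int g (int);
+;;     int f (int x) { return g (x + 1); }
+;;
+;; can be expanded through "sibcall_value" below -- the (use (const_int 1))
+;; marker -- and end up as a plain RJMP/JMP to g, while a call whose result
+;; is still needed afterwards goes through "call_value" -- (use (const_int 0))
+;; -- and becomes an RCALL/CALL with the usual return sequence.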
+(define_expand "call" + [(parallel[(call (match_operand:HI 0 "call_insn_operand" "") + (match_operand:HI 1 "general_operand" "")) + (use (const_int 0))])]) + +;; Operand 1 not used on the AVR. +;; Operand 2 is 1 for tail-call, 0 otherwise. +(define_expand "sibcall" + [(parallel[(call (match_operand:HI 0 "call_insn_operand" "") + (match_operand:HI 1 "general_operand" "")) + (use (const_int 1))])]) + +;; call value + +;; Operand 2 not used on the AVR. +;; Operand 3 is 1 for tail-call, 0 otherwise. +(define_expand "call_value" + [(parallel[(set (match_operand 0 "register_operand" "") + (call (match_operand:HI 1 "call_insn_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (use (const_int 0))])]) + +;; Operand 2 not used on the AVR. +;; Operand 3 is 1 for tail-call, 0 otherwise. +(define_expand "sibcall_value" + [(parallel[(set (match_operand 0 "register_operand" "") + (call (match_operand:HI 1 "call_insn_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (use (const_int 1))])]) + +(define_insn "call_insn" + [(parallel[(call (mem:HI (match_operand:HI 0 "nonmemory_operand" "z,s,z,s")) + (match_operand:HI 1 "general_operand" "X,X,X,X")) + (use (match_operand:HI 2 "const_int_operand" "L,L,P,P"))])] + ;; Operand 1 not used on the AVR. + ;; Operand 2 is 1 for tail-call, 0 otherwise. + "" + "@ + %!icall + %~call %x0 + %!ijmp + %~jmp %x0" + [(set_attr "cc" "clobber") + (set_attr "length" "1,*,1,*") + (set_attr "adjust_len" "*,call,*,call")]) + +(define_insn "call_value_insn" + [(parallel[(set (match_operand 0 "register_operand" "=r,r,r,r") + (call (mem:HI (match_operand:HI 1 "nonmemory_operand" "z,s,z,s")) + (match_operand:HI 2 "general_operand" "X,X,X,X"))) + (use (match_operand:HI 3 "const_int_operand" "L,L,P,P"))])] + ;; Operand 2 not used on the AVR. + ;; Operand 3 is 1 for tail-call, 0 otherwise. + "" + "@ + %!icall + %~call %x1 + %!ijmp + %~jmp %x1" + [(set_attr "cc" "clobber") + (set_attr "length" "1,*,1,*") + (set_attr "adjust_len" "*,call,*,call")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +; indirect jump + +(define_expand "indirect_jump" + [(set (pc) + (match_operand:HI 0 "nonmemory_operand" ""))] + "" + { + if (!AVR_HAVE_JMP_CALL && !register_operand (operands[0], HImode)) + { + operands[0] = copy_to_mode_reg (HImode, operands[0]); + } + }) + +; indirect jump +(define_insn "*indirect_jump" + [(set (pc) + (match_operand:HI 0 "nonmemory_operand" "i,i,!z,*r,z"))] + "" + "@ + rjmp %x0 + jmp %x0 + ijmp + push %A0\;push %B0\;ret + eijmp" + [(set_attr "length" "1,2,1,3,1") + (set_attr "isa" "rjmp,jmp,ijmp,ijmp,eijmp") + (set_attr "cc" "none")]) + +;; table jump +;; For entries in jump table see avr_output_addr_vec_elt. 
+ +;; Table made from +;; "rjmp .L" instructions for <= 8K devices +;; ".word gs(.L)" addresses for > 8K devices +(define_insn "*tablejump" + [(set (pc) + (unspec:HI [(match_operand:HI 0 "register_operand" "!z,*r,z")] + UNSPEC_INDEX_JMP)) + (use (label_ref (match_operand 1 "" ""))) + (clobber (match_dup 0))] + "" + "@ + ijmp + push %A0\;push %B0\;ret + jmp __tablejump2__" + [(set_attr "length" "1,3,2") + (set_attr "isa" "rjmp,rjmp,jmp") + (set_attr "cc" "none,none,clobber")]) + + +(define_expand "casesi" + [(parallel [(set (match_dup 6) + (minus:HI (subreg:HI (match_operand:SI 0 "register_operand" "") 0) + (match_operand:HI 1 "register_operand" ""))) + (clobber (scratch:QI))]) + (parallel [(set (cc0) + (compare (match_dup 6) + (match_operand:HI 2 "register_operand" ""))) + (clobber (match_scratch:QI 9 ""))]) + + (set (pc) + (if_then_else (gtu (cc0) + (const_int 0)) + (label_ref (match_operand 4 "" "")) + (pc))) + + (set (match_dup 6) + (plus:HI (match_dup 6) (label_ref (match_operand:HI 3 "" "")))) + + (parallel [(set (pc) (unspec:HI [(match_dup 6)] UNSPEC_INDEX_JMP)) + (use (label_ref (match_dup 3))) + (clobber (match_dup 6))])] + "" + { + operands[6] = gen_reg_rtx (HImode); + }) + + +;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +;; This instruction sets Z flag + +(define_insn "sez" + [(set (cc0) (const_int 0))] + "" + "sez" + [(set_attr "length" "1") + (set_attr "cc" "compare")]) + +;; Clear/set/test a single bit in I/O address space. + +(define_insn "*cbi" + [(set (mem:QI (match_operand 0 "low_io_address_operand" "n")) + (and:QI (mem:QI (match_dup 0)) + (match_operand:QI 1 "single_zero_operand" "n")))] + "" + { + operands[2] = GEN_INT (exact_log2 (~INTVAL (operands[1]) & 0xff)); + return "cbi %i0,%2"; + } + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +(define_insn "*sbi" + [(set (mem:QI (match_operand 0 "low_io_address_operand" "n")) + (ior:QI (mem:QI (match_dup 0)) + (match_operand:QI 1 "single_one_operand" "n")))] + "" + { + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1]) & 0xff)); + return "sbi %i0,%2"; + } + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; Lower half of the I/O space - use sbic/sbis directly. +(define_insn "*sbix_branch" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:QIHI + (mem:QI (match_operand 1 "low_io_address_operand" "n")) + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + { + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2046))) + (const_int 2) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Tests of bit 7 are pessimized to sign tests, so we need this too... 
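+;; Illustrative only (PINB stands for any low I/O register of a concrete
+;; device): a bit-7 test such as
+;;
+;;     if (PINB & 0x80)
+;;         ...
+;;
+;; is canonicalized into a sign test (GE/LT against zero) before it reaches
+;; the back end, so it does not match the eqne zero_extract form above; the
+;; gelt variant below catches it and still emits a single SBIC/SBIS on bit 7.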
+(define_insn "*sbix_branch_bit7" + [(set (pc) + (if_then_else + (match_operator 0 "gelt_operator" + [(mem:QI (match_operand 1 "low_io_address_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + operands[3] = operands[2]; + operands[2] = GEN_INT (7); + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 2)) (const_int -2046)) + (le (minus (pc) (match_dup 2)) (const_int 2046))) + (const_int 2) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 2) + (const_int 4)))) + (set_attr "cc" "clobber")]) + +;; Upper half of the I/O space - read port to __tmp_reg__ and use sbrc/sbrs. +(define_insn "*sbix_branch_tmp" + [(set (pc) + (if_then_else + (match_operator 0 "eqne_operator" + [(zero_extract:QIHI + (mem:QI (match_operand 1 "high_io_address_operand" "n")) + (const_int 1) + (match_operand 2 "const_int_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + { + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 3)) (const_int -2046)) + (le (minus (pc) (match_dup 3)) (const_int 2045))) + (const_int 3) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 3) + (const_int 5)))) + (set_attr "cc" "clobber")]) + +(define_insn "*sbix_branch_tmp_bit7" + [(set (pc) + (if_then_else + (match_operator 0 "gelt_operator" + [(mem:QI (match_operand 1 "high_io_address_operand" "n")) + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + operands[3] = operands[2]; + operands[2] = GEN_INT (7); + return avr_out_sbxx_branch (insn, operands); + } + [(set (attr "length") + (if_then_else (and (ge (minus (pc) (match_dup 2)) (const_int -2046)) + (le (minus (pc) (match_dup 2)) (const_int 2045))) + (const_int 3) + (if_then_else (match_test "!AVR_HAVE_JMP_CALL") + (const_int 3) + (const_int 5)))) + (set_attr "cc" "clobber")]) + +;; ************************* Peepholes ******************************** + +(define_peephole ; "*dec-and-branchsi!=-1.d.clobber" + [(parallel [(set (match_operand:SI 0 "d_register_operand" "") + (plus:SI (match_dup 0) + (const_int -1))) + (clobber (scratch:QI))]) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn ("sbiw %0,1" CR_TAB + "sbc %C0,__zero_reg__" CR_TAB + "sbc %D0,__zero_reg__", operands); + else + output_asm_insn ("subi %A0,1" CR_TAB + "sbc %B0,__zero_reg__" CR_TAB + "sbc %C0,__zero_reg__" CR_TAB + "sbc %D0,__zero_reg__", operands); + + jump_mode = avr_jump_mode (operands[2], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? 
"brcc" : "brcs"; + operands[1] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%1 %2"; + case 2: return "%1 .+2\;rjmp %2"; + case 3: return "%1 .+4\;jmp %2"; + } + + gcc_unreachable(); + return ""; + }) + +(define_peephole ; "*dec-and-branchhi!=-1" + [(set (match_operand:HI 0 "d_register_operand" "") + (plus:HI (match_dup 0) + (const_int -1))) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn ("sbiw %0,1", operands); + else + output_asm_insn ("subi %A0,1" CR_TAB + "sbc %B0,__zero_reg__", operands); + + jump_mode = avr_jump_mode (operands[2], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? "brcc" : "brcs"; + operands[1] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%1 %2"; + case 2: return "%1 .+2\;rjmp %2"; + case 3: return "%1 .+4\;jmp %2"; + } + + gcc_unreachable(); + return ""; + }) + +;; Same as above but with clobber flavour of addhi3 +(define_peephole ; "*dec-and-branchhi!=-1.d.clobber" + [(parallel [(set (match_operand:HI 0 "d_register_operand" "") + (plus:HI (match_dup 0) + (const_int -1))) + (clobber (scratch:QI))]) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn ("sbiw %0,1", operands); + else + output_asm_insn ("subi %A0,1" CR_TAB + "sbc %B0,__zero_reg__", operands); + + jump_mode = avr_jump_mode (operands[2], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? "brcc" : "brcs"; + operands[1] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%1 %2"; + case 2: return "%1 .+2\;rjmp %2"; + case 3: return "%1 .+4\;jmp %2"; + } + + gcc_unreachable(); + return ""; + }) + +;; Same as above but with clobber flavour of addhi3 +(define_peephole ; "*dec-and-branchhi!=-1.l.clobber" + [(parallel [(set (match_operand:HI 0 "l_register_operand" "") + (plus:HI (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 3 "d_register_operand" ""))]) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber (match_operand:QI 1 "d_register_operand" ""))]) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + output_asm_insn ("ldi %3,1" CR_TAB + "sub %A0,%3" CR_TAB + "sbc %B0,__zero_reg__", operands); + + jump_mode = avr_jump_mode (operands[2], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? 
"brcc" : "brcs"; + operands[1] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%1 %2"; + case 2: return "%1 .+2\;rjmp %2"; + case 3: return "%1 .+4\;jmp %2"; + } + + gcc_unreachable(); + return ""; + }) + +(define_peephole ; "*dec-and-branchqi!=-1" + [(set (match_operand:QI 0 "d_register_operand" "") + (plus:QI (match_dup 0) + (const_int -1))) + (set (cc0) + (compare (match_dup 0) + (const_int -1))) + (set (pc) + (if_then_else (eqne (cc0) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + { + const char *op; + int jump_mode; + CC_STATUS_INIT; + cc_status.value1 = operands[0]; + cc_status.flags |= CC_OVERFLOW_UNUSABLE; + + output_asm_insn ("subi %A0,1", operands); + + jump_mode = avr_jump_mode (operands[1], insn); + op = ((EQ == ) ^ (jump_mode == 1)) ? "brcc" : "brcs"; + operands[0] = gen_rtx_CONST_STRING (VOIDmode, op); + + switch (jump_mode) + { + case 1: return "%0 %1"; + case 2: return "%0 .+2\;rjmp %1"; + case 3: return "%0 .+4\;jmp %1"; + } + + gcc_unreachable(); + return ""; + }) + + +(define_peephole ; "*cpse.eq" + [(set (cc0) + (compare (match_operand:ALL1 1 "register_operand" "r,r") + (match_operand:ALL1 2 "reg_or_0_operand" "r,Y00"))) + (set (pc) + (if_then_else (eq (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "jump_over_one_insn_p (insn, operands[0])" + "@ + cpse %1,%2 + cpse %1,__zero_reg__") + +;; This peephole avoids code like +;; +;; TST Rn ; *cmpqi +;; BREQ .+2 ; branch +;; RJMP .Lm +;; +;; Notice that the peephole is always shorter than cmpqi + branch. +;; The reason to write it as peephole is that sequences like +;; +;; AND Rm, Rn +;; BRNE .La +;; +;; shall not be superseeded. With a respective combine pattern +;; the latter sequence would be +;; +;; AND Rm, Rn +;; CPSE Rm, __zero_reg__ +;; RJMP .La +;; +;; and thus longer and slower and not easy to be rolled back. + +(define_peephole ; "*cpse.ne" + [(set (cc0) + (compare (match_operand:ALL1 1 "register_operand" "") + (match_operand:ALL1 2 "reg_or_0_operand" ""))) + (set (pc) + (if_then_else (ne (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "!AVR_HAVE_JMP_CALL + || !avr_current_device->errata_skip" + { + if (operands[2] == CONST0_RTX (mode)) + operands[2] = zero_reg_rtx; + + return 3 == avr_jump_mode (operands[0], insn) + ? 
"cpse %1,%2\;jmp %0" + : "cpse %1,%2\;rjmp %0"; + }) + +;;pppppppppppppppppppppppppppppppppppppppppppppppppppp +;;prologue/epilogue support instructions + +(define_insn "popqi" + [(set (match_operand:QI 0 "register_operand" "=r") + (mem:QI (pre_inc:HI (reg:HI REG_SP))))] + "" + "pop %0" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +;; Enable Interrupts +(define_expand "enable_interrupt" + [(clobber (const_int 0))] + "" + { + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + emit_insn (gen_cli_sei (const1_rtx, mem)); + DONE; + }) + +;; Disable Interrupts +(define_expand "disable_interrupt" + [(clobber (const_int 0))] + "" + { + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + emit_insn (gen_cli_sei (const0_rtx, mem)); + DONE; + }) + +(define_insn "cli_sei" + [(unspec_volatile [(match_operand:QI 0 "const_int_operand" "L,P")] + UNSPECV_ENABLE_IRQS) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))] + "" + "@ + cli + sei" + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; Library prologue saves +(define_insn "call_prologue_saves" + [(unspec_volatile:HI [(const_int 0)] UNSPECV_PROLOGUE_SAVES) + (match_operand:HI 0 "immediate_operand" "i,i") + (set (reg:HI REG_SP) + (minus:HI (reg:HI REG_SP) + (match_operand:HI 1 "immediate_operand" "i,i"))) + (use (reg:HI REG_X)) + (clobber (reg:HI REG_Z))] + "" + "ldi r30,lo8(gs(1f)) + ldi r31,hi8(gs(1f)) + %~jmp __prologue_saves__+((18 - %0) * 2) +1:" + [(set_attr "length" "5,6") + (set_attr "cc" "clobber") + (set_attr "isa" "rjmp,jmp")]) + +; epilogue restores using library +(define_insn "epilogue_restores" + [(unspec_volatile:QI [(const_int 0)] UNSPECV_EPILOGUE_RESTORES) + (set (reg:HI REG_Y) + (plus:HI (reg:HI REG_Y) + (match_operand:HI 0 "immediate_operand" "i,i"))) + (set (reg:HI REG_SP) + (plus:HI (reg:HI REG_Y) + (match_dup 0))) + (clobber (reg:QI REG_Z))] + "" + "ldi r30, lo8(%0) + %~jmp __epilogue_restores__ + ((18 - %0) * 2)" + [(set_attr "length" "2,3") + (set_attr "cc" "clobber") + (set_attr "isa" "rjmp,jmp")]) + +; return +(define_insn "return" + [(return)] + "reload_completed && avr_simple_epilogue ()" + "ret" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn "return_from_epilogue" + [(return)] + "reload_completed + && cfun->machine + && !(cfun->machine->is_interrupt || cfun->machine->is_signal) + && !cfun->machine->is_naked" + "ret" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn "return_from_interrupt_epilogue" + [(return)] + "reload_completed + && cfun->machine + && (cfun->machine->is_interrupt || cfun->machine->is_signal) + && !cfun->machine->is_naked" + "reti" + [(set_attr "cc" "none") + (set_attr "length" "1")]) + +(define_insn "return_from_naked_epilogue" + [(return)] + "reload_completed + && cfun->machine + && cfun->machine->is_naked" + "" + [(set_attr "cc" "none") + (set_attr "length" "0")]) + +(define_expand "prologue" + [(const_int 0)] + "" + { + avr_expand_prologue (); + DONE; + }) + +(define_expand "epilogue" + [(const_int 0)] + "" + { + avr_expand_epilogue (false /* sibcall_p */); + DONE; + }) + +(define_expand "sibcall_epilogue" + [(const_int 0)] + "" + { + avr_expand_epilogue (true /* sibcall_p */); + DONE; + }) + +;; Some instructions resp. instruction sequences available +;; via builtins. 
+ +(define_insn "delay_cycles_1" + [(unspec_volatile [(match_operand:QI 0 "const_int_operand" "n") + (const_int 1)] + UNSPECV_DELAY_CYCLES) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER)) + (clobber (match_scratch:QI 2 "=&d"))] + "" + "ldi %2,lo8(%0) + 1: dec %2 + brne 1b" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "delay_cycles_2" + [(unspec_volatile [(match_operand:HI 0 "const_int_operand" "n") + (const_int 2)] + UNSPECV_DELAY_CYCLES) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER)) + (clobber (match_scratch:HI 2 "=&w"))] + "" + "ldi %A2,lo8(%0) + ldi %B2,hi8(%0) + 1: sbiw %A2,1 + brne 1b" + [(set_attr "length" "4") + (set_attr "cc" "clobber")]) + +(define_insn "delay_cycles_3" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n") + (const_int 3)] + UNSPECV_DELAY_CYCLES) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER)) + (clobber (match_scratch:QI 2 "=&d")) + (clobber (match_scratch:QI 3 "=&d")) + (clobber (match_scratch:QI 4 "=&d"))] + "" + "ldi %2,lo8(%0) + ldi %3,hi8(%0) + ldi %4,hlo8(%0) + 1: subi %2,1 + sbci %3,0 + sbci %4,0 + brne 1b" + [(set_attr "length" "7") + (set_attr "cc" "clobber")]) + +(define_insn "delay_cycles_4" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n") + (const_int 4)] + UNSPECV_DELAY_CYCLES) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER)) + (clobber (match_scratch:QI 2 "=&d")) + (clobber (match_scratch:QI 3 "=&d")) + (clobber (match_scratch:QI 4 "=&d")) + (clobber (match_scratch:QI 5 "=&d"))] + "" + "ldi %2,lo8(%0) + ldi %3,hi8(%0) + ldi %4,hlo8(%0) + ldi %5,hhi8(%0) + 1: subi %2,1 + sbci %3,0 + sbci %4,0 + sbci %5,0 + brne 1b" + [(set_attr "length" "9") + (set_attr "cc" "clobber")]) + + +;; __builtin_avr_insert_bits + +(define_insn "insert_bits" + [(set (match_operand:QI 0 "register_operand" "=r ,d ,r") + (unspec:QI [(match_operand:SI 1 "const_int_operand" "C0f,Cxf,C0f") + (match_operand:QI 2 "register_operand" "r ,r ,r") + (match_operand:QI 3 "nonmemory_operand" "n ,0 ,0")] + UNSPEC_INSERT_BITS))] + "" + { + return avr_out_insert_bits (operands, NULL); + } + [(set_attr "adjust_len" "insert_bits") + (set_attr "cc" "clobber")]) + + +;; __builtin_avr_flash_segment + +;; Just a helper for the next "official" expander. + +(define_expand "flash_segment1" + [(set (match_operand:QI 0 "register_operand" "") + (subreg:QI (match_operand:PSI 1 "register_operand" "") + 2)) + (set (cc0) + (compare (match_dup 0) + (const_int 0))) + (set (pc) + (if_then_else (ge (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc))) + (set (match_dup 0) + (const_int -1))]) + +(define_expand "flash_segment" + [(parallel [(match_operand:QI 0 "register_operand" "") + (match_operand:PSI 1 "register_operand" "")])] + "" + { + rtx label = gen_label_rtx (); + emit (gen_flash_segment1 (operands[0], operands[1], label)); + emit_label (label); + DONE; + }) + +;; Actually, it's too late now to work out address spaces known at compiletime. +;; Best place would be to fold ADDR_SPACE_CONVERT_EXPR in avr_fold_builtin. +;; However, avr_addr_space_convert can add some built-in knowledge for PSTR +;; so that ADDR_SPACE_CONVERT_EXPR in the built-in must not be resolved. 
+ +(define_insn_and_split "*split.flash_segment" + [(set (match_operand:QI 0 "register_operand" "=d") + (subreg:QI (lo_sum:PSI (match_operand:QI 1 "nonmemory_operand" "ri") + (match_operand:HI 2 "register_operand" "r")) + 2))] + "" + { gcc_unreachable(); } + "" + [(set (match_dup 0) + (match_dup 1))]) + + +;; Parity + +;; Postpone expansion of 16-bit parity to libgcc call until after combine for +;; better 8-bit parity recognition. + +(define_expand "parityhi2" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (parity:HI (match_operand:HI 1 "register_operand" ""))) + (clobber (reg:HI 24))])]) + +(define_insn_and_split "*parityhi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (parity:HI (match_operand:HI 1 "register_operand" "r"))) + (clobber (reg:HI 24))] + "!reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:HI 24) + (match_dup 1)) + (set (reg:HI 24) + (parity:HI (reg:HI 24))) + (set (match_dup 0) + (reg:HI 24))]) + +(define_insn_and_split "*parityqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (parity:HI (match_operand:QI 1 "register_operand" "r"))) + (clobber (reg:HI 24))] + "!reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:QI 24) + (match_dup 1)) + (set (reg:HI 24) + (zero_extend:HI (parity:QI (reg:QI 24)))) + (set (match_dup 0) + (reg:HI 24))]) + +(define_expand "paritysi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:HI 24) + (truncate:HI (parity:SI (reg:SI 22)))) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*parityhi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:HI 24)))] + "" + "%~call __parityhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*parityqihi2.libgcc" + [(set (reg:HI 24) + (zero_extend:HI (parity:QI (reg:QI 24))))] + "" + "%~call __parityqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*paritysihi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (parity:SI (reg:SI 22))))] + "" + "%~call __paritysi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; Popcount + +(define_expand "popcounthi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (set (reg:HI 24) + (popcount:HI (reg:HI 24))) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "popcountsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (set (reg:HI 24) + (truncate:HI (popcount:SI (reg:SI 22)))) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*popcounthi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:HI 24)))] + "" + "%~call __popcounthi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountsi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (popcount:SI (reg:SI 22))))] + "" + "%~call __popcountsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountqi2.libgcc" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24)))] + "" + "%~call __popcountqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*popcountqihi2.libgcc" + [(set (reg:HI 24) + (zero_extend:HI (popcount:QI (reg:QI 24))))] + "" + "#" + "" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24))) + (set (reg:QI 25) + (const_int 0))]) 
+ +;; Count Leading Zeros + +(define_expand "clzhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (clz:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))]) + +(define_expand "clzsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (clz:SI (reg:SI 22)))) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*clzhi2.libgcc" + [(set (reg:HI 24) + (clz:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __clzhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*clzsihi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (clz:SI (reg:SI 22)))) + (clobber (reg:QI 26))] + "" + "%~call __clzsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Count Trailing Zeros + +(define_expand "ctzhi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ctz:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))]) + +(define_expand "ctzsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (ctz:SI (reg:SI 22)))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*ctzhi2.libgcc" + [(set (reg:HI 24) + (ctz:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __ctzhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*ctzsihi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (ctz:SI (reg:SI 22)))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))] + "" + "%~call __ctzsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Find First Set + +(define_expand "ffshi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (ffs:HI (reg:HI 24))) + (clobber (reg:QI 26))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))]) + +(define_expand "ffssi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (reg:HI 24) + (truncate:HI (ffs:SI (reg:SI 22)))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))]) + (set (match_dup 2) + (reg:HI 24)) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_dup 2)))] + "" + { + operands[2] = gen_reg_rtx (HImode); + }) + +(define_insn "*ffshi2.libgcc" + [(set (reg:HI 24) + (ffs:HI (reg:HI 24))) + (clobber (reg:QI 26))] + "" + "%~call __ffshi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*ffssihi2.libgcc" + [(set (reg:HI 24) + (truncate:HI (ffs:SI (reg:SI 22)))) + (clobber (reg:QI 22)) + (clobber (reg:QI 26))] + "" + "%~call __ffssi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; Copysign + +(define_insn "copysignsf3" + [(set (match_operand:SF 0 "register_operand" "=r") + (unspec:SF [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "register_operand" "r")] + UNSPEC_COPYSIGN))] + "" + "bst %D2,7\;bld %D0,7" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Swap Bytes (change byte-endianess) + +(define_expand "bswapsi2" + [(set (reg:SI 22) + (match_operand:SI 1 
"register_operand" "")) + (set (reg:SI 22) + (bswap:SI (reg:SI 22))) + (set (match_operand:SI 0 "register_operand" "") + (reg:SI 22))]) + +(define_insn "*bswapsi2.libgcc" + [(set (reg:SI 22) + (bswap:SI (reg:SI 22)))] + "" + "%~call __bswapsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; CPU instructions + +;; NOP taking 1 or 2 Ticks +(define_expand "nopv" + [(parallel [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] + UNSPECV_NOP) + (set (match_dup 1) + (unspec_volatile:BLK [(match_dup 1)] + UNSPECV_MEMORY_BARRIER))])] + "" + { + operands[1] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[1]) = 1; + }) + +(define_insn "*nopv" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "P,K")] + UNSPECV_NOP) + (set (match_operand:BLK 1 "" "") + (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))] + "" + "@ + nop + rjmp ." + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; SLEEP +(define_expand "sleep" + [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_SLEEP) + (set (match_dup 0) + (unspec_volatile:BLK [(match_dup 0)] + UNSPECV_MEMORY_BARRIER))])] + "" + { + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; + }) + +(define_insn "*sleep" + [(unspec_volatile [(const_int 0)] UNSPECV_SLEEP) + (set (match_operand:BLK 0 "" "") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMORY_BARRIER))] + "" + "sleep" + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; WDR +(define_expand "wdr" + [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_WDR) + (set (match_dup 0) + (unspec_volatile:BLK [(match_dup 0)] + UNSPECV_MEMORY_BARRIER))])] + "" + { + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; + }) + +(define_insn "*wdr" + [(unspec_volatile [(const_int 0)] UNSPECV_WDR) + (set (match_operand:BLK 0 "" "") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMORY_BARRIER))] + "" + "wdr" + [(set_attr "length" "1") + (set_attr "cc" "none")]) + +;; FMUL +(define_expand "fmul" + [(set (reg:QI 24) + (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 25) + (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMUL)) + (clobber (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 22))] + "" + { + if (AVR_HAVE_MUL) + { + emit_insn (gen_fmul_insn (operand0, operand1, operand2)); + DONE; + } + }) + +(define_insn "fmul_insn" + [(set (match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(match_operand:QI 1 "register_operand" "a") + (match_operand:QI 2 "register_operand" "a")] + UNSPEC_FMUL))] + "AVR_HAVE_MUL" + "fmul %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "*fmul.call" + [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMUL)) + (clobber (reg:HI 24))] + "!AVR_HAVE_MUL" + "%~call __fmul" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; FMULS +(define_expand "fmuls" + [(set (reg:QI 24) + (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 25) + (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMULS)) + (clobber (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 22))] + "" + { + if (AVR_HAVE_MUL) + { + emit_insn (gen_fmuls_insn (operand0, operand1, operand2)); + DONE; + } + }) + +(define_insn "fmuls_insn" + [(set 
(match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(match_operand:QI 1 "register_operand" "a") + (match_operand:QI 2 "register_operand" "a")] + UNSPEC_FMULS))] + "AVR_HAVE_MUL" + "fmuls %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "*fmuls.call" + [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMULS)) + (clobber (reg:HI 24))] + "!AVR_HAVE_MUL" + "%~call __fmuls" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +;; FMULSU +(define_expand "fmulsu" + [(set (reg:QI 24) + (match_operand:QI 1 "register_operand" "")) + (set (reg:QI 25) + (match_operand:QI 2 "register_operand" "")) + (parallel [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMULSU)) + (clobber (reg:HI 24))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 22))] + "" + { + if (AVR_HAVE_MUL) + { + emit_insn (gen_fmulsu_insn (operand0, operand1, operand2)); + DONE; + } + }) + +(define_insn "fmulsu_insn" + [(set (match_operand:HI 0 "register_operand" "=r") + (unspec:HI [(match_operand:QI 1 "register_operand" "a") + (match_operand:QI 2 "register_operand" "a")] + UNSPEC_FMULSU))] + "AVR_HAVE_MUL" + "fmulsu %1,%2 + movw %0,r0 + clr __zero_reg__" + [(set_attr "length" "3") + (set_attr "cc" "clobber")]) + +(define_insn "*fmulsu.call" + [(set (reg:HI 22) + (unspec:HI [(reg:QI 24) + (reg:QI 25)] UNSPEC_FMULSU)) + (clobber (reg:HI 24))] + "!AVR_HAVE_MUL" + "%~call __fmulsu" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + + +;; Some combiner patterns dealing with bits. +;; See PR42210 + +;; Move bit $3.0 into bit $0.$4 +(define_insn "*movbitqi.1-6.a" + [(set (match_operand:QI 0 "register_operand" "=r") + (ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "single_zero_operand" "n")) + (and:QI (ashift:QI (match_operand:QI 3 "register_operand" "r") + (match_operand:QI 4 "const_0_to_7_operand" "n")) + (match_operand:QI 5 "single_one_operand" "n"))))] + "INTVAL(operands[4]) == exact_log2 (~INTVAL(operands[2]) & GET_MODE_MASK (QImode)) + && INTVAL(operands[4]) == exact_log2 (INTVAL(operands[5]) & GET_MODE_MASK (QImode))" + "bst %3,0\;bld %0,%4" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Move bit $3.0 into bit $0.$4 +;; Variation of above. Unfortunately, there is no canonicalized representation +;; of moving around bits. So what we see here depends on how user writes down +;; bit manipulations. +(define_insn "*movbitqi.1-6.b" + [(set (match_operand:QI 0 "register_operand" "=r") + (ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "single_zero_operand" "n")) + (ashift:QI (and:QI (match_operand:QI 3 "register_operand" "r") + (const_int 1)) + (match_operand:QI 4 "const_0_to_7_operand" "n"))))] + "INTVAL(operands[4]) == exact_log2 (~INTVAL(operands[2]) & GET_MODE_MASK (QImode))" + "bst %3,0\;bld %0,%4" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Move bit $3.0 into bit $0.0. +;; For bit 0, combiner generates slightly different pattern. +(define_insn "*movbitqi.0" + [(set (match_operand:QI 0 "register_operand" "=r") + (ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "single_zero_operand" "n")) + (and:QI (match_operand:QI 3 "register_operand" "r") + (const_int 1))))] + "0 == exact_log2 (~INTVAL(operands[2]) & GET_MODE_MASK (QImode))" + "bst %3,0\;bld %0,0" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Move bit $2.0 into bit $0.7. 
+;; For bit 7, combiner generates slightly different pattern +(define_insn "*movbitqi.7" + [(set (match_operand:QI 0 "register_operand" "=r") + (ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") + (const_int 127)) + (ashift:QI (match_operand:QI 2 "register_operand" "r") + (const_int 7))))] + "" + "bst %2,0\;bld %0,7" + [(set_attr "length" "2") + (set_attr "cc" "none")]) + +;; Combiner transforms above four pattern into ZERO_EXTRACT if it sees MEM +;; and input/output match. We provide a special pattern for this, because +;; in contrast to a IN/BST/BLD/OUT sequence we need less registers and the +;; operation on I/O is atomic. +(define_insn "*insv.io" + [(set (zero_extract:QI (mem:QI (match_operand 0 "low_io_address_operand" "n,n,n")) + (const_int 1) + (match_operand:QI 1 "const_0_to_7_operand" "n,n,n")) + (match_operand:QI 2 "nonmemory_operand" "L,P,r"))] + "" + "@ + cbi %i0,%1 + sbi %i0,%1 + sbrc %2,0\;sbi %i0,%1\;sbrs %2,0\;cbi %i0,%1" + [(set_attr "length" "1,1,4") + (set_attr "cc" "none")]) + +(define_insn "*insv.not.io" + [(set (zero_extract:QI (mem:QI (match_operand 0 "low_io_address_operand" "n")) + (const_int 1) + (match_operand:QI 1 "const_0_to_7_operand" "n")) + (not:QI (match_operand:QI 2 "register_operand" "r")))] + "" + "sbrs %2,0\;sbi %i0,%1\;sbrc %2,0\;cbi %i0,%1" + [(set_attr "length" "4") + (set_attr "cc" "none")]) + +;; The insv expander. +;; We only support 1-bit inserts +(define_expand "insv" + [(set (zero_extract:QI (match_operand:QI 0 "register_operand" "") + (match_operand:QI 1 "const1_operand" "") ; width + (match_operand:QI 2 "const_0_to_7_operand" "")) ; pos + (match_operand:QI 3 "nonmemory_operand" ""))] + "optimize") + +;; Insert bit $2.0 into $0.$1 +(define_insn "*insv.reg" + [(set (zero_extract:QI (match_operand:QI 0 "register_operand" "+r,d,d,l,l") + (const_int 1) + (match_operand:QI 1 "const_0_to_7_operand" "n,n,n,n,n")) + (match_operand:QI 2 "nonmemory_operand" "r,L,P,L,P"))] + "" + "@ + bst %2,0\;bld %0,%1 + andi %0,lo8(~(1<<%1)) + ori %0,lo8(1<<%1) + clt\;bld %0,%1 + set\;bld %0,%1" + [(set_attr "length" "2,1,1,2,2") + (set_attr "cc" "none,set_zn,set_zn,none,none")]) + + +;; Some combine patterns that try to fix bad code when a value is composed +;; from byte parts like in PR27663. +;; The patterns give some release but the code still is not optimal, +;; in particular when subreg lowering (-fsplit-wide-types) is turned on. +;; That switch obfuscates things here and in many other places. 
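+;;
+;; Roughly the kind of source these combine patterns are aimed at
+;; (illustrative only):
+;;
+;;     unsigned int join (unsigned char lo, unsigned char hi)
+;;     {
+;;         return lo | ((unsigned int) hi << 8);
+;;     }
+;;
+;; With the splitters below, each byte is IOR-ed/XOR-ed directly into the
+;; matching subreg byte of the destination instead of being widened and
+;; shifted in full 16-bit registers first.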
+ +;; "*iorhiqi.byte0" "*iorpsiqi.byte0" "*iorsiqi.byte0" +;; "*xorhiqi.byte0" "*xorpsiqi.byte0" "*xorsiqi.byte0" +(define_insn_and_split "*qi.byte0" + [(set (match_operand:HISI 0 "register_operand" "=r") + (xior:HISI + (zero_extend:HISI (match_operand:QI 1 "register_operand" "r")) + (match_operand:HISI 2 "register_operand" "0")))] + "" + "#" + "reload_completed" + [(set (match_dup 3) + (xior:QI (match_dup 3) + (match_dup 1)))] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], mode, 0); + }) + +;; "*iorhiqi.byte1-3" "*iorpsiqi.byte1-3" "*iorsiqi.byte1-3" +;; "*xorhiqi.byte1-3" "*xorpsiqi.byte1-3" "*xorsiqi.byte1-3" +(define_insn_and_split "*qi.byte1-3" + [(set (match_operand:HISI 0 "register_operand" "=r") + (xior:HISI + (ashift:HISI (zero_extend:HISI (match_operand:QI 1 "register_operand" "r")) + (match_operand:QI 2 "const_8_16_24_operand" "n")) + (match_operand:HISI 3 "register_operand" "0")))] + "INTVAL(operands[2]) < GET_MODE_BITSIZE (mode)" + "#" + "&& reload_completed" + [(set (match_dup 4) + (xior:QI (match_dup 4) + (match_dup 1)))] + { + int byteno = INTVAL(operands[2]) / BITS_PER_UNIT; + operands[4] = simplify_gen_subreg (QImode, operands[0], mode, byteno); + }) + +(define_expand "extzv" + [(set (match_operand:QI 0 "register_operand" "") + (zero_extract:QI (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "const1_operand" "") + (match_operand:QI 3 "const_0_to_7_operand" "")))]) + +(define_insn "*extzv" + [(set (match_operand:QI 0 "register_operand" "=*d,*d,*d,*d,r") + (zero_extract:QI (match_operand:QI 1 "register_operand" "0,r,0,0,r") + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand" "L,L,P,C04,n")))] + "" + "@ + andi %0,1 + mov %0,%1\;andi %0,1 + lsr %0\;andi %0,1 + swap %0\;andi %0,1 + bst %1,%2\;clr %0\;bld %0,0" + [(set_attr "length" "1,2,2,2,3") + (set_attr "cc" "set_zn,set_zn,set_zn,set_zn,clobber")]) + +(define_insn_and_split "*extzv.qihi1" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extract:HI (match_operand:QI 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand" "n")))] + "" + "#" + "" + [(set (match_dup 3) + (zero_extract:QI (match_dup 1) + (const_int 1) + (match_dup 2))) + (set (match_dup 4) + (const_int 0))] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, 0); + operands[4] = simplify_gen_subreg (QImode, operands[0], HImode, 1); + }) + +(define_insn_and_split "*extzv.qihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI + (zero_extract:QI (match_operand:QI 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const_0_to_7_operand" "n"))))] + "" + "#" + "" + [(set (match_dup 3) + (zero_extract:QI (match_dup 1) + (const_int 1) + (match_dup 2))) + (set (match_dup 4) + (const_int 0))] + { + operands[3] = simplify_gen_subreg (QImode, operands[0], HImode, 0); + operands[4] = simplify_gen_subreg (QImode, operands[0], HImode, 1); + }) + + +;; Fixed-point instructions +(include "avr-fixed.md") + +;; Operations on 64-bit registers +(include "avr-dimode.md") diff --git a/gcc-4.9/gcc/config/avr/avr.opt b/gcc-4.9/gcc/config/avr/avr.opt new file mode 100644 index 000000000..5be80aa2d --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avr.opt @@ -0,0 +1,84 @@ +; Options for the ATMEL AVR port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. 
+; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +mcall-prologues +Target Report Mask(CALL_PROLOGUES) +Use subroutines for function prologues and epilogues + +mmcu= +Target RejectNegative Joined Var(avr_mcu_index) Init(0) Enum(avr_mcu) +-mmcu=MCU Select the target MCU + +mdeb +Target Report Undocumented Mask(ALL_DEBUG) + +mlog= +Target RejectNegative Joined Undocumented Var(avr_log_details) + +mint8 +Target Report Mask(INT8) +Use an 8-bit 'int' type + +mno-interrupts +Target Report RejectNegative Mask(NO_INTERRUPTS) +Change the stack pointer without disabling interrupts + +mbranch-cost= +Target Report Joined RejectNegative UInteger Var(avr_branch_cost) Init(0) +Set the branch costs for conditional branch instructions. Reasonable values are small, non-negative integers. The default branch cost is 0. + +morder1 +Target Report Undocumented Mask(ORDER_1) + +morder2 +Target Report Undocumented Mask(ORDER_2) + +mtiny-stack +Target Report Mask(TINY_STACK) +Change only the low 8 bits of the stack pointer + +mrelax +Target Report +Relax branches + +mpmem-wrap-around +Target Report +Make the linker relaxation machine assume that a program counter wrap-around occurs. + +maccumulate-args +Target Report Mask(ACCUMULATE_OUTGOING_ARGS) +Accumulate outgoing function arguments and acquire/release the needed stack space for outpoing function arguments in function prologue/epilogue. Without this option, outgoing arguments are pushed before calling a function and popped afterwards. This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf. + +mstrict-X +Target Report Var(avr_strict_X) Init(0) +When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X. + +;; For rationale behind -msp8 see explanation in avr.h. +msp8 +Target Report RejectNegative Var(avr_sp8) Init(0) +The device has no SPH special function register. This option will be overridden by the compiler driver with the correct setting if presence/absence of SPH can be deduced from -mmcu=MCU. + +Waddr-space-convert +Warning C Report Var(avr_warn_addr_space_convert) Init(0) +Warn if the address space of an address is changed. + +mfract-convert-truncate +Target Report Mask(FRACT_CONV_TRUNC) +Allow to use truncation instead of rounding towards 0 for fractional int types diff --git a/gcc-4.9/gcc/config/avr/avrlibc.h b/gcc-4.9/gcc/config/avr/avrlibc.h new file mode 100644 index 000000000..fee685b6a --- /dev/null +++ b/gcc-4.9/gcc/config/avr/avrlibc.h @@ -0,0 +1,30 @@ +/* Definitions of target machine for the GNU compiler collection + for Atmel AVR micro controller if configured for AVR-Libc. + Copyright (C) 2012-2014 Free Software Foundation, Inc. 
+ Contributed by Georg-Johann Lay (avr@gjlay.de) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* AVR-Libc implements functions from libgcc.a in libm.a, see PR54461. */ + +#undef LIBGCC_SPEC +#define LIBGCC_SPEC \ + "%{!mmcu=at90s1*:%{!mmcu=attiny11:%{!mmcu=attiny12:%{!mmcu=attiny15:%{!mmcu=attiny28: -lgcc -lm }}}}}" + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "--start-group %G %L --end-group" diff --git a/gcc-4.9/gcc/config/avr/builtins.def b/gcc-4.9/gcc/config/avr/builtins.def new file mode 100644 index 000000000..affcbaa34 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/builtins.def @@ -0,0 +1,169 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* This file contains the definitions and documentation for the + builtins defined in the AVR part of the GNU compiler. + Befor including this file, define a macro + + DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, LIBNAME) + + NAME: `__builtin_avr_name' will be the user-level name of the builtin. + `AVR_BUILTIN_NAME' will be the internal builtin's id. + N_ARGS: Number of input arguments. If special treatment is needed, + set to -1 and handle it by hand, see avr.c:avr_expand_builtin(). + TYPE: A tree node describing the prototype of the built-in. + ICODE: Name of attached insn or expander. If special treatment in avr.c + is needed to expand the built-in, use `nothing'. + LIBNAME: Name of the attached implementation in libgcc which is used if + the builtin cannot be folded away and there is no insn. */ + +/* Mapped to respective instruction. */ + +DEF_BUILTIN (NOP, -1, void_ftype_void, nothing, NULL) +DEF_BUILTIN (SEI, 0, void_ftype_void, enable_interrupt, NULL) +DEF_BUILTIN (CLI, 0, void_ftype_void, disable_interrupt, NULL) +DEF_BUILTIN (WDR, 0, void_ftype_void, wdr, NULL) +DEF_BUILTIN (SLEEP, 0, void_ftype_void, sleep, NULL) + +/* Mapped to respective instruction but might also be folded away + or emit as libgcc call if ISA does not provide the instruction. */ + +DEF_BUILTIN (SWAP, 1, uchar_ftype_uchar, rotlqi3_4, NULL) +DEF_BUILTIN (FMUL, 2, uint_ftype_uchar_uchar, fmul, NULL) +DEF_BUILTIN (FMULS, 2, int_ftype_char_char, fmuls, NULL) +DEF_BUILTIN (FMULSU, 2, int_ftype_char_uchar, fmulsu, NULL) + +/* More complex stuff that cannot be mapped 1:1 to an instruction. 
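+
+   Editorial sketch, not part of the original header: like other GCC
+   builtin .def files, this one is meant to be consumed with the usual
+   X-macro pattern.  avr.c is assumed to do roughly
+
+     #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, LIBNAME)  \
+       AVR_BUILTIN_##NAME,
+     enum avr_builtin_id
+     {
+       #include "builtins.def"
+       AVR_BUILTIN_COUNT
+     };
+     #undef DEF_BUILTIN
+
+   to enumerate the builtin ids, and presumably expands the list a second
+   time with a different DEF_BUILTIN body to register each builtin; entries
+   with N_ARGS == -1 are then expanded by hand in avr_expand_builtin, as
+   noted above.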
*/ + +DEF_BUILTIN (DELAY_CYCLES, -1, void_ftype_ulong, nothing, NULL) +DEF_BUILTIN (INSERT_BITS, 3, uchar_ftype_ulong_uchar_uchar, insert_bits, NULL) +DEF_BUILTIN (FLASH_SEGMENT, 1, char_ftype_const_memx_ptr, flash_segment, NULL) + +/* ISO/IEC TR 18037 "Embedded C" + The following builtins are undocumented and used by stdfix.h. */ + +/* 7.18a.6 The fixed-point intrinsic functions. */ + +/* 7.18a.6.2 The fixed-point absolute value functions. */ + +DEF_BUILTIN (ABSHR, 1, hr_ftype_hr, ssabsqq2, "__ssabs_1") +DEF_BUILTIN (ABSR, 1, nr_ftype_nr, ssabshq2, "__ssabs_2") +DEF_BUILTIN (ABSLR, 1, lr_ftype_lr, ssabssq2, "__ssabs_4") +DEF_BUILTIN (ABSLLR, -1, llr_ftype_llr, nothing, "__ssabsdq2") // GCC extension + +DEF_BUILTIN (ABSHK, 1, hk_ftype_hk, ssabsha2, "__ssabs_2") +DEF_BUILTIN (ABSK, 1, nk_ftype_nk, ssabssa2, "__ssabs_4") +DEF_BUILTIN (ABSLK, -1, lk_ftype_lk, nothing, "__ssabsda2") +DEF_BUILTIN (ABSLLK, -1, llk_ftype_llk, nothing, "__ssabsta2") // GCC extension + +/* 7.18a.6.3 The fixed-point round functions. */ + +DEF_BUILTIN (ROUNDHR, 2, hr_ftype_hr_int, roundqq3, "__roundhr") +DEF_BUILTIN (ROUNDR, 2, nr_ftype_nr_int, roundhq3, "__roundr") +DEF_BUILTIN (ROUNDLR, 2, lr_ftype_lr_int, roundsq3, "__roundlr") +DEF_BUILTIN (ROUNDLLR, -1, llr_ftype_llr_int, nothing, "__rounddq3") // GCC extension + +DEF_BUILTIN (ROUNDUHR, 2, uhr_ftype_uhr_int, rounduqq3, "__rounduhr") +DEF_BUILTIN (ROUNDUR, 2, unr_ftype_unr_int, rounduhq3, "__roundur") +DEF_BUILTIN (ROUNDULR, 2, ulr_ftype_ulr_int, roundusq3, "__roundulr") +DEF_BUILTIN (ROUNDULLR, -1, ullr_ftype_ullr_int, nothing, "__roundudq3") // GCC extension + +DEF_BUILTIN (ROUNDHK, 2, hk_ftype_hk_int, roundha3, "__roundhk") +DEF_BUILTIN (ROUNDK, 2, nk_ftype_nk_int, roundsa3, "__roundk") +DEF_BUILTIN (ROUNDLK, -1, lk_ftype_lk_int, nothing, "__roundda3") +DEF_BUILTIN (ROUNDLLK, -1, llk_ftype_llk_int, nothing, "__roundta3") // GCC extension + +DEF_BUILTIN (ROUNDUHK, 2, uhk_ftype_uhk_int, rounduha3, "__rounduhk") +DEF_BUILTIN (ROUNDUK, 2, unk_ftype_unk_int, roundusa3, "__rounduk") +DEF_BUILTIN (ROUNDULK, -1, ulk_ftype_ulk_int, nothing, "__rounduda3") +DEF_BUILTIN (ROUNDULLK, -1, ullk_ftype_ullk_int, nothing, "__rounduta3") // GCC extension + +/* 7.18a.6.4 The fixed-point bit countls functions. */ + +DEF_BUILTIN (COUNTLSHR, -1, int_ftype_hr, nothing, "__countlsqi2") +DEF_BUILTIN (COUNTLSR, -1, int_ftype_nr, nothing, "__countlshi2") +DEF_BUILTIN (COUNTLSLR, -1, int_ftype_lr, nothing, "__countlssi2") +DEF_BUILTIN (COUNTLSLLR, -1, int_ftype_llr, nothing, "__countlsdi2") // GCC extension + +DEF_BUILTIN (COUNTLSUHR, -1, int_ftype_uhr, nothing, "__countlsuqi2") +DEF_BUILTIN (COUNTLSUR, -1, int_ftype_unr, nothing, "__countlsuhi2") +DEF_BUILTIN (COUNTLSULR, -1, int_ftype_ulr, nothing, "__countlsusi2") +DEF_BUILTIN (COUNTLSULLR, -1, int_ftype_ullr, nothing, "__countlsudi2") // GCC extension + +DEF_BUILTIN (COUNTLSHK, -1, int_ftype_hk, nothing, "__countlshi2") +DEF_BUILTIN (COUNTLSK, -1, int_ftype_nk, nothing, "__countlssi2") +DEF_BUILTIN (COUNTLSLK, -1, int_ftype_lk, nothing, "__countlsdi2") +DEF_BUILTIN (COUNTLSLLK, -1, int_ftype_llk, nothing, "__countlsdi2") // GCC extension + +DEF_BUILTIN (COUNTLSUHK, -1, int_ftype_uhk, nothing, "__countlsuhi2") +DEF_BUILTIN (COUNTLSUK, -1, int_ftype_unk, nothing, "__countlsusi2") +DEF_BUILTIN (COUNTLSULK, -1, int_ftype_ulk, nothing, "__countlsudi2") +DEF_BUILTIN (COUNTLSULLK, -1, int_ftype_ullk, nothing, "__countlsudi2") // GCC extension + +/* 7.18a.6.5 The bitwise fixed-point to integer conversion functions. 
*/ + +DEF_BUILTIN (BITSHR, -1, inthr_ftype_hr, nothing, "__ret") +DEF_BUILTIN (BITSR, -1, intnr_ftype_nr, nothing, "__ret") +DEF_BUILTIN (BITSLR, -1, intlr_ftype_lr, nothing, "__ret") +DEF_BUILTIN (BITSLLR, -1, intllr_ftype_llr, nothing, "__ret") // GCC extension + +DEF_BUILTIN (BITSUHR, -1, intuhr_ftype_uhr, nothing, "__ret") +DEF_BUILTIN (BITSUR, -1, intunr_ftype_unr, nothing, "__ret") +DEF_BUILTIN (BITSULR, -1, intulr_ftype_ulr, nothing, "__ret") +DEF_BUILTIN (BITSULLR, -1, intullr_ftype_ullr, nothing, "__ret") // GCC extension + +DEF_BUILTIN (BITSHK, -1, inthk_ftype_hk, nothing, "__ret") +DEF_BUILTIN (BITSK, -1, intnk_ftype_nk, nothing, "__ret") +DEF_BUILTIN (BITSLK, -1, intlk_ftype_lk, nothing, "__ret") +DEF_BUILTIN (BITSLLK, -1, intllk_ftype_llk, nothing, "__ret") // GCC extension + +DEF_BUILTIN (BITSUHK, -1, intuhk_ftype_uhk, nothing, "__ret") +DEF_BUILTIN (BITSUK, -1, intunk_ftype_unk, nothing, "__ret") +DEF_BUILTIN (BITSULK, -1, intulk_ftype_ulk, nothing, "__ret") +DEF_BUILTIN (BITSULLK, -1, intullk_ftype_ullk, nothing, "__ret") // GCC extension + + +/* 7.18a.6.6 The bitwise integer to fixed-point conversion functions. */ + +DEF_BUILTIN ( HRBITS, -1, hr_ftype_inthr, nothing, "__ret") +DEF_BUILTIN ( RBITS, -1, nr_ftype_intnr, nothing, "__ret") +DEF_BUILTIN ( LRBITS, -1, lr_ftype_intlr, nothing, "__ret") +DEF_BUILTIN ( LLRBITS, -1, llr_ftype_intllr, nothing, "__ret") // GCC extension + +DEF_BUILTIN ( UHRBITS, -1, uhr_ftype_intuhr, nothing, "__ret") +DEF_BUILTIN ( URBITS, -1, unr_ftype_intunr, nothing, "__ret") +DEF_BUILTIN ( ULRBITS, -1, ulr_ftype_intulr, nothing, "__ret") +DEF_BUILTIN (ULLRBITS, -1, ullr_ftype_intullr, nothing, "__ret") // GCC extension + +DEF_BUILTIN ( HKBITS, -1, hk_ftype_inthk, nothing, "__ret") +DEF_BUILTIN ( KBITS, -1, nk_ftype_intnk, nothing, "__ret") +DEF_BUILTIN ( LKBITS, -1, lk_ftype_intlk, nothing, "__ret") +DEF_BUILTIN ( LLKBITS, -1, llk_ftype_intllk, nothing, "__ret") // GCC extension + +DEF_BUILTIN ( UHKBITS, -1, uhk_ftype_intuhk, nothing, "__ret") +DEF_BUILTIN ( UKBITS, -1, unk_ftype_intunk, nothing, "__ret") +DEF_BUILTIN ( ULKBITS, -1, ulk_ftype_intulk, nothing, "__ret") +DEF_BUILTIN (ULLKBITS, -1, ullk_ftype_intullk, nothing, "__ret") // GCC extension + +/* Overloaded */ + +/* 7.18a.6.7 Type-generic fixed-point functions. */ + +DEF_BUILTIN (ABSFX, -1, void_ftype_void /* dummy */, nothing, NULL) +DEF_BUILTIN (ROUNDFX, -1, void_ftype_void /* dummy */, nothing, NULL) +DEF_BUILTIN (COUNTLSFX, -1, void_ftype_void /* dummy */, nothing, NULL) diff --git a/gcc-4.9/gcc/config/avr/constraints.md b/gcc-4.9/gcc/config/avr/constraints.md new file mode 100644 index 000000000..2f6e4ea1b --- /dev/null +++ b/gcc-4.9/gcc/config/avr/constraints.md @@ -0,0 +1,238 @@ +;; Constraint definitions for ATMEL AVR micro controllers. +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
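+
+;; Editorial note, not part of the original file: the constraint letters
+;; defined below are referenced from the operand constraint strings of the
+;; insns in avr.md.  A hypothetical pattern that only accepts an upper
+;; register and a 6-bit constant could, for instance, be written roughly as
+;;
+;;   (define_insn "*example"
+;;     [(set (match_operand:QI 0 "register_operand" "=d")
+;;           (plus:QI (match_operand:QI 1 "register_operand" "0")
+;;                    (match_operand:QI 2 "const_int_operand" "I")))]
+;;     ...)
+;;
+;; where "d" selects LD_REGS (r16--r31) and "I" the range 0..63, as
+;; defined below.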
+ +;; Register constraints + +(define_register_constraint "t" "R0_REG" + "Temporary register r0") + +(define_register_constraint "b" "BASE_POINTER_REGS" + "Base pointer registers (r28--r31)") + +(define_register_constraint "e" "POINTER_REGS" + "Pointer registers (r26--r31)") + +(define_register_constraint "w" "ADDW_REGS" + "Registers from r24 to r31. These registers + can be used in @samp{adiw} command.") + +(define_register_constraint "d" "LD_REGS" + "Registers from r16 to r31.") + +(define_register_constraint "l" "NO_LD_REGS" + "Registers from r0 to r15.") + +(define_register_constraint "a" "SIMPLE_LD_REGS" + "Registers from r16 to r23.") + +(define_register_constraint "x" "POINTER_X_REGS" + "Register pair X (r27:r26).") + +(define_register_constraint "y" "POINTER_Y_REGS" + "Register pair Y (r29:r28).") + +(define_register_constraint "z" "POINTER_Z_REGS" + "Register pair Z (r31:r30).") + +(define_register_constraint "q" "STACK_REG" + "Stack pointer register (SPH:SPL).") + +(define_constraint "I" + "Integer constant in the range 0 @dots{} 63." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 63"))) + +(define_constraint "J" + "Integer constant in the range -63 @dots{} 0." + (and (match_code "const_int") + (match_test "ival <= 0 && ival >= -63"))) + +(define_constraint "K" + "Integer constant 2." + (and (match_code "const_int") + (match_test "ival == 2"))) + +(define_constraint "L" + "Zero." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "M" + "Integer constant in the range 0 @dots{} 0xff." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 0xff"))) + +(define_constraint "N" + "Constant integer @minus{}1." + (and (match_code "const_int") + (match_test "ival == -1"))) + +(define_constraint "O" + "Constant integer 8, 16, or 24." + (and (match_code "const_int") + (match_test "ival == 8 || ival == 16 || ival == 24"))) + +(define_constraint "P" + "Constant integer 1." + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "G" + "Constant float 0." + (and (match_code "const_double") + (match_test "op == CONST0_RTX (SFmode)"))) + +(define_memory_constraint "Q" + "A memory address based on Y or Z pointer with displacement." + (and (match_code "mem") + (match_test "extra_constraint_Q (op)"))) + +(define_constraint "Cm2" + "Constant integer @minus{}2." + (and (match_code "const_int") + (match_test "ival == -2"))) + +(define_constraint "C03" + "Constant integer 3." + (and (match_code "const_int") + (match_test "ival == 3"))) + +(define_constraint "C04" + "Constant integer 4." + (and (match_code "const_int") + (match_test "ival == 4"))) + +(define_constraint "C05" + "Constant integer 5." + (and (match_code "const_int") + (match_test "ival == 5"))) + +(define_constraint "C06" + "Constant integer 6." + (and (match_code "const_int") + (match_test "ival == 6"))) + +(define_constraint "C07" + "Constant integer 7." + (and (match_code "const_int") + (match_test "ival == 7"))) + +(define_constraint "Ca2" + "Constant 2-byte integer that allows AND without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 2, (1<<0) | (1<<7) | (1<<8))"))) + +(define_constraint "Ca3" + "Constant 3-byte integer that allows AND without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 3, (1<<0) | (1<<7) | (1<<8))"))) + +(define_constraint "Ca4" + "Constant 4-byte integer that allows AND without clobber register." 
+ (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 4, (1<<0) | (1<<7) | (1<<8))"))) + +(define_constraint "Co2" + "Constant 2-byte integer that allows OR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 2, (1<<0) | (1<<1) | (1<<8))"))) + +(define_constraint "Co3" + "Constant 3-byte integer that allows OR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 3, (1<<0) | (1<<1) | (1<<8))"))) + +(define_constraint "Co4" + "Constant 4-byte integer that allows OR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 4, (1<<0) | (1<<1) | (1<<8))"))) + +(define_constraint "Cx2" + "Constant 2-byte integer that allows XOR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 2, (1<<0) | (1<<8))"))) + +(define_constraint "Cx3" + "Constant 3-byte integer that allows XOR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 3, (1<<0) | (1<<8))"))) + +(define_constraint "Cx4" + "Constant 4-byte integer that allows XOR without clobber register." + (and (match_code "const_int") + (match_test "avr_popcount_each_byte (op, 4, (1<<0) | (1<<8))"))) + +(define_constraint "Csp" + "Integer constant in the range -6 @dots{} 6." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -6, 6)"))) + +(define_constraint "Cxf" + "32-bit integer constant where at least one nibble is 0xf." + (and (match_code "const_int") + (match_test "avr_has_nibble_0xf (op)"))) + +(define_constraint "C0f" + "32-bit integer constant where no nibble equals 0xf." + (and (match_code "const_int") + (match_test "!avr_has_nibble_0xf (op)"))) + +;; CONST_FIXED is no element of 'n' so cook our own. +;; "i" or "s" would match but because the insn uses iterators that cover +;; INT_MODE, "i" or "s" is not always possible. + +(define_constraint "Ynn" + "Fixed-point constant known at compile time." + (match_code "const_fixed")) + +(define_constraint "Y00" + "Fixed-point or integer constant with bit representation 0x0" + (and (match_code "const_fixed,const_int") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) + +(define_constraint "Y01" + "Fixed-point or integer constant with bit representation 0x1" + (ior (and (match_code "const_fixed") + (match_test "1 == INTVAL (avr_to_int_mode (op))")) + (match_test "satisfies_constraint_P (op)"))) + +(define_constraint "Ym1" + "Fixed-point or integer constant with bit representation -0x1" + (ior (and (match_code "const_fixed") + (match_test "-1 == INTVAL (avr_to_int_mode (op))")) + (match_test "satisfies_constraint_N (op)"))) + +(define_constraint "Y02" + "Fixed-point or integer constant with bit representation 0x2" + (ior (and (match_code "const_fixed") + (match_test "2 == INTVAL (avr_to_int_mode (op))")) + (match_test "satisfies_constraint_K (op)"))) + +(define_constraint "Ym2" + "Fixed-point or integer constant with bit representation -0x2" + (ior (and (match_code "const_fixed") + (match_test "-2 == INTVAL (avr_to_int_mode (op))")) + (match_test "satisfies_constraint_Cm2 (op)"))) + +;; Similar to "IJ" used with ADIW/SBIW, but for CONST_FIXED. + +(define_constraint "YIJ" + "Fixed-point constant from @minus{}0x003f to 0x003f." 
+ (and (match_code "const_fixed") + (match_test "IN_RANGE (INTVAL (avr_to_int_mode (op)), -63, 63)"))) diff --git a/gcc-4.9/gcc/config/avr/driver-avr.c b/gcc-4.9/gcc/config/avr/driver-avr.c new file mode 100644 index 000000000..cb5dd1d1d --- /dev/null +++ b/gcc-4.9/gcc/config/avr/driver-avr.c @@ -0,0 +1,150 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Anatoly Sokolov + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* Current architecture. */ +const avr_arch_t *avr_current_arch = NULL; + +/* Current device. */ +const avr_mcu_t *avr_current_device = NULL; + +/* Initialize avr_current_arch and avr_current_device variables. */ + +static void +avr_set_current_device (const char *name) +{ + + if (NULL != avr_current_arch) + return; + + for (avr_current_device = avr_mcu_types; avr_current_device->name; + avr_current_device++) + { + if (strcmp (avr_current_device->name, name) == 0) + break; + } + + avr_current_arch = &avr_arch_types[avr_current_device->arch]; +} + +/* Returns command line parameters to pass to as. */ + +const char* +avr_device_to_as (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("-mmcu=", avr_current_arch->arch_name, + avr_current_device->errata_skip ? "" : " -mno-skip-bug", + NULL); +} + +/* Returns command line parameters to pass to ld. */ + +const char* +avr_device_to_ld (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("-m ", avr_current_arch->arch_name, NULL); +} + +/* Returns command line parameters that describe start of date section. */ + +const char * +avr_device_to_data_start (int argc, const char **argv) +{ + unsigned long data_section_start; + char data_section_start_str[16]; + + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + if (avr_current_device->data_section_start + == avr_current_arch->default_data_section_start) + return NULL; + + data_section_start = 0x800000 + avr_current_device->data_section_start; + + snprintf (data_section_start_str, sizeof(data_section_start_str) - 1, + "0x%lX", data_section_start); + + return concat ("-Tdata ", data_section_start_str, NULL); +} + +/* Returns command line parameters that describe the device startfile. */ + +const char * +avr_device_to_startfiles (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("crt", avr_current_device->library_name, ".o%s", NULL); +} + +/* Returns command line parameters that describe the device library. 
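+
+   (Editorial note, not part of the original source: these avr_device_to_*
+   helpers are assumed to be registered with the driver through
+   EXTRA_SPEC_FUNCTIONS in avr.h and called from the driver specs roughly
+   as "%:device_to_devicelib(%{mmcu=*:%*})", which is how the user's
+   -mmcu= value ends up here in argv[0].)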
*/ + +const char * +avr_device_to_devicelib (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + return concat ("-l", avr_current_device->library_name, NULL); +} + +const char* +avr_device_to_sp8 (int argc, const char **argv) +{ + if (0 == argc) + return NULL; + + avr_set_current_device (argv[0]); + + /* Leave "avr2" and "avr25" alone. These two architectures are + the only ones that mix devices with 8-bit SP and 16-bit SP. + -msp8 is set by mmultilib machinery. */ + + if (avr_current_device->macro == NULL + && (avr_current_device->arch == ARCH_AVR2 + || avr_current_device->arch == ARCH_AVR25)) + return ""; + + return avr_current_device->short_sp + ? "-msp8" + : "%. */ + + +/* Overriding some definitions from elfos.h for AVR. */ + +#undef PCC_BITFIELD_TYPE_MATTERS + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + +#undef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (32768 * 8) + +#undef STRING_LIMIT +#define STRING_LIMIT ((unsigned) 64) + +/* Output alignment 2**1 for jump tables. */ +#undef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN (FILE, 1); + +/* Be conservative in crtstuff.c. */ +#undef INIT_SECTION_ASM_OP +#undef FINI_SECTION_ASM_OP diff --git a/gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c b/gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c new file mode 100644 index 000000000..ea3e6f1ba --- /dev/null +++ b/gcc-4.9/gcc/config/avr/gen-avr-mmcu-texi.c @@ -0,0 +1,144 @@ +/* Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by Georg-Johann Lay (avr@gjlay.de) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include +#include + +#define IN_GEN_AVR_MMCU_TEXI + +#include "avr-arch.h" +#include "avr-devices.c" + +static const char* +mcu_name[sizeof avr_mcu_types / sizeof avr_mcu_types[0]]; + +static int letter (char c) +{ + return c >= 'a' && c <= 'z'; +} + +static int digit (char c) +{ + return c >= '0' && c <= '9'; +} + +static int +comparator (const void *va, const void *vb) +{ + const char *a = *(const char* const*) va; + const char *b = *(const char* const*) vb; + + while (*a && *b) + { + /* Make letters smaller than digits so that `atmega16a' follows + `atmega16' without `atmega161' etc. between them. */ + + if (letter (*a) && digit (*b)) + return -1; + + if (digit (*a) && letter (*b)) + return 1; + + if (*a != *b) + return *a - *b; + + a++; + b++; + } + + return *a - *b; +} + +static void +print_mcus (size_t n_mcus) +{ + int duplicate = 0; + size_t i; + + if (!n_mcus) + return; + + qsort (mcu_name, n_mcus, sizeof (char*), comparator); + + printf ("@*@var{mcu}@tie{}="); + + for (i = 0; i < n_mcus; i++) + { + printf (" @code{%s}%s", mcu_name[i], i == n_mcus-1 ? ".\n\n" : ","); + + if (i && !strcmp (mcu_name[i], mcu_name[i-1])) + { + /* Sanity-check: Fail on devices that are present more than once. 
*/ + + duplicate = 1; + fprintf (stderr, "error: duplicate device: %s\n", mcu_name[i]); + } + } + + if (duplicate) + exit (1); +} + +int main (void) +{ + enum avr_arch arch = ARCH_UNKNOWN; + size_t i, n_mcus = 0; + const avr_mcu_t *mcu; + + printf ("@c Copyright (C) 2012-2014 Free Software Foundation, Inc.\n"); + printf ("@c This is part of the GCC manual.\n"); + printf ("@c For copying conditions, see the file " + "gcc/doc/include/fdl.texi.\n\n"); + + printf ("@c This file is generated automatically using\n"); + printf ("@c gcc/config/avr/gen-avr-mmcu-texi.c from:\n"); + printf ("@c gcc/config/avr/avr-arch.h\n"); + printf ("@c gcc/config/avr/avr-devices.c\n"); + printf ("@c gcc/config/avr/avr-mcus.def\n\n"); + + printf ("@c Please do not edit manually.\n\n"); + + printf ("@table @code\n\n"); + + for (mcu = avr_mcu_types; mcu->name; mcu++) + { + if (mcu->macro == NULL) + { + arch = mcu->arch; + + /* Start a new architecture: Flush the MCUs collected so far. */ + + print_mcus (n_mcus); + n_mcus = 0; + + for (i = 0; i < sizeof (avr_texinfo) / sizeof (*avr_texinfo); i++) + if (arch == avr_texinfo[i].arch) + printf ("@item %s\n%s\n", mcu->name, avr_texinfo[i].texinfo); + } + else if (arch == (enum avr_arch) mcu->arch) + { + mcu_name[n_mcus++] = mcu->name; + } + } + + print_mcus (n_mcus); + printf ("@end table\n"); + + return EXIT_SUCCESS; +} diff --git a/gcc-4.9/gcc/config/avr/genmultilib.awk b/gcc-4.9/gcc/config/avr/genmultilib.awk new file mode 100644 index 000000000..90e5e5cfd --- /dev/null +++ b/gcc-4.9/gcc/config/avr/genmultilib.awk @@ -0,0 +1,216 @@ +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +################################################################## +# +# Transform Core/Device Information from avr-mcus.def to a +# Representation that is understood by GCC's multilib Machinery. +# +# The Script works as a Filter from STDIN to STDOUT. +# +# FORMAT = "Makefile": Generate Makefile Snipet that sets some +# MULTILIB_* Variables as needed. +# +################################################################## + +BEGIN { + FS ="[(, \t]+" + option[""] = "" + tiny_stack[""] = 1 + comment = 1 + n_mcu = 0 + n_cores = 0 + + mtiny[0] = "" + mtiny[1] = "tiny-stack" + option["tiny-stack"] = "msp8" +} + +################################################################## +# Add some Comments to the generated Files and copy-paste +# Copyright Notice from above. 
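+
+# Editorial illustration, not part of the original script: for a device
+# entry in avr-mcus.def such as
+#
+#   AVR_MCU ("atmega8", ARCH_AVR4, ...)
+#
+# the END block below emits the line
+#
+#   mmcu?avr4=mmcu?atmega8
+#
+# into MULTILIB_MATCHES, as can be seen in the generated t-multilib later
+# in this patch.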
+################################################################## + +/^#/ { + if (!comment) + next + else if (comment == 1) + { + if (FORMAT == "Makefile") + { + print "# Auto-generated Makefile Snip" + print "# Generated by : ./gcc/config/avr/genmultilib.awk" + print "# Generated from : ./gcc/config/avr/avr-mcus.def" + print "# Used by : tmake_file from Makefile and genmultilib" + print "" + } + } + + comment = 2; + + print +} + +/^$/ { + # The first empty line stops copy-pasting the GPL comments + # from this file to the generated file. + + comment = 0 +} + +################################################################## +# Run over all AVR_MCU Lines and gather Information: +# cores[] : Enumerates the Cores (avr2, avr25, ...) +# mcu[] : Enumerates the Devices +# tiny_stack[]: Maps Core/Device to 0 (2-byte SP) or 1 (1-byte SP) +# option[] : Maps Core/Device to the mmcu= option to get it +# toCore[] : Maps Device to its Core +################################################################## + +/^AVR_MCU/ { + name = $2 + gsub ("\"", "", name) + + if ($4 == "NULL") + { + core = name + + # avr1 is supported for Assembler only: It gets no multilib + if (core == "avr1") + next + + cores[n_cores] = core + n_cores++ + tiny_stack[core] = 0 + option[core] = "mmcu=" core + + next + } + + # avr1 is supported for Assembler only: Its Devices are ignored + if (core == "avr1") + next + + tiny_stack[name] = $5 + mcu[n_mcu] = name + n_mcu++ + option[name] = "mmcu=" name + toCore[name] = core + + if (tiny_stack[name] == 1) + tiny_stack[core] = 1 +} + +################################################################## +# +# We gathered all the Information, now build/output the following: +# +# awk Variable target Variable FORMAT +# ----------------------------------------------------------- +# m_options <-> MULTILIB_OPTIONS Makefile +# m_dirnames <-> MULTILIB_DIRNAMES " +# m_exceptions <-> MULTILIB_EXCEPTIONS " +# m_matches <-> MULTILIB_MATCHES " +# +################################################################## + +END { + m_options = "\nMULTILIB_OPTIONS = " + m_dirnames = "\nMULTILIB_DIRNAMES =" + m_exceptions = "\nMULTILIB_EXCEPTIONS =" + m_matches = "\nMULTILIB_MATCHES =" + + ############################################################## + # Compose MULTILIB_OPTIONS. This represents the Cross-Product + # (avr2, avr25, ...) x msp8 + + sep = "" + for (c = 0; c < n_cores; c++) + { + m_options = m_options sep option[cores[c]] + sep = "/" + } + + # The ... 
x msp8 + m_options = m_options " " option[mtiny[1]] + + ############################################################## + # Map Device to its multilib + + for (t = 0; t < n_mcu; t++) + { + core = toCore[mcu[t]] + + line = option[core] ":" option[mcu[t]] + gsub ("=", "?", line) + gsub (":", "=", line) + + m_matches = m_matches " \\\n\t" line + } + + #################################################################### + # Compose MULTILIB_DIRNAMES and MULTILIB_EXEPTIONS + + n_mtiny = 2 + for (t = 0; t < n_mtiny; t++) + for (c = -1; c < n_cores; c++) + { + if (c == -1) + core = "" + else + core = cores[c] + + # The Directory Name for this multilib + + if (core != "" && mtiny[t] != "") + { + mdir = core "/" mtiny[t] + mopt = option[core] "/" option[mtiny[t]] + } + else + { + mdir = core mtiny[t] + mopt = option[core] option[mtiny[t]] + } + + if (core != "" && tiny_stack[core] == 0 && mtiny[t] != "") + { + # There's not a single SP = 8 Devices for this Core: + # Don't build respective multilib + m_exceptions = m_exceptions " \\\n\t" mopt + continue + } + + if (core != "avr2" || mtiny[t] == "") + m_dirnames = m_dirnames " " mdir + } + + ############################################################ + # Output that Stuff + ############################################################ + + if (FORMAT == "Makefile") + { + # Intended Target: ./gcc/config/avr/t-multilib + + print m_options + print m_dirnames + print m_exceptions + print m_matches + } +} diff --git a/gcc-4.9/gcc/config/avr/genopt.sh b/gcc-4.9/gcc/config/avr/genopt.sh new file mode 100755 index 000000000..9838ec25a --- /dev/null +++ b/gcc-4.9/gcc/config/avr/genopt.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# Generate avr-tables.opt from the list in avr-mcus.def. +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +cat <. + +Enum +Name(avr_mcu) Type(int) +Known MCU names: + +EOF + +awk -F'[(, ]+' 'BEGIN { + value = 0 +} +/^AVR_MCU/ { + name = $2 + gsub("\"", "", name) + print "EnumValue" + print "Enum(avr_mcu) String(" name ") Value(" value ")" + print "" + value++ +}' $1 diff --git a/gcc-4.9/gcc/config/avr/predicates.md b/gcc-4.9/gcc/config/avr/predicates.md new file mode 100644 index 000000000..85612e14a --- /dev/null +++ b/gcc-4.9/gcc/config/avr/predicates.md @@ -0,0 +1,275 @@ +;; Predicate definitions for ATMEL AVR micro controllers. +;; Copyright (C) 2006-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Registers from r0 to r15. +(define_predicate "l_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) <= 15"))) + +;; Registers from r16 to r31. +(define_predicate "d_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) >= 16 && REGNO (op) <= 31"))) + +(define_predicate "even_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) <= 31") + (match_test "(REGNO (op) & 1) == 0")))) + +(define_predicate "odd_register_operand" + (and (match_code "reg") + (and (match_test "REGNO (op) <= 31") + (match_test "(REGNO (op) & 1) != 0")))) + +;; SP register. +(define_predicate "stack_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_SP"))) + +;; Return true if OP is a valid address for lower half of I/O space. +(define_predicate "low_io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op) - avr_current_arch->sfr_offset, + 0, 0x1f)"))) + +;; Return true if OP is a valid address for high half of I/O space. +(define_predicate "high_io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op) - avr_current_arch->sfr_offset, + 0x20, 0x3F)"))) + +;; Return true if OP is a valid address of I/O space. +(define_predicate "io_address_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op) - avr_current_arch->sfr_offset, + 0, 0x40 - GET_MODE_SIZE (mode))"))) + +;; Return 1 if OP is a general operand not in flash memory +(define_predicate "nop_general_operand" + (and (match_operand 0 "general_operand") + (match_test "!avr_mem_flash_p (op)"))) + +;; Return 1 if OP is an "ordinary" general operand, i.e. a general +;; operand whose load is not handled by a libgcc call or ELPM. +(define_predicate "nox_general_operand" + (and (match_operand 0 "general_operand") + (not (match_test "avr_load_libgcc_p (op)")) + (not (match_test "avr_mem_memx_p (op)")))) + +;; Return 1 if OP is a memory operand in one of the __flash* address spaces +(define_predicate "flash_operand" + (and (match_operand 0 "memory_operand") + (match_test "Pmode == mode") + (ior (match_test "!MEM_P (op)") + (match_test "avr_mem_flash_p (op)")))) + +;; Return 1 if OP is the zero constant for MODE. +(define_predicate "const0_operand" + (and (match_code "const_int,const_fixed,const_double") + (match_test "op == CONST0_RTX (mode)"))) + +;; Return 1 if OP is the one constant integer for MODE. +(define_predicate "const1_operand" + (and (match_code "const_int") + (match_test "op == CONST1_RTX (mode)"))) + + +;; Return 1 if OP is constant integer 0..7 for MODE. +(define_predicate "const_0_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) + +;; Return 1 if OP is constant integer 2..7 for MODE. +(define_predicate "const_2_to_7_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 7)"))) + +;; Return 1 if OP is constant integer 1..6 for MODE. +(define_predicate "const_1_to_6_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 6)"))) + +;; Return 1 if OP is constant integer 2..6 for MODE. +(define_predicate "const_2_to_6_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 2, 6)"))) + +;; Returns true if OP is either the constant zero or a register. 
+(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const0_operand"))) + +;; Returns 1 if OP is a SYMBOL_REF. +(define_predicate "symbol_ref_operand" + (match_code "symbol_ref")) + +;; Return true if OP is a text segment reference. +;; This is needed for program memory address expressions. +(define_predicate "text_segment_operand" + (match_code "code_label,label_ref,symbol_ref,plus,const") +{ + switch (GET_CODE (op)) + { + case CODE_LABEL: + return true; + case LABEL_REF : + return true; + case SYMBOL_REF : + return SYMBOL_REF_FUNCTION_P (op); + case PLUS : + /* Assume canonical format of symbol + constant. + Fall through. */ + case CONST : + return text_segment_operand (XEXP (op, 0), VOIDmode); + default : + return false; + } +}) + +;; Return true if OP is a constant that contains only one 1 in its +;; binary representation. +(define_predicate "single_one_operand" + (and (match_code "const_int") + (match_test "exact_log2(INTVAL (op) & GET_MODE_MASK (mode)) >= 0"))) + +;; Return true if OP is a constant that contains only one 0 in its +;; binary representation. +(define_predicate "single_zero_operand" + (and (match_code "const_int") + (match_test "exact_log2(~INTVAL (op) & GET_MODE_MASK (mode)) >= 0"))) + +;; +(define_predicate "avr_sp_immediate_operand" + (and (match_code "const_int") + (match_test "satisfies_constraint_Csp (op)"))) + +;; True for EQ & NE +(define_predicate "eqne_operator" + (match_code "eq,ne")) + +;; True for GE & LT +(define_predicate "gelt_operator" + (match_code "ge,lt")) + +;; True for GT, GTU, LE & LEU +(define_predicate "difficult_comparison_operator" + (match_code "gt,gtu,le,leu")) + +;; False for GT, GTU, LE & LEU +(define_predicate "simple_comparison_operator" + (and (match_operand 0 "comparison_operator") + (not (match_code "gt,gtu,le,leu")))) + +;; Return true if OP is a valid call operand. +(define_predicate "call_insn_operand" + (and (match_code "mem") + (ior (match_test "register_operand (XEXP (op, 0), mode)") + (match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))")))) + +;; For some insns we must ensure that no hard register is inserted +;; into their operands because the insns are split and the split +;; involves hard registers. An example are divmod insn that are +;; split to insns that represent implicit library calls. + +;; True for register that is pseudo register. +(define_predicate "pseudo_register_operand" + (and (match_operand 0 "register_operand") + (not (and (match_code "reg") + (match_test "HARD_REGISTER_P (op)"))))) + +;; True for operand that is pseudo register or CONST_INT. +(define_predicate "pseudo_register_or_const_int_operand" + (ior (match_operand 0 "const_int_operand") + (match_operand 0 "pseudo_register_operand"))) + +;; We keep combiner from inserting hard registers into the input of sign- and +;; zero-extends. A hard register in the input operand is not wanted because +;; 32-bit multiply patterns clobber some hard registers and extends with a +;; hard register that overlaps these clobbers won't combine to a widening +;; multiplication. There is no need for combine to propagate or insert +;; hard registers, register allocation can do it just as well. + +;; True for operand that is pseudo register at combine time. 
+(define_predicate "combine_pseudo_register_operand" + (ior (match_operand 0 "pseudo_register_operand") + (and (match_operand 0 "register_operand") + (match_test "reload_completed || reload_in_progress")))) + +;; Return true if OP is a constant integer that is either +;; 8 or 16 or 24. +(define_predicate "const_8_16_24_operand" + (and (match_code "const_int") + (match_test "8 == INTVAL(op) || 16 == INTVAL(op) || 24 == INTVAL(op)"))) + +;; Unsigned CONST_INT that fits in 8 bits, i.e. 0..255. +(define_predicate "u8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 255)"))) + +;; Signed CONST_INT that fits in 8 bits, i.e. -128..127. +(define_predicate "s8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -128, 127)"))) + +;; One-extended CONST_INT that fits in 8 bits, i.e. -256..-1. +(define_predicate "o8_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -256, -1)"))) + +;; Signed CONST_INT that fits in 9 bits, i.e. -256..255. +(define_predicate "s9_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -256, 255)"))) + +(define_predicate "register_or_s9_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "s9_operand"))) + +;; Unsigned CONST_INT that fits in 16 bits, i.e. 0..65536. +(define_predicate "u16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, (1<<16)-1)"))) + +;; Signed CONST_INT that fits in 16 bits, i.e. -32768..32767. +(define_predicate "s16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -(1<<15), (1<<15)-1)"))) + +;; One-extended CONST_INT that fits in 16 bits, i.e. -65536..-1. +(define_predicate "o16_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), -(1<<16), -1)"))) + +;; Const int, fixed, or double operand +(define_predicate "const_operand" + (ior (match_code "const_fixed") + (match_code "const_double") + (match_operand 0 "const_int_operand"))) + +;; Const int, const fixed, or const double operand +(define_predicate "nonmemory_or_const_operand" + (ior (match_code "const_fixed") + (match_code "const_double") + (match_operand 0 "nonmemory_operand"))) + +;; Immediate, const fixed, or const double operand +(define_predicate "const_or_immediate_operand" + (ior (match_code "const_fixed") + (match_code "const_double") + (match_operand 0 "immediate_operand"))) diff --git a/gcc-4.9/gcc/config/avr/rtems.h b/gcc-4.9/gcc/config/avr/rtems.h new file mode 100644 index 000000000..473273b99 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/rtems.h @@ -0,0 +1,27 @@ +/* Definitions for rtems targeting a AVR using ELF. + Copyright (C) 2004-2014 Free Software Foundation, Inc. + Contributed by Ralf Corsepius (ralf.corsepius@rtems.org). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Specify predefined symbols in preprocessor. 
*/ + +#define TARGET_OS_CPP_BUILTINS() \ +do { \ + builtin_define ("__rtems__"); \ + builtin_assert ("system=rtems"); \ +} while (0) diff --git a/gcc-4.9/gcc/config/avr/stdfix.h b/gcc-4.9/gcc/config/avr/stdfix.h new file mode 100644 index 000000000..38d80e4dc --- /dev/null +++ b/gcc-4.9/gcc/config/avr/stdfix.h @@ -0,0 +1,236 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* ISO/IEC JTC1 SC22 WG14 N1169 + * Date: 2006-04-04 + * ISO/IEC TR 18037 + * Programming languages - C - Extensions to support embedded processors + */ + +#ifndef _AVRGCC_STDFIX_H +#define _AVRGCC_STDFIX_H + +/* 7.18a.1 Introduction. */ +/* 7.18a.3 Precision macros. */ + +#include + + +#if __SIZEOF_INT__ == 2 + +typedef signed char int_hr_t; +typedef unsigned char uint_uhr_t; + +typedef short int int_r_t; +typedef short unsigned int uint_ur_t; + +typedef short int int_hk_t; +typedef short unsigned int uint_uhk_t; + +typedef long int int_lr_t; +typedef long unsigned int uint_ulr_t; + +typedef long int int_k_t; +typedef long unsigned int uint_uk_t; + +typedef long long int int_llr_t; +typedef long long unsigned int uint_ullr_t; + +typedef long long int int_lk_t; +typedef long long unsigned int uint_ulk_t; + +typedef long long int int_llk_t; +typedef long long unsigned int uint_ullk_t; + +#elif __SIZEOF_INT__ == 1 /* -mint8 */ + +typedef signed char int_hr_t; +typedef unsigned char uint_uhr_t; + +typedef long int int_r_t; +typedef long unsigned int uint_ur_t; + +typedef long int int_hk_t; +typedef long unsigned int uint_uhk_t; + +typedef long long int int_lr_t; +typedef long long unsigned int uint_ulr_t; + +typedef long long int int_k_t; +typedef long long unsigned int uint_uk_t; + +#endif /* __SIZEOF_INT__ == 1, 2 */ + + +/* 7.18a.6 The fixed-point intrinsic functions. */ + + +/* 7.18a.6.2 The fixed-point absolute value functions. */ + +#define abshr __builtin_avr_abshr +#define absr __builtin_avr_absr +#define abslr __builtin_avr_abslr + +#define abshk __builtin_avr_abshk +#define absk __builtin_avr_absk + +#if __SIZEOF_INT__ == 2 + +#define abslk __builtin_avr_abslk +#define absllr __builtin_avr_absllr /* GCC Extension */ +#define absllk __builtin_avr_absllk /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.3 The fixed-point round functions. */ + +/* The Embedded-C paper specifies results only for rounding points + + 0 < RP < FBIT + + As an extension, the following functions work as expected + with rounding points + + -IBIT < RP < FBIT + + For example, rounding an accum with a rounding point of -1 will + result in an even integer value. 
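+
+   A short illustrative use (added here for clarity, not in the original
+   header): after
+
+     accum x = roundk (y, 2);
+
+   x holds y rounded to two fractional bits, i.e. to a multiple of 0.25k,
+   and a rounding point of -2 would likewise round to a multiple of 4k.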
*/ + +#define roundhr __builtin_avr_roundhr +#define roundr __builtin_avr_roundr +#define roundlr __builtin_avr_roundlr + +#define rounduhr __builtin_avr_rounduhr +#define roundur __builtin_avr_roundur +#define roundulr __builtin_avr_roundulr + +#define roundhk __builtin_avr_roundhk +#define roundk __builtin_avr_roundk + +#define rounduhk __builtin_avr_rounduhk +#define rounduk __builtin_avr_rounduk + +#if __SIZEOF_INT__ == 2 + +#define roundlk __builtin_avr_roundlk +#define roundulk __builtin_avr_roundulk +#define roundllr __builtin_avr_roundllr /* GCC Extension */ +#define roundullr __builtin_avr_roundullr /* GCC Extension */ +#define roundllk __builtin_avr_roundllk /* GCC Extension */ +#define roundullk __builtin_avr_roundullk /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.4 The fixed-point bit countls functions. */ + +#define countlshr __builtin_avr_countlshr +#define countlsr __builtin_avr_countlsr +#define countlslr __builtin_avr_countlslr + +#define countlsuhr __builtin_avr_countlsuhr +#define countlsur __builtin_avr_countlsur +#define countlsulr __builtin_avr_countlsulr + +#define countlshk __builtin_avr_countlshk +#define countlsk __builtin_avr_countlsk + +#define countlsuhk __builtin_avr_countlsuhk +#define countlsuk __builtin_avr_countlsuk + +#if __SIZEOF_INT__ == 2 + +#define countlslk __builtin_avr_countlslk +#define countlsulk __builtin_avr_countlsulk +#define countlsllr __builtin_avr_countlsllr /* GCC Extension */ +#define countlsullr __builtin_avr_countlsullr /* GCC Extension */ +#define countlsllk __builtin_avr_countlsllk /* GCC Extension */ +#define countlsullk __builtin_avr_countlsullk /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.5 The bitwise fixed-point to integer conversion functions. */ + +#define bitshr __builtin_avr_bitshr +#define bitsr __builtin_avr_bitsr +#define bitslr __builtin_avr_bitslr + +#define bitsuhr __builtin_avr_bitsuhr +#define bitsur __builtin_avr_bitsur +#define bitsulr __builtin_avr_bitsulr + +#define bitshk __builtin_avr_bitshk +#define bitsk __builtin_avr_bitsk + +#define bitsuhk __builtin_avr_bitsuhk +#define bitsuk __builtin_avr_bitsuk + +#if __SIZEOF_INT__ == 2 + +#define bitslk __builtin_avr_bitslk +#define bitsulk __builtin_avr_bitsulk +#define bitsllr __builtin_avr_bitsllr /* GCC Extension */ +#define bitsullr __builtin_avr_bitsullr /* GCC Extension */ +#define bitsllk __builtin_avr_bitsllk /* GCC Extension */ +#define bitsullk __builtin_avr_bitsullk /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.6 The bitwise integer to fixed-point conversion functions. */ + +#define hrbits __builtin_avr_hrbits +#define rbits __builtin_avr_rbits +#define lrbits __builtin_avr_lrbits + +#define uhrbits __builtin_avr_uhrbits +#define urbits __builtin_avr_urbits +#define ulrbits __builtin_avr_ulrbits + +#define hkbits __builtin_avr_hkbits +#define kbits __builtin_avr_kbits + +#define uhkbits __builtin_avr_uhkbits +#define ukbits __builtin_avr_ukbits + +#if __SIZEOF_INT__ == 2 + +#define lkbits __builtin_avr_lkbits +#define ulkbits __builtin_avr_ulkbits +#define llrbits __builtin_avr_llrbits /* GCC Extension */ +#define ullrbits __builtin_avr_ullrbits /* GCC Extension */ +#define llkbits __builtin_avr_llkbits /* GCC Extension */ +#define ullkbits __builtin_avr_ullkbits /* GCC Extension */ + +#endif /* sizeof (int) == 2 */ + + +/* 7.18a.6.7 Type-generic fixed-point functions. 
*/ + +#define absfx __builtin_avr_absfx +#define roundfx __builtin_avr_roundfx +#define countlsfx __builtin_avr_countlsfx + +#endif /* _AVRGCC_STDFIX_H */ diff --git a/gcc-4.9/gcc/config/avr/t-avr b/gcc-4.9/gcc/config/avr/t-avr new file mode 100644 index 000000000..75120ef1e --- /dev/null +++ b/gcc-4.9/gcc/config/avr/t-avr @@ -0,0 +1,83 @@ +# Copyright (C) 2000-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +driver-avr.o: $(srcdir)/config/avr/driver-avr.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr-devices.o: $(srcdir)/config/avr/avr-devices.c \ + $(srcdir)/config/avr/avr-mcus.def \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr-c.o: $(srcdir)/config/avr/avr-c.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(C_COMMON_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr-log.o: $(srcdir)/config/avr/avr-log.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(INPUT_H) dumpfile.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< + +avr.o avr-c.o: $(srcdir)/config/avr/builtins.def + +# This overrides stdfix.h from USER_H which we supply and include +# in our own stdint.h as stdint-gcc.h. + +EXTRA_HEADERS = $(srcdir)/config/avr/stdfix.h \ + stdfix-gcc.h + +stdfix-gcc.h: $(srcdir)/ginclude/stdfix.h + -cp $< $@ + +# Files and Variables auto-generated from avr-mcus.def + +AVR_MCUS = $(srcdir)/config/avr/avr-mcus.def + +# Run `avr-mcus' after you changed or added devices in avr-mcus.def + +.PHONY: avr-mcus + +avr-mcus: $(srcdir)/config/avr/t-multilib \ + $(srcdir)/config/avr/avr-tables.opt \ + $(srcdir)/doc/avr-mmcu.texi ; @true + +# Make sure that -mmcu= is supported for devices from avr-mcus.def and +# all -mmcu= values are displayed on the help screen +$(srcdir)/config/avr/avr-tables.opt: $(srcdir)/config/avr/genopt.sh $(AVR_MCUS) + $(SHELL) $< $(AVR_MCUS) > $@ + +# Make sure that -mmcu= support is in sync with -mmcu= documentation. 
+gen-avr-mmcu-texi$(build_exeext): $(srcdir)/config/avr/gen-avr-mmcu-texi.c \ + $(AVR_MCUS) $(srcdir)/config/avr/avr-devices.c \ + $(srcdir)/config/avr/avr-arch.h + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $< -o $@ + +$(srcdir)/doc/avr-mmcu.texi: gen-avr-mmcu-texi$(build_exeext) + $(RUN_GEN) ./$< > $@ + +# Map -mmcu= to the right multilib variant +# MULTILIB_OPTIONS +# MULTILIB_DIRNAMES +# MULTILIB_EXCEPTIONS +# MULTILIB_MATCHES + +s-mlib: $(srcdir)/config/avr/t-multilib + +$(srcdir)/config/avr/t-multilib: $(srcdir)/config/avr/genmultilib.awk \ + $(AVR_MCUS) + $(AWK) -f $< -v FORMAT=Makefile $< $(AVR_MCUS) > $@ diff --git a/gcc-4.9/gcc/config/avr/t-multilib b/gcc-4.9/gcc/config/avr/t-multilib new file mode 100644 index 000000000..301f86496 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/t-multilib @@ -0,0 +1,269 @@ +# Auto-generated Makefile Snip +# Generated by : ./gcc/config/avr/genmultilib.awk +# Generated from : ./gcc/config/avr/avr-mcus.def +# Used by : tmake_file from Makefile and genmultilib + +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = mmcu=avr2/mmcu=avr25/mmcu=avr3/mmcu=avr31/mmcu=avr35/mmcu=avr4/mmcu=avr5/mmcu=avr51/mmcu=avr6/mmcu=avrxmega2/mmcu=avrxmega4/mmcu=avrxmega5/mmcu=avrxmega6/mmcu=avrxmega7 msp8 + +MULTILIB_DIRNAMES = avr2 avr25 avr3 avr31 avr35 avr4 avr5 avr51 avr6 avrxmega2 avrxmega4 avrxmega5 avrxmega6 avrxmega7 tiny-stack avr25/tiny-stack + +MULTILIB_EXCEPTIONS = \ + mmcu=avr3/msp8 \ + mmcu=avr31/msp8 \ + mmcu=avr35/msp8 \ + mmcu=avr4/msp8 \ + mmcu=avr5/msp8 \ + mmcu=avr51/msp8 \ + mmcu=avr6/msp8 \ + mmcu=avrxmega2/msp8 \ + mmcu=avrxmega4/msp8 \ + mmcu=avrxmega5/msp8 \ + mmcu=avrxmega6/msp8 \ + mmcu=avrxmega7/msp8 + +MULTILIB_MATCHES = \ + mmcu?avr2=mmcu?at90s2313 \ + mmcu?avr2=mmcu?at90s2323 \ + mmcu?avr2=mmcu?at90s2333 \ + mmcu?avr2=mmcu?at90s2343 \ + mmcu?avr2=mmcu?attiny22 \ + mmcu?avr2=mmcu?attiny26 \ + mmcu?avr2=mmcu?at90s4414 \ + mmcu?avr2=mmcu?at90s4433 \ + mmcu?avr2=mmcu?at90s4434 \ + mmcu?avr2=mmcu?at90s8515 \ + mmcu?avr2=mmcu?at90c8534 \ + mmcu?avr2=mmcu?at90s8535 \ + mmcu?avr25=mmcu?ata6289 \ + mmcu?avr25=mmcu?ata5272 \ + mmcu?avr25=mmcu?attiny13 \ + mmcu?avr25=mmcu?attiny13a \ + mmcu?avr25=mmcu?attiny2313 \ + mmcu?avr25=mmcu?attiny2313a \ + mmcu?avr25=mmcu?attiny24 \ + mmcu?avr25=mmcu?attiny24a \ + mmcu?avr25=mmcu?attiny4313 \ + mmcu?avr25=mmcu?attiny44 \ + mmcu?avr25=mmcu?attiny44a \ + mmcu?avr25=mmcu?attiny84 \ + mmcu?avr25=mmcu?attiny84a \ + mmcu?avr25=mmcu?attiny25 \ + mmcu?avr25=mmcu?attiny45 \ + mmcu?avr25=mmcu?attiny85 \ + mmcu?avr25=mmcu?attiny261 \ + mmcu?avr25=mmcu?attiny261a \ + mmcu?avr25=mmcu?attiny461 \ + mmcu?avr25=mmcu?attiny461a \ + mmcu?avr25=mmcu?attiny861 \ + mmcu?avr25=mmcu?attiny861a \ + mmcu?avr25=mmcu?attiny43u \ + mmcu?avr25=mmcu?attiny87 \ + mmcu?avr25=mmcu?attiny48 \ + mmcu?avr25=mmcu?attiny88 \ + mmcu?avr25=mmcu?at86rf401 \ + mmcu?avr3=mmcu?at43usb355 \ + mmcu?avr3=mmcu?at76c711 \ 
+ mmcu?avr31=mmcu?atmega103 \ + mmcu?avr31=mmcu?at43usb320 \ + mmcu?avr35=mmcu?ata5505 \ + mmcu?avr35=mmcu?at90usb82 \ + mmcu?avr35=mmcu?at90usb162 \ + mmcu?avr35=mmcu?atmega8u2 \ + mmcu?avr35=mmcu?atmega16u2 \ + mmcu?avr35=mmcu?atmega32u2 \ + mmcu?avr35=mmcu?attiny167 \ + mmcu?avr35=mmcu?attiny1634 \ + mmcu?avr4=mmcu?ata6285 \ + mmcu?avr4=mmcu?ata6286 \ + mmcu?avr4=mmcu?atmega8 \ + mmcu?avr4=mmcu?atmega8a \ + mmcu?avr4=mmcu?atmega48 \ + mmcu?avr4=mmcu?atmega48a \ + mmcu?avr4=mmcu?atmega48p \ + mmcu?avr4=mmcu?atmega48pa \ + mmcu?avr4=mmcu?atmega88 \ + mmcu?avr4=mmcu?atmega88a \ + mmcu?avr4=mmcu?atmega88p \ + mmcu?avr4=mmcu?atmega88pa \ + mmcu?avr4=mmcu?atmega8515 \ + mmcu?avr4=mmcu?atmega8535 \ + mmcu?avr4=mmcu?atmega8hva \ + mmcu?avr4=mmcu?at90pwm1 \ + mmcu?avr4=mmcu?at90pwm2 \ + mmcu?avr4=mmcu?at90pwm2b \ + mmcu?avr4=mmcu?at90pwm3 \ + mmcu?avr4=mmcu?at90pwm3b \ + mmcu?avr4=mmcu?at90pwm81 \ + mmcu?avr5=mmcu?ata5790 \ + mmcu?avr5=mmcu?ata5790n \ + mmcu?avr5=mmcu?ata5795 \ + mmcu?avr5=mmcu?atmega16 \ + mmcu?avr5=mmcu?atmega16a \ + mmcu?avr5=mmcu?atmega161 \ + mmcu?avr5=mmcu?atmega162 \ + mmcu?avr5=mmcu?atmega163 \ + mmcu?avr5=mmcu?atmega164a \ + mmcu?avr5=mmcu?atmega164p \ + mmcu?avr5=mmcu?atmega164pa \ + mmcu?avr5=mmcu?atmega165 \ + mmcu?avr5=mmcu?atmega165a \ + mmcu?avr5=mmcu?atmega165p \ + mmcu?avr5=mmcu?atmega165pa \ + mmcu?avr5=mmcu?atmega168 \ + mmcu?avr5=mmcu?atmega168a \ + mmcu?avr5=mmcu?atmega168p \ + mmcu?avr5=mmcu?atmega168pa \ + mmcu?avr5=mmcu?atmega169 \ + mmcu?avr5=mmcu?atmega169a \ + mmcu?avr5=mmcu?atmega169p \ + mmcu?avr5=mmcu?atmega169pa \ + mmcu?avr5=mmcu?atmega16hvb \ + mmcu?avr5=mmcu?atmega16hvbrevb \ + mmcu?avr5=mmcu?atmega16m1 \ + mmcu?avr5=mmcu?atmega16u4 \ + mmcu?avr5=mmcu?atmega26hvg \ + mmcu?avr5=mmcu?atmega32a \ + mmcu?avr5=mmcu?atmega32 \ + mmcu?avr5=mmcu?atmega323 \ + mmcu?avr5=mmcu?atmega324a \ + mmcu?avr5=mmcu?atmega324p \ + mmcu?avr5=mmcu?atmega324pa \ + mmcu?avr5=mmcu?atmega325 \ + mmcu?avr5=mmcu?atmega325a \ + mmcu?avr5=mmcu?atmega325p \ + mmcu?avr5=mmcu?atmega3250 \ + mmcu?avr5=mmcu?atmega3250a \ + mmcu?avr5=mmcu?atmega3250p \ + mmcu?avr5=mmcu?atmega3250pa \ + mmcu?avr5=mmcu?atmega328 \ + mmcu?avr5=mmcu?atmega328p \ + mmcu?avr5=mmcu?atmega329 \ + mmcu?avr5=mmcu?atmega329a \ + mmcu?avr5=mmcu?atmega329p \ + mmcu?avr5=mmcu?atmega329pa \ + mmcu?avr5=mmcu?atmega3290 \ + mmcu?avr5=mmcu?atmega3290a \ + mmcu?avr5=mmcu?atmega3290p \ + mmcu?avr5=mmcu?atmega3290pa \ + mmcu?avr5=mmcu?atmega32c1 \ + mmcu?avr5=mmcu?atmega32m1 \ + mmcu?avr5=mmcu?atmega32u4 \ + mmcu?avr5=mmcu?atmega32u6 \ + mmcu?avr5=mmcu?atmega406 \ + mmcu?avr5=mmcu?atmega64 \ + mmcu?avr5=mmcu?atmega64a \ + mmcu?avr5=mmcu?atmega640 \ + mmcu?avr5=mmcu?atmega644 \ + mmcu?avr5=mmcu?atmega644a \ + mmcu?avr5=mmcu?atmega644p \ + mmcu?avr5=mmcu?atmega644pa \ + mmcu?avr5=mmcu?atmega645 \ + mmcu?avr5=mmcu?atmega645a \ + mmcu?avr5=mmcu?atmega645p \ + mmcu?avr5=mmcu?atmega6450 \ + mmcu?avr5=mmcu?atmega6450a \ + mmcu?avr5=mmcu?atmega6450p \ + mmcu?avr5=mmcu?atmega649 \ + mmcu?avr5=mmcu?atmega649a \ + mmcu?avr5=mmcu?atmega649p \ + mmcu?avr5=mmcu?atmega6490 \ + mmcu?avr5=mmcu?atmega16hva \ + mmcu?avr5=mmcu?atmega16hva2 \ + mmcu?avr5=mmcu?atmega32hvb \ + mmcu?avr5=mmcu?atmega6490a \ + mmcu?avr5=mmcu?atmega6490p \ + mmcu?avr5=mmcu?atmega64c1 \ + mmcu?avr5=mmcu?atmega64m1 \ + mmcu?avr5=mmcu?atmega64hve \ + mmcu?avr5=mmcu?atmega64rfa2 \ + mmcu?avr5=mmcu?atmega64rfr2 \ + mmcu?avr5=mmcu?atmega32hvbrevb \ + mmcu?avr5=mmcu?atmega48hvf \ + mmcu?avr5=mmcu?at90can32 \ + mmcu?avr5=mmcu?at90can64 \ + mmcu?avr5=mmcu?at90pwm161 \ + 
mmcu?avr5=mmcu?at90pwm216 \ + mmcu?avr5=mmcu?at90pwm316 \ + mmcu?avr5=mmcu?at90scr100 \ + mmcu?avr5=mmcu?at90usb646 \ + mmcu?avr5=mmcu?at90usb647 \ + mmcu?avr5=mmcu?at94k \ + mmcu?avr5=mmcu?m3000 \ + mmcu?avr51=mmcu?atmega128 \ + mmcu?avr51=mmcu?atmega128a \ + mmcu?avr51=mmcu?atmega1280 \ + mmcu?avr51=mmcu?atmega1281 \ + mmcu?avr51=mmcu?atmega1284 \ + mmcu?avr51=mmcu?atmega1284p \ + mmcu?avr51=mmcu?atmega128rfa1 \ + mmcu?avr51=mmcu?at90can128 \ + mmcu?avr51=mmcu?at90usb1286 \ + mmcu?avr51=mmcu?at90usb1287 \ + mmcu?avr6=mmcu?atmega2560 \ + mmcu?avr6=mmcu?atmega2561 \ + mmcu?avrxmega2=mmcu?atxmega16a4 \ + mmcu?avrxmega2=mmcu?atxmega16d4 \ + mmcu?avrxmega2=mmcu?atxmega32a4 \ + mmcu?avrxmega2=mmcu?atxmega32d4 \ + mmcu?avrxmega2=mmcu?atxmega32x1 \ + mmcu?avrxmega2=mmcu?atmxt112sl \ + mmcu?avrxmega2=mmcu?atmxt224 \ + mmcu?avrxmega2=mmcu?atmxt224e \ + mmcu?avrxmega2=mmcu?atmxt336s \ + mmcu?avrxmega2=mmcu?atxmega16a4u \ + mmcu?avrxmega2=mmcu?atxmega16c4 \ + mmcu?avrxmega2=mmcu?atxmega32a4u \ + mmcu?avrxmega2=mmcu?atxmega32c4 \ + mmcu?avrxmega2=mmcu?atxmega32e5 \ + mmcu?avrxmega4=mmcu?atxmega64a3 \ + mmcu?avrxmega4=mmcu?atxmega64d3 \ + mmcu?avrxmega4=mmcu?atxmega64a3u \ + mmcu?avrxmega4=mmcu?atxmega64a4u \ + mmcu?avrxmega4=mmcu?atxmega64b1 \ + mmcu?avrxmega4=mmcu?atxmega64b3 \ + mmcu?avrxmega4=mmcu?atxmega64c3 \ + mmcu?avrxmega4=mmcu?atxmega64d4 \ + mmcu?avrxmega5=mmcu?atxmega64a1 \ + mmcu?avrxmega5=mmcu?atxmega64a1u \ + mmcu?avrxmega6=mmcu?atxmega128a3 \ + mmcu?avrxmega6=mmcu?atxmega128d3 \ + mmcu?avrxmega6=mmcu?atxmega192a3 \ + mmcu?avrxmega6=mmcu?atxmega192d3 \ + mmcu?avrxmega6=mmcu?atxmega256a3 \ + mmcu?avrxmega6=mmcu?atxmega256a3b \ + mmcu?avrxmega6=mmcu?atxmega256a3bu \ + mmcu?avrxmega6=mmcu?atxmega256d3 \ + mmcu?avrxmega6=mmcu?atxmega128a3u \ + mmcu?avrxmega6=mmcu?atxmega128b1 \ + mmcu?avrxmega6=mmcu?atxmega128b3 \ + mmcu?avrxmega6=mmcu?atxmega128c3 \ + mmcu?avrxmega6=mmcu?atxmega128d4 \ + mmcu?avrxmega6=mmcu?atmxt540s \ + mmcu?avrxmega6=mmcu?atmxt540sreva \ + mmcu?avrxmega6=mmcu?atxmega192a3u \ + mmcu?avrxmega6=mmcu?atxmega192c3 \ + mmcu?avrxmega6=mmcu?atxmega256a3u \ + mmcu?avrxmega6=mmcu?atxmega256c3 \ + mmcu?avrxmega6=mmcu?atxmega384c3 \ + mmcu?avrxmega6=mmcu?atxmega384d3 \ + mmcu?avrxmega7=mmcu?atxmega128a1 \ + mmcu?avrxmega7=mmcu?atxmega128a1u \ + mmcu?avrxmega7=mmcu?atxmega128a4u diff --git a/gcc-4.9/gcc/config/avr/t-rtems b/gcc-4.9/gcc/config/avr/t-rtems new file mode 100644 index 000000000..a3ef8bd80 --- /dev/null +++ b/gcc-4.9/gcc/config/avr/t-rtems @@ -0,0 +1,3 @@ +# Multilibs for avr RTEMS targets. + +# ATM, this is just a stub diff --git a/gcc-4.9/gcc/config/bfin/bfin-modes.def b/gcc-4.9/gcc/config/bfin/bfin-modes.def new file mode 100644 index 000000000..9006b169f --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin-modes.def @@ -0,0 +1,28 @@ +/* Definitions of target machine for GNU compiler, for Blackfin. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. 
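(The MULTILIB_MATCHES lines above fold every concrete -mmcu=<device> onto the -mmcu=<core> multilib it is built against, so for example atmega328p and atmega32u4 both pick the avr5 library set. A toy host-side model of that mapping, with an invented table, an invented multilib_for helper, and only a handful of the devices listed above:

#include <stdio.h>
#include <string.h>

static const struct { const char *device, *core; } matches[] = {
  { "atmega103",  "avr31" },
  { "atmega8",    "avr4"  },
  { "atmega328p", "avr5"  },
  { "atmega2560", "avr6"  },
};

static const char *multilib_for (const char *device)
{
  for (size_t i = 0; i < sizeof matches / sizeof matches[0]; i++)
    if (strcmp (device, matches[i].device) == 0)
      return matches[i].core;
  return "avr2";                     /* assumed fallback library set */
}

int main (void)
{
  printf ("%s\n", multilib_for ("atmega328p"));   /* avr5 */
  return 0;
}
)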
+ + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* PDImode for the 40-bit accumulators. */ +PARTIAL_INT_MODE (DI, 40, PDI); + +/* Two of those - covering both accumulators for vector multiplications. */ +VECTOR_MODE (INT, PDI, 2); + +VECTOR_MODE (INT, HI, 2); /* V2HI */ +VECTOR_MODE (INT, SI, 2); /* V2SI - occasionally used. */ diff --git a/gcc-4.9/gcc/config/bfin/bfin-opts.h b/gcc-4.9/gcc/config/bfin/bfin-opts.h new file mode 100644 index 000000000..b7cb39e8f --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin-opts.h @@ -0,0 +1,59 @@ +/* Definitions for the Blackfin port needed for option handling. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef BFIN_OPTS_H +#define BFIN_OPTS_H + +/* CPU type. */ +typedef enum bfin_cpu_type +{ + BFIN_CPU_UNKNOWN, + BFIN_CPU_BF512, + BFIN_CPU_BF514, + BFIN_CPU_BF516, + BFIN_CPU_BF518, + BFIN_CPU_BF522, + BFIN_CPU_BF523, + BFIN_CPU_BF524, + BFIN_CPU_BF525, + BFIN_CPU_BF526, + BFIN_CPU_BF527, + BFIN_CPU_BF531, + BFIN_CPU_BF532, + BFIN_CPU_BF533, + BFIN_CPU_BF534, + BFIN_CPU_BF536, + BFIN_CPU_BF537, + BFIN_CPU_BF538, + BFIN_CPU_BF539, + BFIN_CPU_BF542, + BFIN_CPU_BF542M, + BFIN_CPU_BF544, + BFIN_CPU_BF544M, + BFIN_CPU_BF547, + BFIN_CPU_BF547M, + BFIN_CPU_BF548, + BFIN_CPU_BF548M, + BFIN_CPU_BF549, + BFIN_CPU_BF549M, + BFIN_CPU_BF561, + BFIN_CPU_BF592 +} bfin_cpu_t; + +#endif diff --git a/gcc-4.9/gcc/config/bfin/bfin-protos.h b/gcc-4.9/gcc/config/bfin/bfin-protos.h new file mode 100644 index 000000000..be26ad105 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin-protos.h @@ -0,0 +1,117 @@ +/* Prototypes for Blackfin functions used in the md file & elsewhere. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Function prototypes that cannot exist in bfin.h due to dependency + complications. 
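(The bfin_cpu_t enumeration above exists so option handling can work with a symbolic processor identity rather than a string. A minimal, hypothetical sketch of such a mapping — the cpu_table array and parse_bfin_cpu helper are invented for illustration and only a few parts are shown:

#include <stdio.h>
#include <string.h>

typedef enum { BFIN_CPU_UNKNOWN, BFIN_CPU_BF532, BFIN_CPU_BF537, BFIN_CPU_BF561 } bfin_cpu_t;

static const struct { const char *name; bfin_cpu_t cpu; } cpu_table[] = {
  { "bf532", BFIN_CPU_BF532 },
  { "bf537", BFIN_CPU_BF537 },
  { "bf561", BFIN_CPU_BF561 },
};

static bfin_cpu_t parse_bfin_cpu (const char *arg)
{
  for (size_t i = 0; i < sizeof cpu_table / sizeof cpu_table[0]; i++)
    if (strcmp (arg, cpu_table[i].name) == 0)
      return cpu_table[i].cpu;
  return BFIN_CPU_UNKNOWN;           /* unrecognized -mcpu= value */
}

int main (void)
{
  printf ("%d\n", (int) parse_bfin_cpu ("bf537"));   /* prints the BF537 enumerator */
  return 0;
}
)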
*/ +#ifndef GCC_BFIN_PROTOS_H +#define GCC_BFIN_PROTOS_H + +/* For the anomaly 05-00-0245 */ +#define WA_SPECULATIVE_LOADS 0x00000001 +#define ENABLE_WA_SPECULATIVE_LOADS \ + (bfin_workarounds & WA_SPECULATIVE_LOADS) + +/* For the anomaly 05-00-0244 */ +#define WA_SPECULATIVE_SYNCS 0x00000002 +#define ENABLE_WA_SPECULATIVE_SYNCS \ + (bfin_workarounds & WA_SPECULATIVE_SYNCS) + +/* For the anomaly 05-00-0371 */ +#define WA_RETS 0x00000004 +#define ENABLE_WA_RETS \ + (bfin_workarounds & WA_RETS) + +/* For the anomaly 05-00-0426 */ +#define WA_INDIRECT_CALLS 0x00000008 +#define ENABLE_WA_INDIRECT_CALLS \ + ((bfin_workarounds & WA_INDIRECT_CALLS) && !TARGET_ICPLB) + +#define WA_05000257 0x00000010 +#define ENABLE_WA_05000257 \ + (bfin_workarounds & WA_05000257) + +#define WA_05000283 0x00000020 +#define ENABLE_WA_05000283 \ + (bfin_workarounds & WA_05000283) + +#define WA_05000315 0x00000040 +#define ENABLE_WA_05000315 \ + (bfin_workarounds & WA_05000315) + +/* For the anomaly 05-00-0312 */ +#define WA_LOAD_LCREGS 0x00000080 +#define ENABLE_WA_LOAD_LCREGS \ + (bfin_workarounds & WA_LOAD_LCREGS) + +#define WA_05000074 0x00000100 +#define ENABLE_WA_05000074 \ + (bfin_workarounds & WA_05000074) + +extern bool function_arg_regno_p (int); + +extern const char *output_load_immediate (rtx *); +extern const char *output_casesi_internal (rtx *); +extern char *bfin_asm_long (void); +extern char *bfin_asm_short (void); +extern int log2constp (unsigned HOST_WIDE_INT); + +extern int hard_regno_mode_ok (int, enum machine_mode); +extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx); +extern HOST_WIDE_INT bfin_initial_elimination_offset (int, int); + +extern int effective_address_32bit_p (rtx, enum machine_mode); +extern int symbolic_reference_mentioned_p (rtx); +extern rtx bfin_gen_compare (rtx, enum machine_mode); +extern bool expand_move (rtx *, enum machine_mode); +extern void bfin_expand_call (rtx, rtx, rtx, rtx, int); +extern bool bfin_longcall_p (rtx, int); +extern bool bfin_dsp_memref_p (rtx); +extern bool bfin_expand_movmem (rtx, rtx, rtx, rtx); + +extern enum reg_class secondary_input_reload_class (enum reg_class, + enum machine_mode, + rtx); +extern enum reg_class secondary_output_reload_class (enum reg_class, + enum machine_mode, + rtx); +extern char *section_asm_op_1 (SECT_ENUM_T); +extern char *section_asm_op (SECT_ENUM_T); +extern void print_operand (FILE *, rtx, char); +extern void print_address_operand (FILE *, rtx); +extern void split_di (rtx [], int, rtx [], rtx []); +extern int split_load_immediate (rtx []); +extern void emit_pic_move (rtx *, enum machine_mode); +extern void asm_conditional_branch (rtx, rtx *, int, int); +extern rtx bfin_gen_compare (rtx, enum machine_mode); + +extern unsigned bfin_local_alignment (tree, unsigned); +extern rtx bfin_va_arg (tree, tree); + +extern void bfin_expand_prologue (void); +extern void bfin_expand_epilogue (int, int, bool); +extern int analyze_push_multiple_operation (rtx); +extern int analyze_pop_multiple_operation (rtx); +extern void output_push_multiple (rtx, rtx *); +extern void output_pop_multiple (rtx, rtx *); +extern int bfin_hard_regno_rename_ok (unsigned int, unsigned int); +extern rtx bfin_return_addr_rtx (int); +extern void bfin_hardware_loop (void); + +#endif + diff --git a/gcc-4.9/gcc/config/bfin/bfin.c b/gcc-4.9/gcc/config/bfin/bfin.c new file mode 100644 index 000000000..8b2821189 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin.c @@ -0,0 +1,5834 @@ +/* The Blackfin code generation auxiliary output file. 
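(The WA_*/ENABLE_WA_* pairs above implement a simple bitmask scheme: each documented silicon anomaly gets one bit in bfin_workarounds, and code that must emit a workaround tests its bit, sometimes combined with another condition as ENABLE_WA_INDIRECT_CALLS does with TARGET_ICPLB. A standalone sketch of the idiom, with the bitmask initialized by hand instead of by option processing:

#include <stdio.h>

#define WA_SPECULATIVE_LOADS 0x00000001
#define WA_RETS              0x00000004

static unsigned bfin_workarounds;    /* in the compiler this comes from -mcpu/-msi-revision handling */

int main (void)
{
  bfin_workarounds = WA_SPECULATIVE_LOADS | WA_RETS;

  if (bfin_workarounds & WA_RETS)
    puts ("emit the 05-00-0371 (RETS) workaround sequence");
  if (!(bfin_workarounds & WA_SPECULATIVE_LOADS))
    puts ("speculative-load workaround not needed for this part");
  return 0;
}
)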
+ Copyright (C) 2005-2014 Free Software Foundation, Inc. + Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "insn-codes.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "tree.h" +#include "varasm.h" +#include "calls.h" +#include "flags.h" +#include "except.h" +#include "function.h" +#include "input.h" +#include "target.h" +#include "target-def.h" +#include "expr.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "optabs.h" +#include "ggc.h" +#include "cgraph.h" +#include "langhooks.h" +#include "bfin-protos.h" +#include "tm_p.h" +#include "tm-preds.h" +#include "tm-constrs.h" +#include "gt-bfin.h" +#include "basic-block.h" +#include "timevar.h" +#include "df.h" +#include "sel-sched.h" +#include "hw-doloop.h" +#include "opts.h" +#include "dumpfile.h" + +/* A C structure for machine-specific, per-function data. + This is added to the cfun structure. */ +struct GTY(()) machine_function +{ + /* Set if we are notified by the doloop pass that a hardware loop + was created. */ + int has_hardware_loops; + + /* Set if we create a memcpy pattern that uses loop registers. */ + int has_loopreg_clobber; +}; + +/* RTX for condition code flag register and RETS register */ +extern GTY(()) rtx bfin_cc_rtx; +extern GTY(()) rtx bfin_rets_rtx; +rtx bfin_cc_rtx, bfin_rets_rtx; + +int max_arg_registers = 0; + +/* Arrays used when emitting register names. */ +const char *short_reg_names[] = SHORT_REGISTER_NAMES; +const char *high_reg_names[] = HIGH_REGISTER_NAMES; +const char *dregs_pair_names[] = DREGS_PAIR_NAMES; +const char *byte_reg_names[] = BYTE_REGISTER_NAMES; + +static int arg_regs[] = FUNCTION_ARG_REGISTERS; +static int ret_regs[] = FUNCTION_RETURN_REGISTERS; + +int splitting_for_sched, splitting_loops; + +static void +bfin_globalize_label (FILE *stream, const char *name) +{ + fputs (".global ", stream); + assemble_name (stream, name); + fputc (';',stream); + fputc ('\n',stream); +} + +static void +output_file_start (void) +{ + FILE *file = asm_out_file; + int i; + + fprintf (file, ".file \"%s\";\n", LOCATION_FILE (input_location)); + + for (i = 0; arg_regs[i] >= 0; i++) + ; + max_arg_registers = i; /* how many arg reg used */ +} + +/* Examine machine-dependent attributes of function type FUNTYPE and return its + type. See the definition of E_FUNKIND. */ + +static e_funkind +funkind (const_tree funtype) +{ + tree attrs = TYPE_ATTRIBUTES (funtype); + if (lookup_attribute ("interrupt_handler", attrs)) + return INTERRUPT_HANDLER; + else if (lookup_attribute ("exception_handler", attrs)) + return EXCPT_HANDLER; + else if (lookup_attribute ("nmi_handler", attrs)) + return NMI_HANDLER; + else + return SUBROUTINE; +} + +/* Legitimize PIC addresses. 
If the address is already position-independent, + we return ORIG. Newly generated position-independent addresses go into a + reg. This is REG if nonzero, otherwise we allocate register(s) as + necessary. PICREG is the register holding the pointer to the PIC offset + table. */ + +static rtx +legitimize_pic_address (rtx orig, rtx reg, rtx picreg) +{ + rtx addr = orig; + rtx new_rtx = orig; + + if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF) + { + int unspec; + rtx tmp; + + if (TARGET_ID_SHARED_LIBRARY) + unspec = UNSPEC_MOVE_PIC; + else if (GET_CODE (addr) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (addr)) + unspec = UNSPEC_FUNCDESC_GOT17M4; + else + unspec = UNSPEC_MOVE_FDPIC; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec); + new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp)); + + emit_move_insn (reg, new_rtx); + if (picreg == pic_offset_table_rtx) + crtl->uses_pic_offset_table = 1; + return reg; + } + + else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS) + { + rtx base; + + if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + gcc_assert (GET_CODE (addr) == PLUS); + } + + if (XEXP (addr, 0) == picreg) + return orig; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + base = legitimize_pic_address (XEXP (addr, 0), reg, picreg); + addr = legitimize_pic_address (XEXP (addr, 1), + base == reg ? NULL_RTX : reg, + picreg); + + if (GET_CODE (addr) == CONST_INT) + { + gcc_assert (! reload_in_progress && ! reload_completed); + addr = force_reg (Pmode, addr); + } + + if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0)); + addr = XEXP (addr, 1); + } + + return gen_rtx_PLUS (Pmode, base, addr); + } + + return new_rtx; +} + +/* Stack frame layout. */ + +/* For a given REGNO, determine whether it must be saved in the function + prologue. IS_INTHANDLER specifies whether we're generating a normal + prologue or an interrupt/exception one. */ +static bool +must_save_p (bool is_inthandler, unsigned regno) +{ + if (D_REGNO_P (regno)) + { + bool is_eh_return_reg = false; + if (crtl->calls_eh_return) + { + unsigned j; + for (j = 0; ; j++) + { + unsigned test = EH_RETURN_DATA_REGNO (j); + if (test == INVALID_REGNUM) + break; + if (test == regno) + is_eh_return_reg = true; + } + } + + return (is_eh_return_reg + || (df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && (is_inthandler || !call_used_regs[regno]))); + } + else if (P_REGNO_P (regno)) + { + return ((df_regs_ever_live_p (regno) + && !fixed_regs[regno] + && (is_inthandler || !call_used_regs[regno])) + || (is_inthandler + && (ENABLE_WA_05000283 || ENABLE_WA_05000315) + && regno == REG_P5) + || (!TARGET_FDPIC + && regno == PIC_OFFSET_TABLE_REGNUM + && (crtl->uses_pic_offset_table + || (TARGET_ID_SHARED_LIBRARY && !crtl->is_leaf)))); + } + else + return ((is_inthandler || !call_used_regs[regno]) + && (df_regs_ever_live_p (regno) + || (!leaf_function_p () && call_used_regs[regno]))); + +} + +/* Compute the number of DREGS to save with a push_multiple operation. + This could include registers that aren't modified in the function, + since push_multiple only takes a range of registers. + If IS_INTHANDLER, then everything that is live must be saved, even + if normally call-clobbered. 
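(The register-counting helpers that follow work in two modes: a total count, and a "consecutive" count that stops at the first gap, because a push/pop multiple instruction can only cover a contiguous range ending at R7 or P5. An illustrative host-side model of that loop, with a plain bool array standing in for must_save_p:

#include <stdio.h>
#include <stdbool.h>

static int count_dregs (const bool must_save[8], bool consecutive)
{
  int count = 0;
  for (int i = 7; i >= 0; i--)       /* walk from R7 down to R0 */
    {
      if (must_save[i])
        count++;
      else if (consecutive)
        return count;                /* gap found: the contiguous range ends here */
    }
  return count;
}

int main (void)
{
  /* R7, R6 and R4 need saving; R5 does not.  */
  bool live[8] = { false, false, false, false, true, false, true, true };
  printf ("total=%d consecutive=%d\n",
          count_dregs (live, false),    /* 3 */
          count_dregs (live, true));    /* 2: only R7,R6 fit one push-multiple */
  return 0;
}
)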
+ If CONSECUTIVE, return the number of registers we can save in one + instruction with a push/pop multiple instruction. */ + +static int +n_dregs_to_save (bool is_inthandler, bool consecutive) +{ + int count = 0; + unsigned i; + + for (i = REG_R7 + 1; i-- != REG_R0;) + { + if (must_save_p (is_inthandler, i)) + count++; + else if (consecutive) + return count; + } + return count; +} + +/* Like n_dregs_to_save, but compute number of PREGS to save. */ + +static int +n_pregs_to_save (bool is_inthandler, bool consecutive) +{ + int count = 0; + unsigned i; + + for (i = REG_P5 + 1; i-- != REG_P0;) + if (must_save_p (is_inthandler, i)) + count++; + else if (consecutive) + return count; + return count; +} + +/* Determine if we are going to save the frame pointer in the prologue. */ + +static bool +must_save_fp_p (void) +{ + return df_regs_ever_live_p (REG_FP); +} + +/* Determine if we are going to save the RETS register. */ +static bool +must_save_rets_p (void) +{ + return df_regs_ever_live_p (REG_RETS); +} + +static bool +stack_frame_needed_p (void) +{ + /* EH return puts a new return address into the frame using an + address relative to the frame pointer. */ + if (crtl->calls_eh_return) + return true; + return frame_pointer_needed; +} + +/* Emit code to save registers in the prologue. SAVEALL is nonzero if we + must save all registers; this is used for interrupt handlers. + SPREG contains (reg:SI REG_SP). IS_INTHANDLER is true if we're doing + this for an interrupt (or exception) handler. */ + +static void +expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler) +{ + rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg); + rtx predec = gen_rtx_MEM (SImode, predec1); + int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false); + int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true); + int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true); + int dregno, pregno; + int total_consec = ndregs_consec + npregs_consec; + int i, d_to_save; + + if (saveall || is_inthandler) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT)); + + RTX_FRAME_RELATED_P (insn) = 1; + for (dregno = REG_LT0; dregno <= REG_LB1; dregno++) + if (! 
crtl->is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber + || (ENABLE_WA_05000257 + && (dregno == REG_LC0 || dregno == REG_LC1))) + { + insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (total_consec != 0) + { + rtx insn; + rtx val = GEN_INT (-total_consec * 4); + rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2)); + + XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val), + UNSPEC_PUSH_MULTIPLE); + XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg, + gen_rtx_PLUS (Pmode, + spreg, + val)); + RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1; + d_to_save = ndregs_consec; + dregno = REG_R7 + 1 - ndregs_consec; + pregno = REG_P5 + 1 - npregs_consec; + for (i = 0; i < total_consec; i++) + { + rtx memref = gen_rtx_MEM (word_mode, + gen_rtx_PLUS (Pmode, spreg, + GEN_INT (- i * 4 - 4))); + rtx subpat; + if (d_to_save > 0) + { + subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, + dregno++)); + d_to_save--; + } + else + { + subpat = gen_rtx_SET (VOIDmode, memref, gen_rtx_REG (word_mode, + pregno++)); + } + XVECEXP (pat, 0, i + 1) = subpat; + RTX_FRAME_RELATED_P (subpat) = 1; + } + insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + + for (dregno = REG_R0; ndregs != ndregs_consec; dregno++) + { + if (must_save_p (is_inthandler, dregno)) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, dregno)); + RTX_FRAME_RELATED_P (insn) = 1; + ndregs--; + } + } + for (pregno = REG_P0; npregs != npregs_consec; pregno++) + { + if (must_save_p (is_inthandler, pregno)) + { + rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, pregno)); + RTX_FRAME_RELATED_P (insn) = 1; + npregs--; + } + } + for (i = REG_P7 + 1; i < REG_CC; i++) + if (saveall + || (is_inthandler + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + { + rtx insn; + if (i == REG_A0 || i == REG_A1) + insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1), + gen_rtx_REG (PDImode, i)); + else + insn = emit_move_insn (predec, gen_rtx_REG (SImode, i)); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Emit code to restore registers in the epilogue. SAVEALL is nonzero if we + must save all registers; this is used for interrupt handlers. + SPREG contains (reg:SI REG_SP). IS_INTHANDLER is true if we're doing + this for an interrupt (or exception) handler. */ + +static void +expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler) +{ + rtx postinc1 = gen_rtx_POST_INC (SImode, spreg); + rtx postinc = gen_rtx_MEM (SImode, postinc1); + + int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false); + int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true); + int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true); + int total_consec = ndregs_consec + npregs_consec; + int i, regno; + rtx insn; + + /* A slightly crude technique to stop flow from trying to delete "dead" + insns. 
*/ + MEM_VOLATILE_P (postinc) = 1; + + for (i = REG_CC - 1; i > REG_P7; i--) + if (saveall + || (is_inthandler + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + { + if (i == REG_A0 || i == REG_A1) + { + rtx mem = gen_rtx_MEM (PDImode, postinc1); + MEM_VOLATILE_P (mem) = 1; + emit_move_insn (gen_rtx_REG (PDImode, i), mem); + } + else + emit_move_insn (gen_rtx_REG (SImode, i), postinc); + } + + regno = REG_P5 - npregs_consec; + for (; npregs != npregs_consec; regno--) + { + if (must_save_p (is_inthandler, regno)) + { + emit_move_insn (gen_rtx_REG (word_mode, regno), postinc); + npregs--; + } + } + regno = REG_R7 - ndregs_consec; + for (; ndregs != ndregs_consec; regno--) + { + if (must_save_p (is_inthandler, regno)) + { + emit_move_insn (gen_rtx_REG (word_mode, regno), postinc); + ndregs--; + } + } + + if (total_consec != 0) + { + rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1)); + XVECEXP (pat, 0, 0) + = gen_rtx_SET (VOIDmode, spreg, + gen_rtx_PLUS (Pmode, spreg, + GEN_INT (total_consec * 4))); + + if (npregs_consec > 0) + regno = REG_P5 + 1; + else + regno = REG_R7 + 1; + + for (i = 0; i < total_consec; i++) + { + rtx addr = (i > 0 + ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4)) + : spreg); + rtx memref = gen_rtx_MEM (word_mode, addr); + + regno--; + XVECEXP (pat, 0, i + 1) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref); + + if (npregs_consec > 0) + { + if (--npregs_consec == 0) + regno = REG_R7 + 1; + } + } + + insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (saveall || is_inthandler) + { + for (regno = REG_LB1; regno >= REG_LT0; regno--) + if (! crtl->is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber + || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1))) + emit_move_insn (gen_rtx_REG (SImode, regno), postinc); + + emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc); + } +} + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. + + CUM is as above. + + MODE and TYPE are the mode and type of the current parameter. + + PRETEND_SIZE is a variable that should be set to the amount of stack + that must be pushed by the prolog to pretend that our caller pushed + it. + + Normally, this macro will push all remaining incoming registers on the + stack and set PRETEND_SIZE to the length of the registers pushed. + + Blackfin specific : + - VDSP C compiler manual (our ABI) says that a variable args function + should save the R0, R1 and R2 registers in the stack. + - The caller will always leave space on the stack for the + arguments that are passed in registers, so we dont have + to leave any extra space. + - now, the vastart pointer can access all arguments from the stack. */ + +static void +setup_incoming_varargs (cumulative_args_t cum, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, int *pretend_size, + int no_rtl) +{ + rtx mem; + int i; + + if (no_rtl) + return; + + /* The move for named arguments will be generated automatically by the + compiler. We need to generate the move rtx for the unnamed arguments + if they are in the first 3 words. We assume at least 1 named argument + exists, so we never generate [ARGP] = R0 here. 
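(setup_incoming_varargs above is what makes ordinary C varargs work under this ABI: the named words go in R0–R2 as usual, and the remaining argument registers are stored into the stack slots the caller already reserved so that va_arg can walk everything from memory. A plain C usage example of the situation being handled — nothing target-specific in the source itself:

#include <stdarg.h>
#include <stdio.h>

static int sum (int count, ...)       /* "count" is the only named argument */
{
  va_list ap;
  int total = 0;

  va_start (ap, count);
  for (int i = 0; i < count; i++)
    total += va_arg (ap, int);        /* reads the spilled register/stack arguments */
  va_end (ap);
  return total;
}

int main (void)
{
  printf ("%d\n", sum (3, 10, 20, 30));   /* 60 */
  return 0;
}
)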
*/ + + for (i = get_cumulative_args (cum)->words + 1; i < max_arg_registers; i++) + { + mem = gen_rtx_MEM (Pmode, + plus_constant (Pmode, arg_pointer_rtx, + (i * UNITS_PER_WORD))); + emit_move_insn (mem, gen_rtx_REG (Pmode, i)); + } + + *pretend_size = 0; +} + +/* Value should be nonzero if functions must have frame pointers. + Zero means the frame pointer need not be set up (and parms may + be accessed via the stack pointer) in functions that seem suitable. */ + +static bool +bfin_frame_pointer_required (void) +{ + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + + if (fkind != SUBROUTINE) + return true; + + /* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used, + so we have to override it for non-leaf functions. */ + if (TARGET_OMIT_LEAF_FRAME_POINTER && ! crtl->is_leaf) + return true; + + return false; +} + +/* Return the number of registers pushed during the prologue. */ + +static int +n_regs_saved_by_prologue (void) +{ + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + bool is_inthandler = fkind != SUBROUTINE; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE + || (is_inthandler && !crtl->is_leaf)); + int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false); + int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false); + int n = ndregs + npregs; + int i; + + if (all || stack_frame_needed_p ()) + n += 2; + else + { + if (must_save_fp_p ()) + n++; + if (must_save_rets_p ()) + n++; + } + + if (fkind != SUBROUTINE || all) + { + /* Increment once for ASTAT. */ + n++; + if (! crtl->is_leaf + || cfun->machine->has_hardware_loops + || cfun->machine->has_loopreg_clobber) + { + n += 6; + } + } + + if (fkind != SUBROUTINE) + { + /* RETE/X/N. */ + if (lookup_attribute ("nesting", attrs)) + n++; + } + + for (i = REG_P7 + 1; i < REG_CC; i++) + if (all + || (fkind != SUBROUTINE + && (df_regs_ever_live_p (i) + || (!leaf_function_p () && call_used_regs[i])))) + n += i == REG_A0 || i == REG_A1 ? 2 : 1; + + return n; +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. + + All other eliminations are valid. */ + +static bool +bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true); +} + +/* Return the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +HOST_WIDE_INT +bfin_initial_elimination_offset (int from, int to) +{ + HOST_WIDE_INT offset = 0; + + if (from == ARG_POINTER_REGNUM) + offset = n_regs_saved_by_prologue () * 4; + + if (to == STACK_POINTER_REGNUM) + { + if (crtl->outgoing_args_size >= FIXED_STACK_AREA) + offset += crtl->outgoing_args_size; + else if (crtl->outgoing_args_size) + offset += FIXED_STACK_AREA; + + offset += get_frame_size (); + } + + return offset; +} + +/* Emit code to load a constant CONSTANT into register REG; setting + RTX_FRAME_RELATED_P on all insns we generate if RELATED is true. + Make sure that the insns we generate need not be split. 
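(bfin_initial_elimination_offset above combines three pieces: the prologue save area, the outgoing-argument area (bumped up to FIXED_STACK_AREA whenever any outgoing arguments exist), and the local frame. A worked host-side model with invented numbers; the FIXED_STACK_AREA value used here is an assumption for the example, not the target's real constant:

#include <stdio.h>

#define FIXED_STACK_AREA 12          /* assumption, for illustration only */

static long elimination_offset (int regs_saved, long outgoing_args, long frame_size)
{
  long offset = regs_saved * 4;                         /* saved registers   */
  offset += outgoing_args >= FIXED_STACK_AREA
            ? outgoing_args
            : (outgoing_args ? FIXED_STACK_AREA : 0);   /* outgoing arg area */
  return offset + frame_size;                           /* locals            */
}

int main (void)
{
  printf ("%ld\n", elimination_offset (3, 8, 16));      /* 3*4 + 12 + 16 = 40 */
  return 0;
}
)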
*/ + +static void +frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related) +{ + rtx insn; + rtx cst = GEN_INT (constant); + + if (constant >= -32768 && constant < 65536) + insn = emit_move_insn (reg, cst); + else + { + /* We don't call split_load_immediate here, since dwarf2out.c can get + confused about some of the more clever sequences it can generate. */ + insn = emit_insn (gen_movsi_high (reg, cst)); + if (related) + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movsi_low (reg, reg, cst)); + } + if (related) + RTX_FRAME_RELATED_P (insn) = 1; +} + +/* Generate efficient code to add a value to a P register. + Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero. + EPILOGUE_P is zero if this function is called for prologue, + otherwise it's nonzero. And it's less than zero if this is for + sibcall epilogue. */ + +static void +add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p) +{ + if (value == 0) + return; + + /* Choose whether to use a sequence using a temporary register, or + a sequence with multiple adds. We can add a signed 7-bit value + in one instruction. */ + if (value > 120 || value < -120) + { + rtx tmpreg; + rtx tmpreg2; + rtx insn; + + tmpreg2 = NULL_RTX; + + /* For prologue or normal epilogue, P1 can be safely used + as the temporary register. For sibcall epilogue, we try to find + a call used P register, which will be restored in epilogue. + If we cannot find such a P register, we have to use one I register + to help us. */ + + if (epilogue_p >= 0) + tmpreg = gen_rtx_REG (SImode, REG_P1); + else + { + int i; + for (i = REG_P0; i <= REG_P5; i++) + if ((df_regs_ever_live_p (i) && ! call_used_regs[i]) + || (!TARGET_FDPIC + && i == PIC_OFFSET_TABLE_REGNUM + && (crtl->uses_pic_offset_table + || (TARGET_ID_SHARED_LIBRARY + && ! crtl->is_leaf)))) + break; + if (i <= REG_P5) + tmpreg = gen_rtx_REG (SImode, i); + else + { + tmpreg = gen_rtx_REG (SImode, REG_P1); + tmpreg2 = gen_rtx_REG (SImode, REG_I0); + emit_move_insn (tmpreg2, tmpreg); + } + } + + if (frame) + frame_related_constant_load (tmpreg, value, TRUE); + else + insn = emit_move_insn (tmpreg, GEN_INT (value)); + + insn = emit_insn (gen_addsi3 (reg, reg, tmpreg)); + if (frame) + RTX_FRAME_RELATED_P (insn) = 1; + + if (tmpreg2 != NULL_RTX) + emit_move_insn (tmpreg, tmpreg2); + } + else + do + { + int size = value; + rtx insn; + + if (size > 60) + size = 60; + else if (size < -60) + /* We could use -62, but that would leave the stack unaligned, so + it's no good. */ + size = -60; + + insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size))); + if (frame) + RTX_FRAME_RELATED_P (insn) = 1; + value -= size; + } + while (value != 0); +} + +/* Generate a LINK insn for a frame sized FRAME_SIZE. If this constant + is too large, generate a sequence of insns that has the same effect. + SPREG contains (reg:SI REG_SP). */ + +static void +emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size) +{ + HOST_WIDE_INT link_size = frame_size; + rtx insn; + int i; + + if (link_size > 262140) + link_size = 262140; + + /* Use a LINK insn with as big a constant as possible, then subtract + any remaining size from the SP. 
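(add_to_reg above picks between two strategies: adjustments within ±120 are done as one or two signed 7-bit adds capped at ±60 (±62 would leave the stack misaligned), while anything larger goes through a temporary P register. A small stand-alone model of that decision, printing a pseudo-trace instead of emitting insns:

#include <stdio.h>

static void add_in_chunks (long value)
{
  if (value > 120 || value < -120)
    {
      printf ("  load %ld into a temporary P register, then one add\n", value);
      return;
    }
  while (value != 0)
    {
      long step = value > 60 ? 60 : (value < -60 ? -60 : value);
      printf ("  add SP, %ld\n", step);
      value -= step;
    }
}

int main (void)
{
  add_in_chunks (-96);     /* two short adds: -60 then -36 */
  add_in_chunks (-4096);   /* too large: goes through a temporary register */
  return 0;
}
)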
*/ + insn = emit_insn (gen_link (GEN_INT (-8 - link_size))); + RTX_FRAME_RELATED_P (insn) = 1; + + for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) + { + rtx set = XVECEXP (PATTERN (insn), 0, i); + gcc_assert (GET_CODE (set) == SET); + RTX_FRAME_RELATED_P (set) = 1; + } + + frame_size -= link_size; + + if (frame_size > 0) + { + /* Must use a call-clobbered PREG that isn't the static chain. */ + rtx tmpreg = gen_rtx_REG (Pmode, REG_P1); + + frame_related_constant_load (tmpreg, -frame_size, TRUE); + insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg)); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Return the number of bytes we must reserve for outgoing arguments + in the current function's stack frame. */ + +static HOST_WIDE_INT +arg_area_size (void) +{ + if (crtl->outgoing_args_size) + { + if (crtl->outgoing_args_size >= FIXED_STACK_AREA) + return crtl->outgoing_args_size; + else + return FIXED_STACK_AREA; + } + return 0; +} + +/* Save RETS and FP, and allocate a stack frame. ALL is true if the + function must save all its registers (true only for certain interrupt + handlers). */ + +static void +do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all) +{ + frame_size += arg_area_size (); + + if (all + || stack_frame_needed_p () + || (must_save_rets_p () && must_save_fp_p ())) + emit_link_insn (spreg, frame_size); + else + { + if (must_save_rets_p ()) + { + rtx pat = gen_movsi (gen_rtx_MEM (Pmode, + gen_rtx_PRE_DEC (Pmode, spreg)), + bfin_rets_rtx); + rtx insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (must_save_fp_p ()) + { + rtx pat = gen_movsi (gen_rtx_MEM (Pmode, + gen_rtx_PRE_DEC (Pmode, spreg)), + gen_rtx_REG (Pmode, REG_FP)); + rtx insn = emit_insn (pat); + RTX_FRAME_RELATED_P (insn) = 1; + } + add_to_reg (spreg, -frame_size, 1, 0); + } +} + +/* Like do_link, but used for epilogues to deallocate the stack frame. + EPILOGUE_P is zero if this function is called for prologue, + otherwise it's nonzero. And it's less than zero if this is for + sibcall epilogue. */ + +static void +do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p) +{ + frame_size += arg_area_size (); + + if (stack_frame_needed_p ()) + emit_insn (gen_unlink ()); + else + { + rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg)); + + add_to_reg (spreg, frame_size, 0, epilogue_p); + if (all || must_save_fp_p ()) + { + rtx fpreg = gen_rtx_REG (Pmode, REG_FP); + emit_move_insn (fpreg, postinc); + emit_use (fpreg); + } + if (all || must_save_rets_p ()) + { + emit_move_insn (bfin_rets_rtx, postinc); + emit_use (bfin_rets_rtx); + } + } +} + +/* Generate a prologue suitable for a function of kind FKIND. This is + called for interrupt and exception handler prologues. + SPREG contains (reg:SI REG_SP). */ + +static void +expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all) +{ + HOST_WIDE_INT frame_size = get_frame_size (); + rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg); + rtx predec = gen_rtx_MEM (SImode, predec1); + rtx insn; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + tree kspisusp = lookup_attribute ("kspisusp", attrs); + + if (kspisusp) + { + insn = emit_move_insn (spreg, gen_rtx_REG (Pmode, REG_USP)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* We need space on the stack in case we need to save the argument + registers. 
*/ + if (fkind == EXCPT_HANDLER) + { + insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (-12))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If we're calling other functions, they won't save their call-clobbered + registers, so we must save everything here. */ + if (!crtl->is_leaf) + all = true; + expand_prologue_reg_save (spreg, all, true); + + if (ENABLE_WA_05000283 || ENABLE_WA_05000315) + { + rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode)); + rtx p5reg = gen_rtx_REG (Pmode, REG_P5); + emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx)); + emit_insn (gen_movsi_high (p5reg, chipid)); + emit_insn (gen_movsi_low (p5reg, p5reg, chipid)); + emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx)); + } + + if (lookup_attribute ("nesting", attrs)) + { + rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]); + insn = emit_move_insn (predec, srcreg); + RTX_FRAME_RELATED_P (insn) = 1; + } + + do_link (spreg, frame_size, all); + + if (fkind == EXCPT_HANDLER) + { + rtx r0reg = gen_rtx_REG (SImode, REG_R0); + rtx r1reg = gen_rtx_REG (SImode, REG_R1); + rtx r2reg = gen_rtx_REG (SImode, REG_R2); + + emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT)); + emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26))); + emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26))); + emit_move_insn (r1reg, spreg); + emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP)); + emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8))); + } +} + +/* Generate an epilogue suitable for a function of kind FKIND. This is + called for interrupt and exception handler epilogues. + SPREG contains (reg:SI REG_SP). */ + +static void +expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all) +{ + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + rtx postinc1 = gen_rtx_POST_INC (SImode, spreg); + rtx postinc = gen_rtx_MEM (SImode, postinc1); + + /* A slightly crude technique to stop flow from trying to delete "dead" + insns. */ + MEM_VOLATILE_P (postinc) = 1; + + do_unlink (spreg, get_frame_size (), all, 1); + + if (lookup_attribute ("nesting", attrs)) + { + rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]); + emit_move_insn (srcreg, postinc); + } + + /* If we're calling other functions, they won't save their call-clobbered + registers, so we must save (and restore) everything here. */ + if (!crtl->is_leaf) + all = true; + + expand_epilogue_reg_restore (spreg, all, true); + + /* Deallocate any space we left on the stack in case we needed to save the + argument registers. */ + if (fkind == EXCPT_HANDLER) + emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12))); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind]))); +} + +/* Used while emitting the prologue to generate code to load the correct value + into the PIC register, which is passed in DEST. */ + +static rtx +bfin_load_pic_reg (rtx dest) +{ + struct cgraph_local_info *i = NULL; + rtx addr; + + i = cgraph_local_info (current_function_decl); + + /* Functions local to the translation unit don't need to reload the + pic reg, since the caller always passes a usable one. */ + if (i && i->local) + return pic_offset_table_rtx; + + if (global_options_set.x_bfin_library_id) + addr = plus_constant (Pmode, pic_offset_table_rtx, + -4 - bfin_library_id * 4); + else + addr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_LIBRARY_OFFSET)); + emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr))); + return dest; +} + +/* Generate RTL for the prologue of the current function. 
*/ + +void +bfin_expand_prologue (void) +{ + HOST_WIDE_INT frame_size = get_frame_size (); + rtx spreg = gen_rtx_REG (Pmode, REG_SP); + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + rtx pic_reg_loaded = NULL_RTX; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; + + if (fkind != SUBROUTINE) + { + expand_interrupt_handler_prologue (spreg, fkind, all); + return; + } + + if (crtl->limit_stack + || (TARGET_STACK_CHECK_L1 + && !DECL_NO_LIMIT_STACK (current_function_decl))) + { + HOST_WIDE_INT offset + = bfin_initial_elimination_offset (ARG_POINTER_REGNUM, + STACK_POINTER_REGNUM); + rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX; + rtx tmp = gen_rtx_REG (Pmode, REG_R3); + rtx p2reg = gen_rtx_REG (Pmode, REG_P2); + + emit_move_insn (tmp, p2reg); + if (!lim) + { + emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode)); + emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg)); + lim = p2reg; + } + if (GET_CODE (lim) == SYMBOL_REF) + { + if (TARGET_ID_SHARED_LIBRARY) + { + rtx p1reg = gen_rtx_REG (Pmode, REG_P1); + rtx val; + pic_reg_loaded = bfin_load_pic_reg (p2reg); + val = legitimize_pic_address (stack_limit_rtx, p1reg, + pic_reg_loaded); + emit_move_insn (p1reg, val); + frame_related_constant_load (p2reg, offset, FALSE); + emit_insn (gen_addsi3 (p2reg, p2reg, p1reg)); + lim = p2reg; + } + else + { + rtx limit = plus_constant (Pmode, lim, offset); + emit_move_insn (p2reg, limit); + lim = p2reg; + } + } + else + { + if (lim != p2reg) + emit_move_insn (p2reg, lim); + add_to_reg (p2reg, offset, 0, 0); + lim = p2reg; + } + emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim)); + emit_insn (gen_trapifcc ()); + emit_move_insn (p2reg, tmp); + } + expand_prologue_reg_save (spreg, all, false); + + do_link (spreg, frame_size, all); + + if (TARGET_ID_SHARED_LIBRARY + && !TARGET_SEP_DATA + && (crtl->uses_pic_offset_table + || !crtl->is_leaf)) + bfin_load_pic_reg (pic_offset_table_rtx); +} + +/* Generate RTL for the epilogue of the current function. NEED_RETURN is zero + if this is for a sibcall. EH_RETURN is nonzero if we're expanding an + eh_return pattern. SIBCALL_P is true if this is a sibcall epilogue, + false otherwise. */ + +void +bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p) +{ + rtx spreg = gen_rtx_REG (Pmode, REG_SP); + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + int e = sibcall_p ? -1 : 1; + tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + bool all = lookup_attribute ("saveall", attrs) != NULL_TREE; + + if (fkind != SUBROUTINE) + { + expand_interrupt_handler_epilogue (spreg, fkind, all); + return; + } + + do_unlink (spreg, get_frame_size (), all, e); + + expand_epilogue_reg_restore (spreg, all, false); + + /* Omit the return insn if this is for a sibcall. */ + if (! need_return) + return; + + if (eh_return) + emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2))); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS))); +} + +/* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */ + +int +bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, + unsigned int new_reg) +{ + /* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be + call-clobbered. 
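(funkind and the interrupt prologue/epilogue code above are driven by function attributes on the handler's type. A usage sketch — only meaningful to a Blackfin-targeted GCC, and which attribute combination is appropriate depends on the application:

void __attribute__ ((interrupt_handler))            /* classified as INTERRUPT_HANDLER */
timer_isr (void)
{
  /* ... service the interrupt ... */
}

void __attribute__ ((interrupt_handler, nesting))   /* prologue also saves the return register */
uart_isr (void)
{
  /* ... */
}

void __attribute__ ((exception_handler, saveall))   /* classified as EXCPT_HANDLER; save everything */
fault_handler (void)
{
  /* ... */
}
)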
*/ + + if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE + && !df_regs_ever_live_p (new_reg)) + return 0; + + return 1; +} + +/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. */ +static void +bfin_extra_live_on_entry (bitmap regs) +{ + if (TARGET_FDPIC) + bitmap_set_bit (regs, FDPIC_REGNO); +} + +/* Return the value of the return address for the frame COUNT steps up + from the current frame, after the prologue. + We punt for everything but the current frame by returning const0_rtx. */ + +rtx +bfin_return_addr_rtx (int count) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, REG_RETS); +} + +static rtx +bfin_delegitimize_address (rtx orig_x) +{ + rtx x = orig_x; + + if (GET_CODE (x) != MEM) + return orig_x; + + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC + && GET_CODE (XEXP (x, 0)) == REG + && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM) + return XVECEXP (XEXP (x, 1), 0, 0); + + return orig_x; +} + +/* This predicate is used to compute the length of a load/store insn. + OP is a MEM rtx, we return nonzero if its addressing mode requires a + 32-bit instruction. */ + +int +effective_address_32bit_p (rtx op, enum machine_mode mode) +{ + HOST_WIDE_INT offset; + + mode = GET_MODE (op); + op = XEXP (op, 0); + + if (GET_CODE (op) != PLUS) + { + gcc_assert (REG_P (op) || GET_CODE (op) == POST_INC + || GET_CODE (op) == PRE_DEC || GET_CODE (op) == POST_DEC); + return 0; + } + + if (GET_CODE (XEXP (op, 1)) == UNSPEC) + return 1; + + offset = INTVAL (XEXP (op, 1)); + + /* All byte loads use a 16-bit offset. */ + if (GET_MODE_SIZE (mode) == 1) + return 1; + + if (GET_MODE_SIZE (mode) == 4) + { + /* Frame pointer relative loads can use a negative offset, all others + are restricted to a small positive one. */ + if (XEXP (op, 0) == frame_pointer_rtx) + return offset < -128 || offset > 60; + return offset < 0 || offset > 60; + } + + /* Must be HImode now. */ + return offset < 0 || offset > 30; +} + +/* Returns true if X is a memory reference using an I register. */ +bool +bfin_dsp_memref_p (rtx x) +{ + if (! MEM_P (x)) + return false; + x = XEXP (x, 0); + if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC + || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC) + x = XEXP (x, 0); + return IREG_P (x); +} + +/* Return cost of the memory address ADDR. + All addressing modes are equally cheap on the Blackfin. */ + +static int +bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + return 1; +} + +/* Subroutine of print_operand; used to print a memory reference X to FILE. 
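(effective_address_32bit_p above encodes which register+offset forms still fit the 16-bit load/store encodings; anything outside these windows needs the 32-bit form. A host-side restatement of just the offset rules, leaving out the UNSPEC and auto-increment cases:

#include <stdbool.h>
#include <stdio.h>

static bool needs_32bit_form (int mode_size, bool fp_relative, long offset)
{
  if (mode_size == 1)
    return true;                                   /* byte accesses: always the 32-bit form */
  if (mode_size == 4)
    return fp_relative ? (offset < -128 || offset > 60)
                       : (offset < 0 || offset > 60);
  return offset < 0 || offset > 30;                /* HImode */
}

int main (void)
{
  printf ("%d %d %d\n",
          needs_32bit_form (4, true, -64),   /* 0: FP-relative word access fits 16 bits */
          needs_32bit_form (4, false, 64),   /* 1: offset too large */
          needs_32bit_form (2, false, 30));  /* 0: HImode upper limit */
  return 0;
}
)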
*/ + +void +print_address_operand (FILE *file, rtx x) +{ + switch (GET_CODE (x)) + { + case PLUS: + output_address (XEXP (x, 0)); + fprintf (file, "+"); + output_address (XEXP (x, 1)); + break; + + case PRE_DEC: + fprintf (file, "--"); + output_address (XEXP (x, 0)); + break; + case POST_INC: + output_address (XEXP (x, 0)); + fprintf (file, "++"); + break; + case POST_DEC: + output_address (XEXP (x, 0)); + fprintf (file, "--"); + break; + + default: + gcc_assert (GET_CODE (x) != MEM); + print_operand (file, x, 0); + break; + } +} + +/* Adding intp DImode support by Tony + * -- Q: (low word) + * -- R: (high word) + */ + +void +print_operand (FILE *file, rtx x, char code) +{ + enum machine_mode mode; + + if (code == '!') + { + if (GET_MODE (current_output_insn) == SImode) + fprintf (file, " ||"); + else + fprintf (file, ";"); + return; + } + + mode = GET_MODE (x); + + switch (code) + { + case 'j': + switch (GET_CODE (x)) + { + case EQ: + fprintf (file, "e"); + break; + case NE: + fprintf (file, "ne"); + break; + case GT: + fprintf (file, "g"); + break; + case LT: + fprintf (file, "l"); + break; + case GE: + fprintf (file, "ge"); + break; + case LE: + fprintf (file, "le"); + break; + case GTU: + fprintf (file, "g"); + break; + case LTU: + fprintf (file, "l"); + break; + case GEU: + fprintf (file, "ge"); + break; + case LEU: + fprintf (file, "le"); + break; + default: + output_operand_lossage ("invalid %%j value"); + } + break; + + case 'J': /* reverse logic */ + switch (GET_CODE(x)) + { + case EQ: + fprintf (file, "ne"); + break; + case NE: + fprintf (file, "e"); + break; + case GT: + fprintf (file, "le"); + break; + case LT: + fprintf (file, "ge"); + break; + case GE: + fprintf (file, "l"); + break; + case LE: + fprintf (file, "g"); + break; + case GTU: + fprintf (file, "le"); + break; + case LTU: + fprintf (file, "ge"); + break; + case GEU: + fprintf (file, "l"); + break; + case LEU: + fprintf (file, "g"); + break; + default: + output_operand_lossage ("invalid %%J value"); + } + break; + + default: + switch (GET_CODE (x)) + { + case REG: + if (code == 'h') + { + if (REGNO (x) < 32) + fprintf (file, "%s", short_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'd') + { + if (REGNO (x) < 32) + fprintf (file, "%s", high_reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'w') + { + if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1) + fprintf (file, "%s.w", reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'x') + { + if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1) + fprintf (file, "%s.x", reg_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'v') + { + if (REGNO (x) == REG_A0) + fprintf (file, "AV0"); + else if (REGNO (x) == REG_A1) + fprintf (file, "AV1"); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'D') + { + if (D_REGNO_P (REGNO (x))) + fprintf (file, "%s", dregs_pair_names[REGNO (x)]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'H') + { + if ((mode == DImode || mode == DFmode) && REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x) + 1]); + else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else if (code == 'T') + { + if (D_REGNO_P (REGNO (x))) + fprintf (file, "%s", byte_reg_names[REGNO (x)]); + 
else + output_operand_lossage ("invalid operand for code '%c'", code); + } + else + fprintf (file, "%s", reg_names[REGNO (x)]); + break; + + case MEM: + fputc ('[', file); + x = XEXP (x,0); + print_address_operand (file, x); + fputc (']', file); + break; + + case CONST_INT: + if (code == 'M') + { + switch (INTVAL (x)) + { + case MACFLAG_NONE: + break; + case MACFLAG_FU: + fputs ("(FU)", file); + break; + case MACFLAG_T: + fputs ("(T)", file); + break; + case MACFLAG_TFU: + fputs ("(TFU)", file); + break; + case MACFLAG_W32: + fputs ("(W32)", file); + break; + case MACFLAG_IS: + fputs ("(IS)", file); + break; + case MACFLAG_IU: + fputs ("(IU)", file); + break; + case MACFLAG_IH: + fputs ("(IH)", file); + break; + case MACFLAG_M: + fputs ("(M)", file); + break; + case MACFLAG_IS_M: + fputs ("(IS,M)", file); + break; + case MACFLAG_ISS2: + fputs ("(ISS2)", file); + break; + case MACFLAG_S2RND: + fputs ("(S2RND)", file); + break; + default: + gcc_unreachable (); + } + break; + } + else if (code == 'b') + { + if (INTVAL (x) == 0) + fputs ("+=", file); + else if (INTVAL (x) == 1) + fputs ("-=", file); + else + gcc_unreachable (); + break; + } + /* Moves to half registers with d or h modifiers always use unsigned + constants. */ + else if (code == 'd') + x = GEN_INT ((INTVAL (x) >> 16) & 0xffff); + else if (code == 'h') + x = GEN_INT (INTVAL (x) & 0xffff); + else if (code == 'N') + x = GEN_INT (-INTVAL (x)); + else if (code == 'X') + x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x))); + else if (code == 'Y') + x = GEN_INT (exact_log2 (0xffffffff & ~INTVAL (x))); + else if (code == 'Z') + /* Used for LINK insns. */ + x = GEN_INT (-8 - INTVAL (x)); + + /* fall through */ + + case SYMBOL_REF: + output_addr_const (file, x); + break; + + case CONST_DOUBLE: + output_operand_lossage ("invalid const_double operand"); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + case UNSPEC_MOVE_PIC: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@GOT"); + break; + + case UNSPEC_MOVE_FDPIC: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@GOT17M4"); + break; + + case UNSPEC_FUNCDESC_GOT17M4: + output_addr_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@FUNCDESC_GOT17M4"); + break; + + case UNSPEC_LIBRARY_OFFSET: + fprintf (file, "_current_shared_library_p5_offset_"); + break; + + default: + gcc_unreachable (); + } + break; + + default: + output_addr_const (file, x); + } + } +} + +/* Argument support functions. */ + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. + VDSP C Compiler manual, our ABI says that + first 3 words of arguments will use R0, R1 and R2. +*/ + +void +init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, + rtx libname ATTRIBUTE_UNUSED) +{ + static CUMULATIVE_ARGS zero_cum; + + *cum = zero_cum; + + /* Set up the number of registers to use for passing arguments. */ + + cum->nregs = max_arg_registers; + cum->arg_regs = arg_regs; + + cum->call_cookie = CALL_NORMAL; + /* Check for a longcall attribute. */ + if (fntype && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))) + cum->call_cookie |= CALL_SHORT; + else if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))) + cum->call_cookie |= CALL_LONG; + + return; +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) 
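(init_cumulative_args above turns the "shortcall"/"longcall" attributes into call_cookie bits, which later decide whether a direct CALL may be emitted. A usage sketch, again only meaningful to a Blackfin-targeted GCC; the declarations and comments on the generated code are illustrative:

extern void far_away_handler (void) __attribute__ ((longcall));   /* force an indirect call */
extern void nearby_helper (void)    __attribute__ ((shortcall));  /* force a direct CALL    */

void dispatch (int fault)
{
  if (fault)
    far_away_handler ();   /* expected form: load the address into a P register, CALL (Pn) */
  else
    nearby_helper ();      /* expected form: pc-relative CALL */
}
)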
*/ + +static void +bfin_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int count, bytes, words; + + bytes = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + cum->words += words; + cum->nregs -= words; + + if (cum->nregs <= 0) + { + cum->nregs = 0; + cum->arg_regs = NULL; + } + else + { + for (count = 1; count <= words; count++) + cum->arg_regs++; + } + + return; +} + +/* Define where to put the arguments to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). */ + +static rtx +bfin_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int bytes + = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + + if (mode == VOIDmode) + /* Compute operand 2 of the call insn. */ + return GEN_INT (cum->call_cookie); + + if (bytes == -1) + return NULL_RTX; + + if (cum->nregs) + return gen_rtx_REG (mode, *(cum->arg_regs)); + + return NULL_RTX; +} + +/* For an arg passed partly in registers and partly in memory, + this is the number of bytes passed in registers. + For args passed entirely in registers or entirely in memory, zero. + + Refer VDSP C Compiler manual, our ABI. + First 3 words are in registers. So, if an argument is larger + than the registers available, it will span the register and + stack. */ + +static int +bfin_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode, + tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + int bytes + = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); + int bytes_left = get_cumulative_args (cum)->nregs * UNITS_PER_WORD; + + if (bytes == -1) + return 0; + + if (bytes_left == 0) + return 0; + if (bytes > bytes_left) + return bytes_left; + return 0; +} + +/* Variable sized types are passed by reference. */ + +static bool +bfin_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; +} + +/* Decide whether a type should be returned in memory (true) + or in a register (false). This is called by the macro + TARGET_RETURN_IN_MEMORY. */ + +static bool +bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + int size = int_size_in_bytes (type); + return size > 2 * UNITS_PER_WORD || size == -1; +} + +/* Register in which address to store a structure value + is passed to a function. */ +static rtx +bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, REG_P0); +} + +/* Return true when register may be used to pass function parameters. 
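(bfin_return_in_memory above sends any aggregate larger than two words, or of unknown size, back through memory, with bfin_struct_value_rtx putting the return-slot address in P0. A worked example assuming 4-byte int; the struct names are invented:

#include <stdio.h>

struct pair   { int a, b; };       /* 8 bytes: returned in registers */
struct triple { int a, b, c; };    /* 12 bytes: returned in memory, return-slot address in P0 */

int main (void)
{
  printf ("pair=%zu triple=%zu\n", sizeof (struct pair), sizeof (struct triple));
  return 0;
}
)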
*/ + +bool +function_arg_regno_p (int n) +{ + int i; + for (i = 0; arg_regs[i] != -1; i++) + if (n == arg_regs[i]) + return true; + return false; +} + +/* Returns 1 if OP contains a symbol reference */ + +int +symbolic_reference_mentioned_p (rtx op) +{ + register const char *fmt; + register int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return 1; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return 1; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return 1; + } + + return 0; +} + +/* Decide whether we can make a sibling call to a function. DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. */ + +static bool +bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, + tree exp ATTRIBUTE_UNUSED) +{ + struct cgraph_local_info *this_func, *called_func; + e_funkind fkind = funkind (TREE_TYPE (current_function_decl)); + if (fkind != SUBROUTINE) + return false; + if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA) + return true; + + /* When compiling for ID shared libraries, can't sibcall a local function + from a non-local function, because the local function thinks it does + not need to reload P5 in the prologue, but the sibcall wil pop P5 in the + sibcall epilogue, and we end up with the wrong value in P5. */ + + if (!decl) + /* Not enough information. */ + return false; + + this_func = cgraph_local_info (current_function_decl); + called_func = cgraph_local_info (decl); + if (!called_func) + return false; + return !called_func->local || this_func->local; +} + +/* Write a template for a trampoline to F. */ + +static void +bfin_asm_trampoline_template (FILE *f) +{ + if (TARGET_FDPIC) + { + fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */ + fprintf (f, "\t.dd\t0x00000000\n"); /* 0 */ + fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */ + fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */ + fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */ + fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */ + fprintf (f, "\t.dw\t0xac4b\n"); /* p3 = [p1 + 4] */ + fprintf (f, "\t.dw\t0x9149\n"); /* p1 = [p1] */ + fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1)*/ + } + else + { + fprintf (f, "\t.dd\t0x0000e109\n"); /* p1.l = fn low */ + fprintf (f, "\t.dd\t0x0000e149\n"); /* p1.h = fn high */ + fprintf (f, "\t.dd\t0x0000e10a\n"); /* p2.l = sc low */ + fprintf (f, "\t.dd\t0x0000e14a\n"); /* p2.h = sc high */ + fprintf (f, "\t.dw\t0x0051\n"); /* jump (p1)*/ + } +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + M_TRAMP. FNDECL is the target function. CHAIN_VALUE is an RTX for + the static chain value for the function. 
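(The trampoline template above and bfin_trampoline_init that follows come into play whenever the address of a nested function escapes. A standard GNU C example that forces a trampoline — compile with GCC; the nested function reads an enclosing local, so it needs the static chain the trampoline supplies:

#include <stdio.h>

static void apply (void (*fn) (int), int n)
{
  for (int i = 0; i < n; i++)
    fn (i);
}

int main (void)
{
  int total = 0;

  void add (int i)          /* nested function: reaches total through the static chain */
  {
    total += i;
  }

  apply (add, 4);           /* taking add's address materializes a trampoline */
  printf ("%d\n", total);   /* 6 */
  return 0;
}
)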
*/ + +static void +bfin_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx t1 = copy_to_reg (XEXP (DECL_RTL (fndecl), 0)); + rtx t2 = copy_to_reg (chain_value); + rtx mem; + int i = 0; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + if (TARGET_FDPIC) + { + rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (m_tramp, 0), 8)); + mem = adjust_address (m_tramp, Pmode, 0); + emit_move_insn (mem, a); + i = 8; + } + + mem = adjust_address (m_tramp, HImode, i + 2); + emit_move_insn (mem, gen_lowpart (HImode, t1)); + emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16))); + mem = adjust_address (m_tramp, HImode, i + 6); + emit_move_insn (mem, gen_lowpart (HImode, t1)); + + mem = adjust_address (m_tramp, HImode, i + 10); + emit_move_insn (mem, gen_lowpart (HImode, t2)); + emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16))); + mem = adjust_address (m_tramp, HImode, i + 14); + emit_move_insn (mem, gen_lowpart (HImode, t2)); +} + +/* Emit insns to move operands[1] into operands[0]. */ + +void +emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode); + + gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed)); + if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + else + operands[1] = legitimize_pic_address (operands[1], temp, + TARGET_FDPIC ? OUR_FDPIC_REG + : pic_offset_table_rtx); +} + +/* Expand a move operation in mode MODE. The operands are in OPERANDS. + Returns true if no further code must be generated, false if the caller + should generate an insn to move OPERANDS[1] to OPERANDS[0]. */ + +bool +expand_move (rtx *operands, enum machine_mode mode) +{ + rtx op = operands[1]; + if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC) + && SYMBOLIC_CONST (op)) + emit_pic_move (operands, mode); + else if (mode == SImode && GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + && !targetm.legitimate_constant_p (mode, op)) + { + rtx dest = operands[0]; + rtx op0, op1; + gcc_assert (!reload_in_progress && !reload_completed); + op = XEXP (op, 0); + op0 = force_reg (mode, XEXP (op, 0)); + op1 = XEXP (op, 1); + if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode)) + op1 = force_reg (mode, op1); + if (GET_CODE (dest) == MEM) + dest = gen_reg_rtx (mode); + emit_insn (gen_addsi3 (dest, op0, op1)); + if (dest == operands[0]) + return true; + operands[1] = dest; + } + /* Don't generate memory->memory or constant->memory moves, go through a + register */ + else if ((reload_in_progress | reload_completed) == 0 + && GET_CODE (operands[0]) == MEM + && GET_CODE (operands[1]) != REG) + operands[1] = force_reg (mode, operands[1]); + return false; +} + +/* Split one or more DImode RTL references into pairs of SImode + references. The RTL can be REG, offsettable MEM, integer constant, or + CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to + split and "num" is its length. lo_half and hi_half are output arrays + that parallel "operands". */ + +void +split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) +{ + while (num--) + { + rtx op = operands[num]; + + /* simplify_subreg refuse to split volatile memory addresses, + but we still have to handle it. 
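A standalone sketch, not taken from the GCC sources above, of the halfword patching performed by bfin_trampoline_init for the non-FDPIC template. It assumes the .dd template words are stored little-endian, so that each 16-bit immediate placeholder sits at the byte offsets 2, 6, 10 and 14 that the patch writes to.

#include <stdint.h>
#include <stdio.h>

/* Overwrite the 0x0000 immediate fields of the trampoline template with
   the split halves of the function address and static chain value.  */
static void
patch_trampoline (uint16_t tramp[9], uint32_t fnaddr, uint32_t chain)
{
  tramp[1] = fnaddr & 0xffff;   /* p1.l = fn low   (byte offset 2)  */
  tramp[3] = fnaddr >> 16;      /* p1.h = fn high  (byte offset 6)  */
  tramp[5] = chain & 0xffff;    /* p2.l = sc low   (byte offset 10) */
  tramp[7] = chain >> 16;       /* p2.h = sc high  (byte offset 14) */
}

int
main (void)
{
  /* The non-FDPIC template: four load-immediate words plus "jump (p1)".  */
  uint16_t tramp[9] = { 0xe109, 0, 0xe149, 0, 0xe10a, 0, 0xe14a, 0, 0x0051 };

  patch_trampoline (tramp, 0x12345678u, 0x00ffc000u);
  printf ("%04x %04x\n", (unsigned) tramp[1], (unsigned) tramp[3]);  /* 5678 1234 */
  return 0;
}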
*/ + if (GET_CODE (op) == MEM) + { + lo_half[num] = adjust_address (op, SImode, 0); + hi_half[num] = adjust_address (op, SImode, 4); + } + else + { + lo_half[num] = simplify_gen_subreg (SImode, op, + GET_MODE (op) == VOIDmode + ? DImode : GET_MODE (op), 0); + hi_half[num] = simplify_gen_subreg (SImode, op, + GET_MODE (op) == VOIDmode + ? DImode : GET_MODE (op), 4); + } + } +} + +bool +bfin_longcall_p (rtx op, int call_cookie) +{ + gcc_assert (GET_CODE (op) == SYMBOL_REF); + if (SYMBOL_REF_WEAK (op)) + return 1; + if (call_cookie & CALL_SHORT) + return 0; + if (call_cookie & CALL_LONG) + return 1; + if (TARGET_LONG_CALLS) + return 1; + return 0; +} + +/* Expand a call instruction. FNADDR is the call target, RETVAL the return value. + COOKIE is a CONST_INT holding the call_cookie prepared init_cumulative_args. + SIBCALL is nonzero if this is a sibling call. */ + +void +bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall) +{ + rtx use = NULL, call; + rtx callee = XEXP (fnaddr, 0); + int nelts = 3; + rtx pat; + rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO); + rtx retsreg = gen_rtx_REG (Pmode, REG_RETS); + int n; + + /* In an untyped call, we can get NULL for operand 2. */ + if (cookie == NULL_RTX) + cookie = const0_rtx; + + /* Static functions and indirect calls don't need the pic register. */ + if (!TARGET_FDPIC && flag_pic + && GET_CODE (callee) == SYMBOL_REF + && !SYMBOL_REF_LOCAL_P (callee)) + use_reg (&use, pic_offset_table_rtx); + + if (TARGET_FDPIC) + { + int caller_in_sram, callee_in_sram; + + /* 0 is not in sram, 1 is in L1 sram, 2 is in L2 sram. */ + caller_in_sram = callee_in_sram = 0; + + if (lookup_attribute ("l1_text", + DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE) + caller_in_sram = 1; + else if (lookup_attribute ("l2", + DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE) + caller_in_sram = 2; + + if (GET_CODE (callee) == SYMBOL_REF + && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee))) + { + if (lookup_attribute + ("l1_text", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE) + callee_in_sram = 1; + else if (lookup_attribute + ("l2", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE) + callee_in_sram = 2; + } + + if (GET_CODE (callee) != SYMBOL_REF + || bfin_longcall_p (callee, INTVAL (cookie)) + || (GET_CODE (callee) == SYMBOL_REF + && !SYMBOL_REF_LOCAL_P (callee) + && TARGET_INLINE_PLT) + || caller_in_sram != callee_in_sram + || (caller_in_sram && callee_in_sram + && (GET_CODE (callee) != SYMBOL_REF + || !SYMBOL_REF_LOCAL_P (callee)))) + { + rtx addr = callee; + if (! 
address_operand (addr, Pmode)) + addr = force_reg (Pmode, addr); + + fnaddr = gen_reg_rtx (SImode); + emit_insn (gen_load_funcdescsi (fnaddr, addr)); + fnaddr = gen_rtx_MEM (Pmode, fnaddr); + + picreg = gen_reg_rtx (SImode); + emit_insn (gen_load_funcdescsi (picreg, + plus_constant (Pmode, addr, 4))); + } + + nelts++; + } + else if ((!register_no_elim_operand (callee, Pmode) + && GET_CODE (callee) != SYMBOL_REF) + || (GET_CODE (callee) == SYMBOL_REF + && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY) + || bfin_longcall_p (callee, INTVAL (cookie))))) + { + callee = copy_to_mode_reg (Pmode, callee); + fnaddr = gen_rtx_MEM (Pmode, callee); + } + call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); + + if (retval) + call = gen_rtx_SET (VOIDmode, retval, call); + + pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts)); + n = 0; + XVECEXP (pat, 0, n++) = call; + if (TARGET_FDPIC) + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg); + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie); + if (sibcall) + XVECEXP (pat, 0, n++) = ret_rtx; + else + XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg); + call = emit_call_insn (pat); + if (use) + CALL_INSN_FUNCTION_USAGE (call) = use; +} + +/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ + +int +hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + /* Allow only dregs to store value of mode HI or QI */ + enum reg_class rclass = REGNO_REG_CLASS (regno); + + if (mode == CCmode) + return 0; + + if (mode == V2HImode) + return D_REGNO_P (regno); + if (rclass == CCREGS) + return mode == BImode; + if (mode == PDImode || mode == V2PDImode) + return regno == REG_A0 || regno == REG_A1; + + /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes + up with a bad register class (such as ALL_REGS) for DImode. */ + if (mode == DImode) + return regno < REG_M3; + + if (mode == SImode + && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno)) + return 1; + + return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno); +} + +/* Implements target hook vector_mode_supported_p. */ + +static bool +bfin_vector_mode_supported_p (enum machine_mode mode) +{ + return mode == V2HImode; +} + +/* Worker function for TARGET_REGISTER_MOVE_COST. */ + +static int +bfin_register_move_cost (enum machine_mode mode, + reg_class_t class1, reg_class_t class2) +{ + /* These need secondary reloads, so they're more expensive. */ + if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS)) + || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS))) + return 4; + + /* If optimizing for size, always prefer reg-reg over reg-memory moves. */ + if (optimize_size) + return 2; + + if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* Discourage trying to use the accumulators. */ + if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0) + || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1) + || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0) + || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1)) + return 20; + } + return 2; +} + +/* Worker function for TARGET_MEMORY_MOVE_COST. + + ??? In theory L1 memory has single-cycle latency. We should add a switch + that tells the compiler whether we expect to use only L1 memory for the + program; it'll make the costs more accurate. */ + +static int +bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass, + bool in ATTRIBUTE_UNUSED) +{ + /* Make memory accesses slightly more expensive than any register-register + move. 
Also, penalize non-DP registers, since they need secondary + reloads to load and store. */ + if (! reg_class_subset_p (rclass, DPREGS)) + return 10; + + return 8; +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch register. Return the class needed for the + scratch register. */ + +static reg_class_t +bfin_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + enum machine_mode mode, secondary_reload_info *sri) +{ + /* If we have HImode or QImode, we can only use DREGS as secondary registers; + in most other cases we can also use PREGS. */ + enum reg_class default_class = GET_MODE_SIZE (mode) >= 4 ? DPREGS : DREGS; + enum reg_class x_class = NO_REGS; + enum rtx_code code = GET_CODE (x); + enum reg_class rclass = (enum reg_class) rclass_i; + + if (code == SUBREG) + x = SUBREG_REG (x), code = GET_CODE (x); + if (REG_P (x)) + { + int regno = REGNO (x); + if (regno >= FIRST_PSEUDO_REGISTER) + regno = reg_renumber[regno]; + + if (regno == -1) + code = MEM; + else + x_class = REGNO_REG_CLASS (regno); + } + + /* We can be asked to reload (plus (FP) (large_constant)) into a DREG. + This happens as a side effect of register elimination, and we need + a scratch register to do it. */ + if (fp_plus_const_operand (x, mode)) + { + rtx op2 = XEXP (x, 1); + int large_constant_p = ! satisfies_constraint_Ks7 (op2); + + if (rclass == PREGS || rclass == PREGS_CLOBBERED) + return NO_REGS; + /* If destination is a DREG, we can do this without a scratch register + if the constant is valid for an add instruction. */ + if ((rclass == DREGS || rclass == DPREGS) + && ! large_constant_p) + return NO_REGS; + /* Reloading to anything other than a DREG? Use a PREG scratch + register. */ + sri->icode = CODE_FOR_reload_insi; + return NO_REGS; + } + + /* Data can usually be moved freely between registers of most classes. + AREGS are an exception; they can only move to or from another register + in AREGS or one in DREGS. They can also be assigned the constant 0. */ + if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS) + return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS + || rclass == ODD_AREGS + ? NO_REGS : DREGS); + + if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS) + { + if (code == MEM) + { + sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi; + return NO_REGS; + } + + if (x != const0_rtx && x_class != DREGS) + { + return DREGS; + } + else + return NO_REGS; + } + + /* CCREGS can only be moved from/to DREGS. */ + if (rclass == CCREGS && x_class != DREGS) + return DREGS; + if (x_class == CCREGS && rclass != DREGS) + return DREGS; + + /* All registers other than AREGS can load arbitrary constants. The only + case that remains is MEM. */ + if (code == MEM) + if (! reg_class_subset_p (rclass, default_class)) + return default_class; + + return NO_REGS; +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ + +static bool +bfin_class_likely_spilled_p (reg_class_t rclass) +{ + switch (rclass) + { + case PREGS_CLOBBERED: + case PROLOGUE_REGS: + case P0REGS: + case D0REGS: + case D1REGS: + case D2REGS: + case CCREGS: + return true; + + default: + break; + } + + return false; +} + +static struct machine_function * +bfin_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Implement the TARGET_OPTION_OVERRIDE hook. */ + +static void +bfin_option_override (void) +{ + /* If processor type is not specified, enable all workarounds. 
*/ + if (bfin_cpu_type == BFIN_CPU_UNKNOWN) + { + int i; + + for (i = 0; bfin_cpus[i].name != NULL; i++) + bfin_workarounds |= bfin_cpus[i].workarounds; + + bfin_si_revision = 0xffff; + } + + if (bfin_csync_anomaly == 1) + bfin_workarounds |= WA_SPECULATIVE_SYNCS; + else if (bfin_csync_anomaly == 0) + bfin_workarounds &= ~WA_SPECULATIVE_SYNCS; + + if (bfin_specld_anomaly == 1) + bfin_workarounds |= WA_SPECULATIVE_LOADS; + else if (bfin_specld_anomaly == 0) + bfin_workarounds &= ~WA_SPECULATIVE_LOADS; + + if (TARGET_OMIT_LEAF_FRAME_POINTER) + flag_omit_frame_pointer = 1; + +#ifdef SUBTARGET_FDPIC_NOT_SUPPORTED + if (TARGET_FDPIC) + error ("-mfdpic is not supported, please use a bfin-linux-uclibc target"); +#endif + + /* Library identification */ + if (global_options_set.x_bfin_library_id && ! TARGET_ID_SHARED_LIBRARY) + error ("-mshared-library-id= specified without -mid-shared-library"); + + if (stack_limit_rtx && TARGET_FDPIC) + { + warning (0, "-fstack-limit- options are ignored with -mfdpic; use -mstack-check-l1"); + stack_limit_rtx = NULL_RTX; + } + + if (stack_limit_rtx && TARGET_STACK_CHECK_L1) + error ("can%'t use multiple stack checking methods together"); + + if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC) + error ("ID shared libraries and FD-PIC mode can%'t be used together"); + + /* Don't allow the user to specify -mid-shared-library and -msep-data + together, as it makes little sense from a user's point of view... */ + if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY) + error ("cannot specify both -msep-data and -mid-shared-library"); + /* ... internally, however, it's nearly the same. */ + if (TARGET_SEP_DATA) + target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY; + + if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0) + flag_pic = 1; + + /* There is no single unaligned SI op for PIC code. Sometimes we + need to use ".4byte" and sometimes we need to use ".picptr". + See bfin_assemble_integer for details. */ + if (TARGET_FDPIC) + targetm.asm_out.unaligned_op.si = 0; + + /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries, + since we don't support it and it'll just break. */ + if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY) + flag_pic = 0; + + if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561) + error ("-mmulticore can only be used with BF561"); + + if (TARGET_COREA && !TARGET_MULTICORE) + error ("-mcorea should be used with -mmulticore"); + + if (TARGET_COREB && !TARGET_MULTICORE) + error ("-mcoreb should be used with -mmulticore"); + + if (TARGET_COREA && TARGET_COREB) + error ("-mcorea and -mcoreb can%'t be used together"); + + flag_schedule_insns = 0; + + init_machine_status = bfin_init_machine_status; +} + +/* Return the destination address of BRANCH. + We need to use this instead of get_attr_length, because the + cbranch_with_nops pattern conservatively sets its length to 6, and + we still prefer to use shorter sequences. */ + +static int +branch_dest (rtx branch) +{ + rtx dest; + int dest_uid; + rtx pat = PATTERN (branch); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + dest = SET_SRC (pat); + if (GET_CODE (dest) == IF_THEN_ELSE) + dest = XEXP (dest, 1); + dest = XEXP (dest, 0); + dest_uid = INSN_UID (dest); + return INSN_ADDRESSES (dest_uid); +} + +/* Return nonzero if INSN is annotated with a REG_BR_PROB note that indicates + it's a branch that's predicted taken. 
*/ + +static int +cbranch_predicted_taken_p (rtx insn) +{ + rtx x = find_reg_note (insn, REG_BR_PROB, 0); + + if (x) + { + int pred_val = XINT (x, 0); + + return pred_val >= REG_BR_PROB_BASE / 2; + } + + return 0; +} + +/* Templates for use by asm_conditional_branch. */ + +static const char *ccbranch_templates[][3] = { + { "if !cc jump %3;", "if cc jump 4 (bp); jump.s %3;", "if cc jump 6 (bp); jump.l %3;" }, + { "if cc jump %3;", "if !cc jump 4 (bp); jump.s %3;", "if !cc jump 6 (bp); jump.l %3;" }, + { "if !cc jump %3 (bp);", "if cc jump 4; jump.s %3;", "if cc jump 6; jump.l %3;" }, + { "if cc jump %3 (bp);", "if !cc jump 4; jump.s %3;", "if !cc jump 6; jump.l %3;" }, +}; + +/* Output INSN, which is a conditional branch instruction with operands + OPERANDS. + + We deal with the various forms of conditional branches that can be generated + by bfin_reorg to prevent the hardware from doing speculative loads, by + - emitting a sufficient number of nops, if N_NOPS is nonzero, or + - always emitting the branch as predicted taken, if PREDICT_TAKEN is true. + Either of these is only necessary if the branch is short, otherwise the + template we use ends in an unconditional jump which flushes the pipeline + anyway. */ + +void +asm_conditional_branch (rtx insn, rtx *operands, int n_nops, int predict_taken) +{ + int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); + /* Note : offset for instructions like if cc jmp; jump.[sl] offset + is to be taken from start of if cc rather than jump. + Range for jump.s is (-4094, 4096) instead of (-4096, 4094) + */ + int len = (offset >= -1024 && offset <= 1022 ? 0 + : offset >= -4094 && offset <= 4096 ? 1 + : 2); + int bp = predict_taken && len == 0 ? 1 : cbranch_predicted_taken_p (insn); + int idx = (bp << 1) | (GET_CODE (operands[0]) == EQ ? BRF : BRT); + output_asm_insn (ccbranch_templates[idx][len], operands); + gcc_assert (n_nops == 0 || !bp); + if (len == 0) + while (n_nops-- > 0) + output_asm_insn ("nop;", NULL); +} + +/* Emit rtl for a comparison operation CMP in mode MODE. Operands have been + stored in bfin_compare_op0 and bfin_compare_op1 already. */ + +rtx +bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED) +{ + enum rtx_code code1, code2; + rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1); + rtx tem = bfin_cc_rtx; + enum rtx_code code = GET_CODE (cmp); + + /* If we have a BImode input, then we already have a compare result, and + do not need to emit another comparison. */ + if (GET_MODE (op0) == BImode) + { + gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx); + tem = op0, code2 = code; + } + else + { + switch (code) { + /* bfin has these conditions */ + case EQ: + case LT: + case LE: + case LEU: + case LTU: + code1 = code; + code2 = NE; + break; + default: + code1 = reverse_condition (code); + code2 = EQ; + break; + } + emit_insn (gen_rtx_SET (VOIDmode, tem, + gen_rtx_fmt_ee (code1, BImode, op0, op1))); + } + + return gen_rtx_fmt_ee (code2, BImode, tem, CONST0_RTX (BImode)); +} + +/* Return nonzero iff C has exactly one bit set if it is interpreted + as a 32-bit constant. */ + +int +log2constp (unsigned HOST_WIDE_INT c) +{ + c &= 0xFFFFFFFF; + return c != 0 && (c & (c-1)) == 0; +} + +/* Returns the number of consecutive least significant zeros in the binary + representation of *V. + We modify *V to contain the original value arithmetically shifted right by + the number of zeroes. 
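A standalone sketch, not taken from the GCC sources above, of the length selection in asm_conditional_branch. As the source comment notes, the offset is measured from the start of the "if cc" insn, which is why the jump.s window is the asymmetric -4094..4096 rather than -4096..4094.

#include <stdio.h>

/* Pick the column of ccbranch_templates to use for a branch whose
   destination is OFFSET bytes away.  */
static int
cbranch_template_len (int offset)
{
  if (offset >= -1024 && offset <= 1022)
    return 0;   /* short conditional branch */
  if (offset >= -4094 && offset <= 4096)
    return 1;   /* reversed condition plus jump.s */
  return 2;     /* reversed condition plus jump.l */
}

int
main (void)
{
  printf ("%d %d %d\n",
          cbranch_template_len (100),      /* 0 */
          cbranch_template_len (2000),     /* 1 */
          cbranch_template_len (100000));  /* 2 */
  return 0;
}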
*/ + +static int +shiftr_zero (HOST_WIDE_INT *v) +{ + unsigned HOST_WIDE_INT tmp = *v; + unsigned HOST_WIDE_INT sgn; + int n = 0; + + if (tmp == 0) + return 0; + + sgn = tmp & ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1)); + while ((tmp & 0x1) == 0 && n <= 32) + { + tmp = (tmp >> 1) | sgn; + n++; + } + *v = tmp; + return n; +} + +/* After reload, split the load of an immediate constant. OPERANDS are the + operands of the movsi_insn pattern which we are splitting. We return + nonzero if we emitted a sequence to load the constant, zero if we emitted + nothing because we want to use the splitter's default sequence. */ + +int +split_load_immediate (rtx operands[]) +{ + HOST_WIDE_INT val = INTVAL (operands[1]); + HOST_WIDE_INT tmp; + HOST_WIDE_INT shifted = val; + HOST_WIDE_INT shifted_compl = ~val; + int num_zero = shiftr_zero (&shifted); + int num_compl_zero = shiftr_zero (&shifted_compl); + unsigned int regno = REGNO (operands[0]); + + /* This case takes care of single-bit set/clear constants, which we could + also implement with BITSET/BITCLR. */ + if (num_zero + && shifted >= -32768 && shifted < 65536 + && (D_REGNO_P (regno) + || (regno >= REG_P0 && regno <= REG_P7 && num_zero <= 2))) + { + emit_insn (gen_movsi (operands[0], GEN_INT (shifted))); + emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (num_zero))); + return 1; + } + + tmp = val & 0xFFFF; + tmp |= -(tmp & 0x8000); + + /* If high word has one bit set or clear, try to use a bit operation. */ + if (D_REGNO_P (regno)) + { + if (log2constp (val & 0xFFFF0000)) + { + emit_insn (gen_movsi (operands[0], GEN_INT (val & 0xFFFF))); + emit_insn (gen_iorsi3 (operands[0], operands[0], GEN_INT (val & 0xFFFF0000))); + return 1; + } + else if (log2constp (val | 0xFFFF) && (val & 0x8000) != 0) + { + emit_insn (gen_movsi (operands[0], GEN_INT (tmp))); + emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (val | 0xFFFF))); + } + } + + if (D_REGNO_P (regno)) + { + if (tmp >= -64 && tmp <= 63) + { + emit_insn (gen_movsi (operands[0], GEN_INT (tmp))); + emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536))); + return 1; + } + + if ((val & 0xFFFF0000) == 0) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + emit_insn (gen_movsi_low (operands[0], operands[0], operands[1])); + return 1; + } + + if ((val & 0xFFFF0000) == 0xFFFF0000) + { + emit_insn (gen_movsi (operands[0], constm1_rtx)); + emit_insn (gen_movsi_low (operands[0], operands[0], operands[1])); + return 1; + } + } + + /* Need DREGs for the remaining case. */ + if (regno > REG_R7) + return 0; + + if (optimize_size + && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63) + { + /* If optimizing for size, generate a sequence that has more instructions + but is shorter. */ + emit_insn (gen_movsi (operands[0], GEN_INT (shifted_compl))); + emit_insn (gen_ashlsi3 (operands[0], operands[0], + GEN_INT (num_compl_zero))); + emit_insn (gen_one_cmplsi2 (operands[0], operands[0])); + return 1; + } + return 0; +} + +/* Return true if the legitimate memory address for a memory operand of mode + MODE. Return false if not. */ + +static bool +bfin_valid_add (enum machine_mode mode, HOST_WIDE_INT value) +{ + unsigned HOST_WIDE_INT v = value > 0 ? value : -value; + int sz = GET_MODE_SIZE (mode); + int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2; + /* The usual offsettable_memref machinery doesn't work so well for this + port, so we deal with the problem here. 
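A standalone sketch, not taken from the GCC sources above, of the idea behind shiftr_zero and split_load_immediate: strip the trailing zero bits of a constant, and if what remains fits the 16-bit load-immediate range, materialize the value as a small load followed by a left shift. Sign handling is simplified to unsigned arithmetic here, unlike the sign-preserving arithmetic shift the patch uses.

#include <stdint.h>
#include <stdio.h>

/* Count trailing zero bits of V and return the stripped value through
   STRIPPED; zero is returned unchanged, as in shiftr_zero.  */
static int
strip_trailing_zeros (uint32_t v, uint32_t *stripped)
{
  int n = 0;

  if (v != 0)
    while ((v & 1) == 0)
      {
        v >>= 1;
        n++;
      }
  *stripped = v;
  return n;
}

int
main (void)
{
  uint32_t stripped;
  int n = strip_trailing_zeros (0x00030000u, &stripped);

  /* 0x30000 == 3 << 16: load the immediate 3, then shift left by 16.  */
  printf ("load %u, shift left by %d\n", (unsigned) stripped, n);
  return 0;
}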
*/ + if (value > 0 && sz == 8) + v += 4; + return (v & ~(0x7fff << shift)) == 0; +} + +static bool +bfin_valid_reg_p (unsigned int regno, int strict, enum machine_mode mode, + enum rtx_code outer_code) +{ + if (strict) + return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH); + else + return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH); +} + +/* Recognize an RTL expression that is a valid memory address for an + instruction. The MODE argument is the machine mode for the MEM expression + that wants to use this address. + + Blackfin addressing modes are as follows: + + [preg] + [preg + imm16] + + B [ Preg + uimm15 ] + W [ Preg + uimm16m2 ] + [ Preg + uimm17m4 ] + + [preg++] + [preg--] + [--sp] +*/ + +static bool +bfin_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + switch (GET_CODE (x)) { + case REG: + if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM)) + return true; + break; + case PLUS: + if (REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS) + && ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode) + || (GET_CODE (XEXP (x, 1)) == CONST_INT + && bfin_valid_add (mode, INTVAL (XEXP (x, 1)))))) + return true; + break; + case POST_INC: + case POST_DEC: + if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode) + && REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC)) + return true; + case PRE_DEC: + if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode) + && XEXP (x, 0) == stack_pointer_rtx + && REG_P (XEXP (x, 0)) + && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC)) + return true; + break; + default: + break; + } + return false; +} + +/* Decide whether we can force certain constants to memory. If we + decide we can't, the caller should be able to cope with it in + another way. */ + +static bool +bfin_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED) +{ + /* We have only one class of non-legitimate constants, and our movsi + expander knows how to handle them. Dropping these constants into the + data section would only shift the problem - we'd still get relocs + outside the object, in the data section rather than the text section. */ + return true; +} + +/* Ensure that for any constant of the form symbol + offset, the offset + remains within the object. Any other constants are ok. + This ensures that flat binaries never have to deal with relocations + crossing section boundaries. */ + +static bool +bfin_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + rtx sym; + HOST_WIDE_INT offset; + + if (GET_CODE (x) != CONST) + return true; + + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS); + + sym = XEXP (x, 0); + x = XEXP (x, 1); + if (GET_CODE (sym) != SYMBOL_REF + || GET_CODE (x) != CONST_INT) + return true; + offset = INTVAL (x); + + if (SYMBOL_REF_DECL (sym) == 0) + return true; + if (offset < 0 + || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym)))) + return false; + + return true; +} + +static bool +bfin_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, + bool speed) +{ + enum rtx_code code = (enum rtx_code) code_i; + enum rtx_code outer_code = (enum rtx_code) outer_code_i; + int cost2 = COSTS_N_INSNS (1); + rtx op0, op1; + + switch (code) + { + case CONST_INT: + if (outer_code == SET || outer_code == PLUS) + *total = satisfies_constraint_Ks7 (x) ? 0 : cost2; + else if (outer_code == AND) + *total = log2constp (~INTVAL (x)) ? 
0 : cost2; + else if (outer_code == LE || outer_code == LT || outer_code == EQ) + *total = (INTVAL (x) >= -4 && INTVAL (x) <= 3) ? 0 : cost2; + else if (outer_code == LEU || outer_code == LTU) + *total = (INTVAL (x) >= 0 && INTVAL (x) <= 7) ? 0 : cost2; + else if (outer_code == MULT) + *total = (INTVAL (x) == 2 || INTVAL (x) == 4) ? 0 : cost2; + else if (outer_code == ASHIFT && (INTVAL (x) == 1 || INTVAL (x) == 2)) + *total = 0; + else if (outer_code == ASHIFT || outer_code == ASHIFTRT + || outer_code == LSHIFTRT) + *total = (INTVAL (x) >= 0 && INTVAL (x) <= 31) ? 0 : cost2; + else if (outer_code == IOR || outer_code == XOR) + *total = (INTVAL (x) & (INTVAL (x) - 1)) == 0 ? 0 : cost2; + else + *total = cost2; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + case CONST_DOUBLE: + *total = COSTS_N_INSNS (2); + return true; + + case PLUS: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_MODE (x) == SImode) + { + if (GET_CODE (op0) == MULT + && GET_CODE (XEXP (op0, 1)) == CONST_INT) + { + HOST_WIDE_INT val = INTVAL (XEXP (op0, 1)); + if (val == 2 || val == 4) + { + *total = cost2; + *total += rtx_cost (XEXP (op0, 0), outer_code, opno, speed); + *total += rtx_cost (op1, outer_code, opno, speed); + return true; + } + } + *total = cost2; + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += set_src_cost (op0, speed); +#if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer + towards creating too many induction variables. */ + if (!reg_or_7bit_operand (op1, SImode)) + *total += set_src_cost (op1, speed); +#endif + } + else if (GET_MODE (x) == DImode) + { + *total = 6 * cost2; + if (GET_CODE (op1) != CONST_INT + || !satisfies_constraint_Ks7 (op1)) + *total += rtx_cost (op1, PLUS, 1, speed); + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, PLUS, 0, speed); + } + return true; + + case MINUS: + if (GET_MODE (x) == DImode) + *total = 6 * cost2; + else + *total = cost2; + return true; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (GET_MODE (x) == DImode) + *total = 6 * cost2; + else + *total = cost2; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, code, 0, speed); + + return true; + + case IOR: + case AND: + case XOR: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */ + if (code == IOR) + { + if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT) + || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND) + || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT) + || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT)) + { + *total = cost2; + return true; + } + } + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, code, 0, speed); + + if (GET_MODE (x) == DImode) + { + *total = 2 * cost2; + return true; + } + *total = cost2; + if (GET_MODE (x) != SImode) + return true; + + if (code == AND) + { + if (! rhs_andsi3_operand (XEXP (x, 1), SImode)) + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + } + else + { + if (! 
regorlog2_operand (XEXP (x, 1), SImode)) + *total += rtx_cost (XEXP (x, 1), code, 1, speed); + } + + return true; + + case ZERO_EXTRACT: + case SIGN_EXTRACT: + if (outer_code == SET + && XEXP (x, 1) == const1_rtx + && GET_CODE (XEXP (x, 2)) == CONST_INT) + { + *total = 2 * cost2; + return true; + } + /* fall through */ + + case SIGN_EXTEND: + case ZERO_EXTEND: + *total = cost2; + return true; + + case MULT: + { + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) == GET_CODE (op1) + && (GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (1); + op0 = XEXP (op0, 0); + op1 = XEXP (op1, 0); + } + else if (!speed) + *total = COSTS_N_INSNS (1); + else + *total = COSTS_N_INSNS (3); + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, MULT, 0, speed); + if (GET_CODE (op1) != REG + && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG)) + *total += rtx_cost (op1, MULT, 1, speed); + } + return true; + + case UDIV: + case UMOD: + *total = COSTS_N_INSNS (32); + return true; + + case VEC_CONCAT: + case VEC_SELECT: + if (outer_code == SET) + *total = cost2; + return true; + + default: + return false; + } +} + +/* Used for communication between {push,pop}_multiple_operation (which + we use not only as a predicate) and the corresponding output functions. */ +static int first_preg_to_save, first_dreg_to_save; +static int n_regs_to_save; + +int +analyze_push_multiple_operation (rtx op) +{ + int lastdreg = 8, lastpreg = 6; + int i, group; + + first_preg_to_save = lastpreg; + first_dreg_to_save = lastdreg; + for (i = 1, group = 0; i < XVECLEN (op, 0) - 1; i++) + { + rtx t = XVECEXP (op, 0, i); + rtx src, dest; + int regno; + + if (GET_CODE (t) != SET) + return 0; + + src = SET_SRC (t); + dest = SET_DEST (t); + if (GET_CODE (dest) != MEM || ! REG_P (src)) + return 0; + dest = XEXP (dest, 0); + if (GET_CODE (dest) != PLUS + || ! REG_P (XEXP (dest, 0)) + || REGNO (XEXP (dest, 0)) != REG_SP + || GET_CODE (XEXP (dest, 1)) != CONST_INT + || INTVAL (XEXP (dest, 1)) != -i * 4) + return 0; + + regno = REGNO (src); + if (group == 0) + { + if (D_REGNO_P (regno)) + { + group = 1; + first_dreg_to_save = lastdreg = regno - REG_R0; + } + else if (regno >= REG_P0 && regno <= REG_P7) + { + group = 2; + first_preg_to_save = lastpreg = regno - REG_P0; + } + else + return 0; + + continue; + } + + if (group == 1) + { + if (regno >= REG_P0 && regno <= REG_P7) + { + group = 2; + first_preg_to_save = lastpreg = regno - REG_P0; + } + else if (regno != REG_R0 + lastdreg + 1) + return 0; + else + lastdreg++; + } + else if (group == 2) + { + if (regno != REG_P0 + lastpreg + 1) + return 0; + lastpreg++; + } + } + n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save; + return 1; +} + +int +analyze_pop_multiple_operation (rtx op) +{ + int lastdreg = 8, lastpreg = 6; + int i, group; + + for (i = 1, group = 0; i < XVECLEN (op, 0); i++) + { + rtx t = XVECEXP (op, 0, i); + rtx src, dest; + int regno; + + if (GET_CODE (t) != SET) + return 0; + + src = SET_SRC (t); + dest = SET_DEST (t); + if (GET_CODE (src) != MEM || ! REG_P (dest)) + return 0; + src = XEXP (src, 0); + + if (i == 1) + { + if (! REG_P (src) || REGNO (src) != REG_SP) + return 0; + } + else if (GET_CODE (src) != PLUS + || ! 
REG_P (XEXP (src, 0)) + || REGNO (XEXP (src, 0)) != REG_SP + || GET_CODE (XEXP (src, 1)) != CONST_INT + || INTVAL (XEXP (src, 1)) != (i - 1) * 4) + return 0; + + regno = REGNO (dest); + if (group == 0) + { + if (regno == REG_R7) + { + group = 1; + lastdreg = 7; + } + else if (regno != REG_P0 + lastpreg - 1) + return 0; + else + lastpreg--; + } + else if (group == 1) + { + if (regno != REG_R0 + lastdreg - 1) + return 0; + else + lastdreg--; + } + } + first_dreg_to_save = lastdreg; + first_preg_to_save = lastpreg; + n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save; + return 1; +} + +/* Emit assembly code for one multi-register push described by INSN, with + operands in OPERANDS. */ + +void +output_push_multiple (rtx insn, rtx *operands) +{ + char buf[80]; + int ok; + + /* Validate the insn again, and compute first_[dp]reg_to_save. */ + ok = analyze_push_multiple_operation (PATTERN (insn)); + gcc_assert (ok); + + if (first_dreg_to_save == 8) + sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save); + else if (first_preg_to_save == 6) + sprintf (buf, "[--sp] = ( r7:%d );\n", first_dreg_to_save); + else + sprintf (buf, "[--sp] = ( r7:%d, p5:%d );\n", + first_dreg_to_save, first_preg_to_save); + + output_asm_insn (buf, operands); +} + +/* Emit assembly code for one multi-register pop described by INSN, with + operands in OPERANDS. */ + +void +output_pop_multiple (rtx insn, rtx *operands) +{ + char buf[80]; + int ok; + + /* Validate the insn again, and compute first_[dp]reg_to_save. */ + ok = analyze_pop_multiple_operation (PATTERN (insn)); + gcc_assert (ok); + + if (first_dreg_to_save == 8) + sprintf (buf, "( p5:%d ) = [sp++];\n", first_preg_to_save); + else if (first_preg_to_save == 6) + sprintf (buf, "( r7:%d ) = [sp++];\n", first_dreg_to_save); + else + sprintf (buf, "( r7:%d, p5:%d ) = [sp++];\n", + first_dreg_to_save, first_preg_to_save); + + output_asm_insn (buf, operands); +} + +/* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */ + +static void +single_move_for_movmem (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset) +{ + rtx scratch = gen_reg_rtx (mode); + rtx srcmem, dstmem; + + srcmem = adjust_address_nv (src, mode, offset); + dstmem = adjust_address_nv (dst, mode, offset); + emit_move_insn (scratch, srcmem); + emit_move_insn (dstmem, scratch); +} + +/* Expand a string move operation of COUNT_EXP bytes from SRC to DST, with + alignment ALIGN_EXP. Return true if successful, false if we should fall + back on a different method. */ + +bool +bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) +{ + rtx srcreg, destreg, countreg; + HOST_WIDE_INT align = 0; + unsigned HOST_WIDE_INT count = 0; + + if (GET_CODE (align_exp) == CONST_INT) + align = INTVAL (align_exp); + if (GET_CODE (count_exp) == CONST_INT) + { + count = INTVAL (count_exp); +#if 0 + if (!TARGET_INLINE_ALL_STRINGOPS && count > 64) + return false; +#endif + } + + /* If optimizing for size, only do single copies inline. 
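A standalone sketch, not taken from the GCC sources above, of the register-range syntax emitted by output_push_multiple: the saved D registers always run from r7 down to first_dreg_to_save and the P registers from p5 down to first_preg_to_save, with the values 8 and 6 respectively meaning that group is empty.

#include <stddef.h>
#include <stdio.h>

/* Format a Blackfin multi-register push, mirroring the three cases in
   output_push_multiple.  */
static void
format_push (char *buf, size_t len, int first_dreg, int first_preg)
{
  if (first_dreg == 8)
    snprintf (buf, len, "[--sp] = ( p5:%d );", first_preg);
  else if (first_preg == 6)
    snprintf (buf, len, "[--sp] = ( r7:%d );", first_dreg);
  else
    snprintf (buf, len, "[--sp] = ( r7:%d, p5:%d );", first_dreg, first_preg);
}

int
main (void)
{
  char buf[64];

  format_push (buf, sizeof buf, 4, 3);
  puts (buf);   /* [--sp] = ( r7:4, p5:3 ); */
  return 0;
}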
*/ + if (optimize_size) + { + if (count == 2 && align < 2) + return false; + if (count == 4 && align < 4) + return false; + if (count != 1 && count != 2 && count != 4) + return false; + } + if (align < 2 && count != 1) + return false; + + destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); + if (destreg != XEXP (dst, 0)) + dst = replace_equiv_address_nv (dst, destreg); + srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); + if (srcreg != XEXP (src, 0)) + src = replace_equiv_address_nv (src, srcreg); + + if (count != 0 && align >= 2) + { + unsigned HOST_WIDE_INT offset = 0; + + if (align >= 4) + { + if ((count & ~3) == 4) + { + single_move_for_movmem (dst, src, SImode, offset); + offset = 4; + } + else if (count & ~3) + { + HOST_WIDE_INT new_count = ((count >> 2) & 0x3fffffff) - 1; + countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count)); + + emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg)); + cfun->machine->has_loopreg_clobber = true; + } + if (count & 2) + { + single_move_for_movmem (dst, src, HImode, offset); + offset += 2; + } + } + else + { + if ((count & ~1) == 2) + { + single_move_for_movmem (dst, src, HImode, offset); + offset = 2; + } + else if (count & ~1) + { + HOST_WIDE_INT new_count = ((count >> 1) & 0x7fffffff) - 1; + countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count)); + + emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg)); + cfun->machine->has_loopreg_clobber = true; + } + } + if (count & 1) + { + single_move_for_movmem (dst, src, QImode, offset); + } + return true; + } + return false; +} + +/* Compute the alignment for a local variable. + TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. */ + +unsigned +bfin_local_alignment (tree type, unsigned align) +{ + /* Increasing alignment for (relatively) big types allows the builtin + memcpy can use 32 bit loads/stores. */ + if (TYPE_SIZE (type) + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && (TREE_INT_CST_LOW (TYPE_SIZE (type)) > 8 + || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 32) + return 32; + return align; +} + +/* Implement TARGET_SCHED_ISSUE_RATE. */ + +static int +bfin_issue_rate (void) +{ + return 3; +} + +static int +bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type dep_insn_type; + int dep_insn_code_number; + + /* Anti and output dependencies have zero cost. */ + if (REG_NOTE_KIND (link) != 0) + return 0; + + dep_insn_code_number = recog_memoized (dep_insn); + + /* If we can't recognize the insns, we can't really do anything. */ + if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) + return cost; + + dep_insn_type = get_attr_type (dep_insn); + + if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD) + { + rtx pat = PATTERN (dep_insn); + rtx dest, src; + + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + dest = SET_DEST (pat); + src = SET_SRC (pat); + if (! ADDRESS_REGNO_P (REGNO (dest)) + || ! (MEM_P (src) || D_REGNO_P (REGNO (src)))) + return cost; + return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3); + } + + return cost; +} + +/* This function acts like NEXT_INSN, but is aware of three-insn bundles and + skips all subsequent parallel instructions if INSN is the start of such + a group. 
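A standalone sketch, not taken from the GCC sources above, of the chunking used by bfin_expand_movmem once the operands are known to be 4-byte aligned: whole words first (via a single move or the rep_movsi loop), then at most one halfword and one byte for the remainder. Lower alignment restricts the plan to halfword and byte moves.

#include <stdio.h>

/* Break an aligned copy of COUNT bytes into word, halfword and byte
   moves, as bfin_expand_movmem does when align >= 4.  */
static void
plan_aligned_copy (unsigned count)
{
  unsigned words = count / 4;
  unsigned rest  = count % 4;

  printf ("%u word move(s), %u halfword, %u byte\n",
          words, rest / 2, rest % 2);
}

int
main (void)
{
  plan_aligned_copy (23);   /* 5 word move(s), 1 halfword, 1 byte */
  return 0;
}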
*/ +static rtx +find_next_insn_start (rtx insn) +{ + if (GET_MODE (insn) == SImode) + { + while (GET_MODE (insn) != QImode) + insn = NEXT_INSN (insn); + } + return NEXT_INSN (insn); +} + +/* This function acts like PREV_INSN, but is aware of three-insn bundles and + skips all subsequent parallel instructions if INSN is the start of such + a group. */ +static rtx +find_prev_insn_start (rtx insn) +{ + insn = PREV_INSN (insn); + gcc_assert (GET_MODE (insn) != SImode); + if (GET_MODE (insn) == QImode) + { + while (GET_MODE (PREV_INSN (insn)) == SImode) + insn = PREV_INSN (insn); + } + return insn; +} + +/* Implement TARGET_CAN_USE_DOLOOP_P. */ + +static bool +bfin_can_use_doloop_p (double_int, double_int iterations_max, + unsigned int, bool) +{ + /* Due to limitations in the hardware (an initial loop count of 0 + does not loop 2^32 times) we must avoid to generate a hardware + loops when we cannot rule out this case. */ + if (!flag_unsafe_loop_optimizations + && (iterations_max.high != 0 + || iterations_max.low >= 0xFFFFFFFF)) + return false; + return true; +} + +/* Increment the counter for the number of loop instructions in the + current function. */ + +void +bfin_hardware_loop (void) +{ + cfun->machine->has_hardware_loops++; +} + +/* Maximum loop nesting depth. */ +#define MAX_LOOP_DEPTH 2 + +/* Maximum size of a loop. */ +#define MAX_LOOP_LENGTH 2042 + +/* Maximum distance of the LSETUP instruction from the loop start. */ +#define MAX_LSETUP_DISTANCE 30 + +/* Estimate the length of INSN conservatively. */ + +static int +length_for_loop (rtx insn) +{ + int length = 0; + if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size) + { + if (ENABLE_WA_SPECULATIVE_SYNCS) + length = 8; + else if (ENABLE_WA_SPECULATIVE_LOADS) + length = 6; + } + else if (LABEL_P (insn)) + { + if (ENABLE_WA_SPECULATIVE_SYNCS) + length = 4; + } + + if (NONDEBUG_INSN_P (insn)) + length += get_attr_length (insn); + + return length; +} + +/* Optimize LOOP. */ + +static bool +hwloop_optimize (hwloop_info loop) +{ + basic_block bb; + rtx insn, last_insn; + rtx loop_init, start_label, end_label; + rtx iter_reg, scratchreg, scratch_init, scratch_init_insn; + rtx lc_reg, lt_reg, lb_reg; + rtx seq, seq_end; + int length; + bool clobber0, clobber1; + + if (loop->depth > MAX_LOOP_DEPTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no); + return false; + } + + /* Get the loop iteration register. */ + iter_reg = loop->iter_reg; + + gcc_assert (REG_P (iter_reg)); + + scratchreg = NULL_RTX; + scratch_init = iter_reg; + scratch_init_insn = NULL_RTX; + if (!PREG_P (iter_reg) && loop->incoming_src) + { + basic_block bb_in = loop->incoming_src; + int i; + for (i = REG_P0; i <= REG_P5; i++) + if ((df_regs_ever_live_p (i) + || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE + && call_used_regs[i])) + && !REGNO_REG_SET_P (df_get_live_out (bb_in), i)) + { + scratchreg = gen_rtx_REG (SImode, i); + break; + } + for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in); + insn = PREV_INSN (insn)) + { + rtx set; + if (NOTE_P (insn) || BARRIER_P (insn)) + continue; + set = single_set (insn); + if (set && rtx_equal_p (SET_DEST (set), iter_reg)) + { + if (CONSTANT_P (SET_SRC (set))) + { + scratch_init = SET_SRC (set); + scratch_init_insn = insn; + } + break; + } + else if (reg_mentioned_p (iter_reg, PATTERN (insn))) + break; + } + } + + if (loop->incoming_src) + { + /* Make sure the predecessor is before the loop start label, as required by + the LSETUP instruction. 
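A standalone sketch, not taken from the GCC sources above, of the three limits that hwloop_optimize enforces before committing to an LSETUP hardware loop; the constants are the ones defined in the patch.

#include <stdbool.h>
#include <stdio.h>

#define MAX_LOOP_DEPTH       2     /* nesting supported by LC0/LC1 */
#define MAX_LOOP_LENGTH      2042  /* maximum loop body length in bytes */
#define MAX_LSETUP_DISTANCE  30    /* LSETUP must sit close to the loop start */

/* True if a candidate loop passes the basic feasibility checks.  */
static bool
hwloop_feasible (int depth, int length, int lsetup_distance)
{
  return depth <= MAX_LOOP_DEPTH
         && length <= MAX_LOOP_LENGTH
         && lsetup_distance <= MAX_LSETUP_DISTANCE;
}

int
main (void)
{
  printf ("%d %d\n",
          hwloop_feasible (1, 100, 10),    /* 1 */
          hwloop_feasible (3, 100, 10));   /* 0: nested too deeply */
  return 0;
}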
*/ + length = 0; + insn = BB_END (loop->incoming_src); + /* If we have to insert the LSETUP before a jump, count that jump in the + length. */ + if (vec_safe_length (loop->incoming) > 1 + || !(loop->incoming->last ()->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (insn)); + insn = PREV_INSN (insn); + } + + for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn)) + length += length_for_loop (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d lsetup not before loop_start\n", + loop->loop_no); + return false; + } + + /* Account for the pop of a scratch register where necessary. */ + if (!PREG_P (iter_reg) && scratchreg == NULL_RTX + && ENABLE_WA_LOAD_LCREGS) + length += 2; + + if (length > MAX_LSETUP_DISTANCE) + { + if (dump_file) + fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no); + return false; + } + } + + /* Check if start_label appears before loop_end and calculate the + offset between them. We calculate the length of instructions + conservatively. */ + length = 0; + for (insn = loop->start_label; + insn && insn != loop->loop_end; + insn = NEXT_INSN (insn)) + length += length_for_loop (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + return false; + } + + loop->length = length; + if (loop->length > MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + return false; + } + + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) + fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no); + return false; + } + + clobber0 = (TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC0) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LB0) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LT0)); + clobber1 = (TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC1) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LB1) + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REG_LT1)); + if (clobber0 && clobber1) + { + if (dump_file) + fprintf (dump_file, ";; loop %d no loop reg available\n", + loop->loop_no); + return false; + } + + /* There should be an instruction before the loop_end instruction + in the same basic block. And the instruction must not be + - JUMP + - CONDITIONAL BRANCH + - CALL + - CSYNC + - SSYNC + - Returns (RTS, RTN, etc.) */ + + bb = loop->tail; + last_insn = find_prev_insn_start (loop->loop_end); + + while (1) + { + for (; last_insn != BB_HEAD (bb); + last_insn = find_prev_insn_start (last_insn)) + if (NONDEBUG_INSN_P (last_insn)) + break; + + if (last_insn != BB_HEAD (bb)) + break; + + if (single_pred_p (bb) + && single_pred_edge (bb)->flags & EDGE_FALLTHRU + && single_pred (bb) != ENTRY_BLOCK_PTR_FOR_FN (cfun)) + { + bb = single_pred (bb); + last_insn = BB_END (bb); + continue; + } + else + { + last_insn = NULL_RTX; + break; + } + } + + if (!last_insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has no last instruction\n", + loop->loop_no); + return false; + } + + if (JUMP_P (last_insn) && !any_condjump_p (last_insn)) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last instruction\n", + loop->loop_no); + return false; + } + /* In all other cases, try to replace a bad last insn with a nop. 
*/ + else if (JUMP_P (last_insn) + || CALL_P (last_insn) + || get_attr_type (last_insn) == TYPE_SYNC + || get_attr_type (last_insn) == TYPE_CALL + || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI + || recog_memoized (last_insn) == CODE_FOR_return_internal + || GET_CODE (PATTERN (last_insn)) == ASM_INPUT + || asm_noperands (PATTERN (last_insn)) >= 0) + { + if (loop->length + 2 > MAX_LOOP_LENGTH) + { + if (dump_file) + fprintf (dump_file, ";; loop %d too long\n", loop->loop_no); + return false; + } + if (dump_file) + fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n", + loop->loop_no); + + last_insn = emit_insn_after (gen_forced_nop (), last_insn); + } + + loop->last_insn = last_insn; + + /* The loop is good for replacement. */ + start_label = loop->start_label; + end_label = gen_label_rtx (); + iter_reg = loop->iter_reg; + + if (loop->depth == 1 && !clobber1) + { + lc_reg = gen_rtx_REG (SImode, REG_LC1); + lb_reg = gen_rtx_REG (SImode, REG_LB1); + lt_reg = gen_rtx_REG (SImode, REG_LT1); + SET_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC1); + } + else + { + lc_reg = gen_rtx_REG (SImode, REG_LC0); + lb_reg = gen_rtx_REG (SImode, REG_LB0); + lt_reg = gen_rtx_REG (SImode, REG_LT0); + SET_HARD_REG_BIT (loop->regs_set_in_loop, REG_LC0); + } + + loop->end_label = end_label; + + /* Create a sequence containing the loop setup. */ + start_sequence (); + + /* LSETUP only accepts P registers. If we have one, we can use it, + otherwise there are several ways of working around the problem. + If we're not affected by anomaly 312, we can load the LC register + from any iteration register, and use LSETUP without initialization. + If we've found a P scratch register that's not live here, we can + instead copy the iter_reg into that and use an initializing LSETUP. + If all else fails, push and pop P0 and use it as a scratch. */ + if (P_REGNO_P (REGNO (iter_reg))) + { + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, iter_reg); + seq_end = emit_insn (loop_init); + } + else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg)) + { + emit_insn (gen_movsi (lc_reg, iter_reg)); + loop_init = gen_lsetup_without_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg); + seq_end = emit_insn (loop_init); + } + else if (scratchreg != NULL_RTX) + { + emit_insn (gen_movsi (scratchreg, scratch_init)); + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, scratchreg); + seq_end = emit_insn (loop_init); + if (scratch_init_insn != NULL_RTX) + delete_insn (scratch_init_insn); + } + else + { + rtx p0reg = gen_rtx_REG (SImode, REG_P0); + rtx push = gen_frame_mem (SImode, + gen_rtx_PRE_DEC (SImode, stack_pointer_rtx)); + rtx pop = gen_frame_mem (SImode, + gen_rtx_POST_INC (SImode, stack_pointer_rtx)); + emit_insn (gen_movsi (push, p0reg)); + emit_insn (gen_movsi (p0reg, scratch_init)); + loop_init = gen_lsetup_with_autoinit (lt_reg, start_label, + lb_reg, end_label, + lc_reg, p0reg); + emit_insn (loop_init); + seq_end = emit_insn (gen_movsi (p0reg, pop)); + if (scratch_init_insn != NULL_RTX) + delete_insn (scratch_init_insn); + } + + if (dump_file) + { + fprintf (dump_file, ";; replacing loop %d initializer with\n", + loop->loop_no); + print_rtl_single (dump_file, loop_init); + fprintf (dump_file, ";; replacing loop %d terminator with\n", + loop->loop_no); + print_rtl_single (dump_file, loop->loop_end); + } + + /* If the loop isn't entered at the top, also create a jump to the entry + point. 
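A standalone sketch, not taken from the GCC sources above, of how hwloop_optimize picks between the two hardware loop register sets: the transformation is abandoned when both LC0/LT0/LB0 and LC1/LT1/LB1 are clobbered inside the loop, an innermost loop takes set 1 when it is free, and set 0 is used in the remaining cases.

#include <stdio.h>

/* Return 1 or 0 for the loop register set to use, or -1 if no hardware
   loop can be generated, mirroring the clobber0/clobber1 logic.  */
static int
pick_loop_reg_set (int depth, int clobber0, int clobber1)
{
  if (clobber0 && clobber1)
    return -1;                            /* give up on the hardware loop */
  return (depth == 1 && !clobber1) ? 1 : 0;
}

int
main (void)
{
  printf ("%d %d %d\n",
          pick_loop_reg_set (1, 0, 0),    /* 1 */
          pick_loop_reg_set (2, 0, 1),    /* 0 */
          pick_loop_reg_set (1, 1, 1));   /* -1 */
  return 0;
}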
*/ + if (!loop->incoming_src && loop->head != loop->incoming_dest) + { + rtx label = BB_HEAD (loop->incoming_dest); + /* If we're jumping to the final basic block in the loop, and there's + only one cheap instruction before the end (typically an increment of + an induction variable), we can just emit a copy here instead of a + jump. */ + if (loop->incoming_dest == loop->tail + && next_real_insn (label) == last_insn + && asm_noperands (last_insn) < 0 + && GET_CODE (PATTERN (last_insn)) == SET) + { + seq_end = emit_insn (copy_rtx (PATTERN (last_insn))); + } + else + { + emit_jump_insn (gen_jump (label)); + seq_end = emit_barrier (); + } + } + + seq = get_insns (); + end_sequence (); + + if (loop->incoming_src) + { + rtx prev = BB_END (loop->incoming_src); + if (vec_safe_length (loop->incoming) > 1 + || !(loop->incoming->last ()->flags & EDGE_FALLTHRU)) + { + gcc_assert (JUMP_P (prev)); + prev = PREV_INSN (prev); + } + emit_insn_after (seq, prev); + } + else + { + basic_block new_bb; + edge e; + edge_iterator ei; + +#ifdef ENABLE_CHECKING + if (loop->head != loop->incoming_dest) + { + /* We aren't entering the loop at the top. Since we've established + that the loop is entered only at one point, this means there + can't be fallthru edges into the head. Any such fallthru edges + would become invalid when we insert the new block, so verify + that this does not in fact happen. */ + FOR_EACH_EDGE (e, ei, loop->head->preds) + gcc_assert (!(e->flags & EDGE_FALLTHRU)); + } +#endif + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU) + || e->dest != loop->head) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + e = make_edge (new_bb, loop->head, 0); + } + + delete_insn (loop->loop_end); + /* Insert the loop end label before the last instruction of the loop. */ + emit_label_before (loop->end_label, loop->last_insn); + + return true; +} + +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the doloop_end pattern + into a subtract and a test. */ +static void +hwloop_fail (hwloop_info loop) +{ + rtx insn = loop->loop_end; + + if (DPREG_P (loop->iter_reg)) + { + /* If loop->iter_reg is a DREG or PREG, we can split it here + without scratch register. */ + rtx insn, test; + + emit_insn_before (gen_addsi3 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, + loop->iter_reg, const0_rtx, + loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); + } + else + { + splitting_loops = 1; + try_split (PATTERN (insn), insn, 1); + splitting_loops = 0; + } +} + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a loop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. 
*/ + +static rtx +hwloop_pattern_reg (rtx insn) +{ + rtx reg; + + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end) + return NULL_RTX; + + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + +static struct hw_doloop_hooks bfin_doloop_hooks = +{ + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; + +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns + and tries to rewrite the RTL of these loops so that proper Blackfin + hardware loops are generated. */ + +static void +bfin_reorg_loops (void) +{ + reorg_loops (true, &bfin_doloop_hooks); +} + +/* Possibly generate a SEQUENCE out of three insns found in SLOT. + Returns true if we modified the insn chain, false otherwise. */ +static bool +gen_one_bundle (rtx slot[3]) +{ + gcc_assert (slot[1] != NULL_RTX); + + /* Don't add extra NOPs if optimizing for size. */ + if (optimize_size + && (slot[0] == NULL_RTX || slot[2] == NULL_RTX)) + return false; + + /* Verify that we really can do the multi-issue. */ + if (slot[0]) + { + rtx t = NEXT_INSN (slot[0]); + while (t != slot[1]) + { + if (! NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_DELETED) + return false; + t = NEXT_INSN (t); + } + } + if (slot[2]) + { + rtx t = NEXT_INSN (slot[1]); + while (t != slot[2]) + { + if (! NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_DELETED) + return false; + t = NEXT_INSN (t); + } + } + + if (slot[0] == NULL_RTX) + { + slot[0] = emit_insn_before (gen_mnop (), slot[1]); + df_insn_rescan (slot[0]); + } + if (slot[2] == NULL_RTX) + { + slot[2] = emit_insn_after (gen_forced_nop (), slot[1]); + df_insn_rescan (slot[2]); + } + + /* Avoid line number information being printed inside one bundle. */ + if (INSN_LOCATION (slot[1]) + && INSN_LOCATION (slot[1]) != INSN_LOCATION (slot[0])) + INSN_LOCATION (slot[1]) = INSN_LOCATION (slot[0]); + if (INSN_LOCATION (slot[2]) + && INSN_LOCATION (slot[2]) != INSN_LOCATION (slot[0])) + INSN_LOCATION (slot[2]) = INSN_LOCATION (slot[0]); + + /* Terminate them with "|| " instead of ";" in the output. */ + PUT_MODE (slot[0], SImode); + PUT_MODE (slot[1], SImode); + /* Terminate the bundle, for the benefit of reorder_var_tracking_notes. */ + PUT_MODE (slot[2], QImode); + return true; +} + +/* Go through all insns, and use the information generated during scheduling + to generate SEQUENCEs to represent bundles of instructions issued + simultaneously. */ + +static void +bfin_gen_bundles (void) +{ + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn, next; + rtx slot[3]; + int n_filled = 0; + + slot[0] = slot[1] = slot[2] = NULL_RTX; + for (insn = BB_HEAD (bb);; insn = next) + { + int at_end; + rtx delete_this = NULL_RTX; + + if (NONDEBUG_INSN_P (insn)) + { + enum attr_type type = get_attr_type (insn); + + if (type == TYPE_STALL) + { + gcc_assert (n_filled == 0); + delete_this = insn; + } + else + { + if (type == TYPE_DSP32 || type == TYPE_DSP32SHIFTIMM) + slot[0] = insn; + else if (slot[1] == NULL_RTX) + slot[1] = insn; + else + slot[2] = insn; + n_filled++; + } + } + + next = NEXT_INSN (insn); + while (next && insn != BB_END (bb) + && !(INSN_P (next) + && GET_CODE (PATTERN (next)) != USE + && GET_CODE (PATTERN (next)) != CLOBBER)) + { + insn = next; + next = NEXT_INSN (insn); + } + + /* BB_END can change due to emitting extra NOPs, so check here. 
*/ + at_end = insn == BB_END (bb); + if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode)) + { + if ((n_filled < 2 + || !gen_one_bundle (slot)) + && slot[0] != NULL_RTX) + { + rtx pat = PATTERN (slot[0]); + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC + && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT) + { + SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0); + INSN_CODE (slot[0]) = -1; + df_insn_rescan (slot[0]); + } + } + n_filled = 0; + slot[0] = slot[1] = slot[2] = NULL_RTX; + } + if (delete_this != NULL_RTX) + delete_insn (delete_this); + if (at_end) + break; + } + } +} + +/* Ensure that no var tracking notes are emitted in the middle of a + three-instruction bundle. */ + +static void +reorder_var_tracking_notes (void) +{ + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn, next; + rtx queue = NULL_RTX; + bool in_bundle = false; + + for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next) + { + next = NEXT_INSN (insn); + + if (INSN_P (insn)) + { + /* Emit queued up notes at the last instruction of a bundle. */ + if (GET_MODE (insn) == QImode) + { + while (queue) + { + rtx next_queue = PREV_INSN (queue); + PREV_INSN (NEXT_INSN (insn)) = queue; + NEXT_INSN (queue) = NEXT_INSN (insn); + NEXT_INSN (insn) = queue; + PREV_INSN (queue) = insn; + queue = next_queue; + } + in_bundle = false; + } + else if (GET_MODE (insn) == SImode) + in_bundle = true; + } + else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION) + { + if (in_bundle) + { + rtx prev = PREV_INSN (insn); + PREV_INSN (next) = prev; + NEXT_INSN (prev) = next; + + PREV_INSN (insn) = queue; + queue = insn; + } + } + } + } +} + +/* On some silicon revisions, functions shorter than a certain number of cycles + can cause unpredictable behaviour. Work around this by adding NOPs as + needed. */ +static void +workaround_rts_anomaly (void) +{ + rtx insn, first_insn = NULL_RTX; + int cycles = 4; + + if (! ENABLE_WA_RETS) + return; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (BARRIER_P (insn)) + return; + + if (NOTE_P (insn) || LABEL_P (insn)) + continue; + + if (JUMP_TABLE_DATA_P (insn)) + continue; + + if (first_insn == NULL_RTX) + first_insn = insn; + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT + || asm_noperands (pat) >= 0) + continue; + + if (CALL_P (insn)) + return; + + if (JUMP_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_return_internal) + break; + + /* Nothing to worry about for direct jumps. */ + if (!any_condjump_p (insn)) + return; + if (cycles <= 1) + return; + cycles--; + } + else if (INSN_P (insn)) + { + rtx pat = PATTERN (insn); + int this_cycles = 1; + + if (GET_CODE (pat) == PARALLEL) + { + if (analyze_push_multiple_operation (pat) + || analyze_pop_multiple_operation (pat)) + this_cycles = n_regs_to_save; + } + else + { + int icode = recog_memoized (insn); + + if (icode == CODE_FOR_link) + this_cycles = 4; + else if (icode == CODE_FOR_unlink) + this_cycles = 3; + else if (icode == CODE_FOR_mulsi3) + this_cycles = 5; + } + if (this_cycles >= cycles) + return; + + cycles -= this_cycles; + } + } + while (cycles > 0) + { + emit_insn_before (gen_nop (), first_insn); + cycles--; + } +} + +/* Return an insn type for INSN that can be used by the caller for anomaly + workarounds. This differs from plain get_attr_type in that it handles + SEQUENCEs. 
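A standalone sketch, not taken from the GCC sources above, of the padding rule in workaround_rts_anomaly: the function must represent at least four cycles of work before the return, each instruction ahead of the RTS consumes its own cycle count (4 for link, 3 for unlink, 5 for a multiply, one per register for a multiple push or pop, 1 otherwise), and whatever budget remains is filled with NOPs at the top of the function. The special handling of conditional branches and calls is omitted here.

#include <stdio.h>

/* Given the cycle counts of the instructions that precede the return,
   compute how many NOPs still have to be inserted.  */
static int
rts_nops_needed (const int *insn_cycles, int n_insns)
{
  int budget = 4;   /* cycles the silicon needs before an RTS */

  for (int i = 0; i < n_insns && budget > 0; i++)
    budget -= insn_cycles[i];
  return budget > 0 ? budget : 0;
}

int
main (void)
{
  int cycles[] = { 1, 1 };   /* two ordinary single-cycle insns */

  printf ("%d\n", rts_nops_needed (cycles, 2));   /* 2 NOPs still needed */
  return 0;
}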
*/ + +static enum attr_type +type_for_anomaly (rtx insn) +{ + rtx pat = PATTERN (insn); + if (GET_CODE (pat) == SEQUENCE) + { + enum attr_type t; + t = get_attr_type (XVECEXP (pat, 0, 1)); + if (t == TYPE_MCLD) + return t; + t = get_attr_type (XVECEXP (pat, 0, 2)); + if (t == TYPE_MCLD) + return t; + return TYPE_MCST; + } + else + return get_attr_type (insn); +} + +/* Return true iff the address found in MEM is based on the register + NP_REG and optionally has a positive offset. */ +static bool +harmless_null_pointer_p (rtx mem, int np_reg) +{ + mem = XEXP (mem, 0); + if (GET_CODE (mem) == POST_INC || GET_CODE (mem) == POST_DEC) + mem = XEXP (mem, 0); + if (REG_P (mem) && (int) REGNO (mem) == np_reg) + return true; + if (GET_CODE (mem) == PLUS + && REG_P (XEXP (mem, 0)) && (int) REGNO (XEXP (mem, 0)) == np_reg) + { + mem = XEXP (mem, 1); + if (GET_CODE (mem) == CONST_INT && INTVAL (mem) > 0) + return true; + } + return false; +} + +/* Return nonzero if INSN contains any loads that may trap. */ + +static bool +trapping_loads_p (rtx insn, int np_reg, bool after_np_branch) +{ + rtx mem = SET_SRC (single_set (insn)); + + if (!after_np_branch) + np_reg = -1; + return ((np_reg == -1 || !harmless_null_pointer_p (mem, np_reg)) + && may_trap_p (mem)); +} + +/* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of + a three-insn bundle, see if one of them is a load and return that if so. + Return NULL_RTX if the insn does not contain loads. */ +static rtx +find_load (rtx insn) +{ + if (!NONDEBUG_INSN_P (insn)) + return NULL_RTX; + if (get_attr_type (insn) == TYPE_MCLD) + return insn; + if (GET_MODE (insn) != SImode) + return NULL_RTX; + do { + insn = NEXT_INSN (insn); + if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode) + && get_attr_type (insn) == TYPE_MCLD) + return insn; + } while (GET_MODE (insn) != QImode); + return NULL_RTX; +} + +/* Determine whether PAT is an indirect call pattern. */ +static bool +indirect_call_p (rtx pat) +{ + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) + pat = SET_SRC (pat); + gcc_assert (GET_CODE (pat) == CALL); + pat = XEXP (pat, 0); + gcc_assert (GET_CODE (pat) == MEM); + pat = XEXP (pat, 0); + + return REG_P (pat); +} + +/* During workaround_speculation, track whether we're in the shadow of a + conditional branch that tests a P register for NULL. If so, we can omit + emitting NOPs if we see a load from that P register, since a speculative + access at address 0 isn't a problem, and the load is executed in all other + cases anyway. + Global for communication with note_np_check_stores through note_stores. + */ +int np_check_regno = -1; +bool np_after_branch = false; + +/* Subroutine of workaround_speculation, called through note_stores. */ +static void +note_np_check_stores (rtx x, const_rtx pat ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + if (REG_P (x) && (REGNO (x) == REG_CC || (int) REGNO (x) == np_check_regno)) + np_check_regno = -1; +} + +static void +workaround_speculation (void) +{ + rtx insn, next; + rtx last_condjump = NULL_RTX; + int cycles_since_jump = INT_MAX; + int delay_added = 0; + + if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS + && ! ENABLE_WA_INDIRECT_CALLS) + return; + + /* First pass: find predicted-false branches; if something after them + needs nops, insert them or change the branch to predict true. 
*/ + for (insn = get_insns (); insn; insn = next) + { + rtx pat; + int delay_needed = 0; + + next = find_next_insn_start (insn); + + if (NOTE_P (insn) || BARRIER_P (insn)) + continue; + if (JUMP_TABLE_DATA_P (insn)) + continue; + + if (LABEL_P (insn)) + { + np_check_regno = -1; + continue; + } + + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) + continue; + + if (GET_CODE (pat) == ASM_INPUT || asm_noperands (pat) >= 0) + { + np_check_regno = -1; + continue; + } + + if (JUMP_P (insn)) + { + /* Is this a condjump based on a null pointer comparison we saw + earlier? */ + if (np_check_regno != -1 + && recog_memoized (insn) == CODE_FOR_cbranchbi4) + { + rtx op = XEXP (SET_SRC (PATTERN (insn)), 0); + gcc_assert (GET_CODE (op) == EQ || GET_CODE (op) == NE); + if (GET_CODE (op) == NE) + np_after_branch = true; + } + if (any_condjump_p (insn) + && ! cbranch_predicted_taken_p (insn)) + { + last_condjump = insn; + delay_added = 0; + cycles_since_jump = 0; + } + else + cycles_since_jump = INT_MAX; + } + else if (CALL_P (insn)) + { + np_check_regno = -1; + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS) + { + delay_needed = 3; + } + } + else if (NONDEBUG_INSN_P (insn)) + { + rtx load_insn = find_load (insn); + enum attr_type type = type_for_anomaly (insn); + + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + + /* Detect a comparison of a P register with zero. If we later + see a condjump based on it, we have found a null pointer + check. */ + if (recog_memoized (insn) == CODE_FOR_compare_eq) + { + rtx src = SET_SRC (PATTERN (insn)); + if (REG_P (XEXP (src, 0)) + && P_REGNO_P (REGNO (XEXP (src, 0))) + && XEXP (src, 1) == const0_rtx) + { + np_check_regno = REGNO (XEXP (src, 0)); + np_after_branch = false; + } + else + np_check_regno = -1; + } + + if (load_insn && ENABLE_WA_SPECULATIVE_LOADS) + { + if (trapping_loads_p (load_insn, np_check_regno, + np_after_branch)) + delay_needed = 4; + } + else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS) + delay_needed = 3; + + /* See if we need to forget about a null pointer comparison + we found earlier. */ + if (recog_memoized (insn) != CODE_FOR_compare_eq) + { + note_stores (PATTERN (insn), note_np_check_stores, NULL); + if (np_check_regno != -1) + { + if (find_regno_note (insn, REG_INC, np_check_regno)) + np_check_regno = -1; + } + } + + } + + if (delay_needed > cycles_since_jump + && (delay_needed - cycles_since_jump) > delay_added) + { + rtx pat1; + int num_clobbers; + rtx *op = recog_data.operand; + + delay_needed -= cycles_since_jump; + + extract_insn (last_condjump); + if (optimize_size) + { + pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2], + op[3]); + cycles_since_jump = INT_MAX; + } + else + { + /* Do not adjust cycles_since_jump in this case, so that + we'll increase the number of NOPs for a subsequent insn + if necessary. */ + pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3], + GEN_INT (delay_needed)); + delay_added = delay_needed; + } + PATTERN (last_condjump) = pat1; + INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers); + } + if (CALL_P (insn)) + { + cycles_since_jump = INT_MAX; + delay_added = 0; + } + } + + /* Second pass: for predicted-true branches, see if anything at the + branch destination needs extra nops. 
*/ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + int cycles_since_jump; + if (JUMP_P (insn) + && any_condjump_p (insn) + && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken + || cbranch_predicted_taken_p (insn))) + { + rtx target = JUMP_LABEL (insn); + rtx label = target; + rtx next_tgt; + + cycles_since_jump = 0; + for (; target && cycles_since_jump < 3; target = next_tgt) + { + rtx pat; + + next_tgt = find_next_insn_start (target); + + if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target)) + continue; + + if (JUMP_TABLE_DATA_P (target)) + continue; + + pat = PATTERN (target); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT + || asm_noperands (pat) >= 0) + continue; + + if (NONDEBUG_INSN_P (target)) + { + rtx load_insn = find_load (target); + enum attr_type type = type_for_anomaly (target); + int delay_needed = 0; + if (cycles_since_jump < INT_MAX) + cycles_since_jump++; + + if (load_insn && ENABLE_WA_SPECULATIVE_LOADS) + { + if (trapping_loads_p (load_insn, -1, false)) + delay_needed = 2; + } + else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS) + delay_needed = 2; + + if (delay_needed > cycles_since_jump) + { + rtx prev = prev_real_insn (label); + delay_needed -= cycles_since_jump; + if (dump_file) + fprintf (dump_file, "Adding %d nops after %d\n", + delay_needed, INSN_UID (label)); + if (JUMP_P (prev) + && INSN_CODE (prev) == CODE_FOR_cbranch_with_nops) + { + rtx x; + HOST_WIDE_INT v; + + if (dump_file) + fprintf (dump_file, + "Reducing nops on insn %d.\n", + INSN_UID (prev)); + x = PATTERN (prev); + x = XVECEXP (x, 0, 1); + v = INTVAL (XVECEXP (x, 0, 0)) - delay_needed; + XVECEXP (x, 0, 0) = GEN_INT (v); + } + while (delay_needed-- > 0) + emit_insn_after (gen_nop (), label); + break; + } + } + } + } + } +} + +/* Called just before the final scheduling pass. If we need to insert NOPs + later on to work around speculative loads, insert special placeholder + insns that cause loads to be delayed for as many cycles as necessary + (and possible). This reduces the number of NOPs we need to add. + The dummy insns we generate are later removed by bfin_gen_bundles. */ +static void +add_sched_insns_for_speculation (void) +{ + rtx insn; + + if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS + && ! ENABLE_WA_INDIRECT_CALLS) + return; + + /* First pass: find predicted-false branches; if something after them + needs nops, insert them or change the branch to predict true. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn)) + continue; + if (JUMP_TABLE_DATA_P (insn)) + continue; + + pat = PATTERN (insn); + if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER + || GET_CODE (pat) == ASM_INPUT + || asm_noperands (pat) >= 0) + continue; + + if (JUMP_P (insn)) + { + if (any_condjump_p (insn) + && !cbranch_predicted_taken_p (insn)) + { + rtx n = next_real_insn (insn); + emit_insn_before (gen_stall (GEN_INT (3)), n); + } + } + } + + /* Second pass: for predicted-true branches, see if anything at the + branch destination needs extra nops. 
*/ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (JUMP_P (insn) + && any_condjump_p (insn) + && (cbranch_predicted_taken_p (insn))) + { + rtx target = JUMP_LABEL (insn); + rtx next = next_real_insn (target); + + if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE + && get_attr_type (next) == TYPE_STALL) + continue; + emit_insn_before (gen_stall (GEN_INT (1)), next); + } + } +} + +/* We use the machine specific reorg pass for emitting CSYNC instructions + after conditional branches as needed. + + The Blackfin is unusual in that a code sequence like + if cc jump label + r0 = (p0) + may speculatively perform the load even if the condition isn't true. This + happens for a branch that is predicted not taken, because the pipeline + isn't flushed or stalled, so the early stages of the following instructions, + which perform the memory reference, are allowed to execute before the + jump condition is evaluated. + Therefore, we must insert additional instructions in all places where this + could lead to incorrect behavior. The manual recommends CSYNC, while + VDSP seems to use NOPs (even though its corresponding compiler option is + named CSYNC). + + When optimizing for speed, we emit NOPs, which seems faster than a CSYNC. + When optimizing for size, we turn the branch into a predicted taken one. + This may be slower due to mispredicts, but saves code size. */ + +static void +bfin_reorg (void) +{ + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + if (flag_schedule_insns_after_reload) + { + splitting_for_sched = 1; + split_all_insns (); + splitting_for_sched = 0; + + add_sched_insns_for_speculation (); + + timevar_push (TV_SCHED2); + if (flag_selective_scheduling2 + && !maybe_skip_selective_scheduling ()) + run_selective_scheduling (); + else + schedule_insns (); + timevar_pop (TV_SCHED2); + + /* Examine the schedule and insert nops as necessary for 64-bit parallel + instructions. */ + bfin_gen_bundles (); + } + + df_analyze (); + + /* Doloop optimization */ + if (cfun->machine->has_hardware_loops) + bfin_reorg_loops (); + + workaround_speculation (); + + if (flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + reorder_var_tracking_notes (); + timevar_pop (TV_VAR_TRACKING); + } + + df_finish_pass (false); + + workaround_rts_anomaly (); +} + +/* Handle interrupt_handler, exception_handler and nmi_handler function + attributes; arguments as in struct attribute_spec.handler. */ + +static tree +handle_int_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree x = *node; + if (TREE_CODE (x) == FUNCTION_DECL) + x = TREE_TYPE (x); + + if (TREE_CODE (x) != FUNCTION_TYPE) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + else if (funkind (x) != SUBROUTINE) + error ("multiple function type attributes specified"); + + return NULL_TREE; +} + +/* Return 0 if the attributes for two types are incompatible, 1 if they + are compatible, and 2 if they are nearly compatible (which causes a + warning to be generated). 
*/ + +static int +bfin_comp_type_attributes (const_tree type1, const_tree type2) +{ + e_funkind kind1, kind2; + + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + kind1 = funkind (type1); + kind2 = funkind (type2); + + if (kind1 != kind2) + return 0; + + /* Check for mismatched modifiers */ + if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("saveall", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("saveall", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type2))) + return 0; + + if (!lookup_attribute ("longcall", TYPE_ATTRIBUTES (type1)) + != !lookup_attribute ("longcall", TYPE_ATTRIBUTES (type2))) + return 0; + + return 1; +} + +/* Handle a "longcall" or "shortcall" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +bfin_handle_longcall_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + if ((strcmp (IDENTIFIER_POINTER (name), "longcall") == 0 + && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (*node))) + || (strcmp (IDENTIFIER_POINTER (name), "shortcall") == 0 + && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node)))) + { + warning (OPT_Wattributes, + "can%'t apply both longcall and shortcall attributes to the same function"); + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle a "l1_text" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != FUNCTION_DECL) + { + error ("%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + /* The decl may have already been given a section attribute + from a previous declaration. Ensure they match. */ + else if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l1.text") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l1.text"); + + return NULL_TREE; +} + +/* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute; + arguments as in struct attribute_spec.handler. 
*/ + +static tree +bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args), + int ARG_UNUSED (flags), bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) != VAR_DECL) + { + error ("%qE attribute only applies to variables", + name); + *no_add_attrs = true; + } + else if (current_function_decl != NULL_TREE + && !TREE_STATIC (decl)) + { + error ("%qE attribute cannot be specified for local variables", + name); + *no_add_attrs = true; + } + else + { + const char *section_name; + + if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0) + section_name = ".l1.data"; + else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0) + section_name = ".l1.data.A"; + else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0) + section_name = ".l1.data.B"; + else + gcc_unreachable (); + + /* The decl may have already been given a section attribute + from a previous declaration. Ensure they match. */ + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + section_name) != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) + = build_string (strlen (section_name) + 1, section_name); + } + + return NULL_TREE; +} + +/* Handle a "l2" attribute; arguments as in struct attribute_spec.handler. */ + +static tree +bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name), + tree ARG_UNUSED (args), int ARG_UNUSED (flags), + bool *no_add_attrs) +{ + tree decl = *node; + + if (TREE_CODE (decl) == FUNCTION_DECL) + { + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l2.text") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l2.text"); + } + else if (TREE_CODE (decl) == VAR_DECL) + { + if (DECL_SECTION_NAME (decl) != NULL_TREE + && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".l2.data") != 0) + { + error ("section of %q+D conflicts with previous declaration", + decl); + *no_add_attrs = true; + } + else + DECL_SECTION_NAME (decl) = build_string (9, ".l2.data"); + } + + return NULL_TREE; +} + +/* Table of valid machine attributes. 
*/ +static const struct attribute_spec bfin_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity } */ + { "interrupt_handler", 0, 0, false, true, true, handle_int_attribute, + false }, + { "exception_handler", 0, 0, false, true, true, handle_int_attribute, + false }, + { "nmi_handler", 0, 0, false, true, true, handle_int_attribute, false }, + { "nesting", 0, 0, false, true, true, NULL, false }, + { "kspisusp", 0, 0, false, true, true, NULL, false }, + { "saveall", 0, 0, false, true, true, NULL, false }, + { "longcall", 0, 0, false, true, true, bfin_handle_longcall_attribute, + false }, + { "shortcall", 0, 0, false, true, true, bfin_handle_longcall_attribute, + false }, + { "l1_text", 0, 0, true, false, false, bfin_handle_l1_text_attribute, + false }, + { "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute, + false }, + { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute, + false }, + { "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute, + false }, + { "l2", 0, 0, true, false, false, bfin_handle_l2_attribute, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Implementation of TARGET_ASM_INTEGER. When using FD-PIC, we need to + tell the assembler to generate pointers to function descriptors in + some cases. */ + +static bool +bfin_assemble_integer (rtx value, unsigned int size, int aligned_p) +{ + if (TARGET_FDPIC && size == UNITS_PER_WORD) + { + if (GET_CODE (value) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (value)) + { + fputs ("\t.picptr\tfuncdesc(", asm_out_file); + output_addr_const (asm_out_file, value); + fputs (")\n", asm_out_file); + return true; + } + if (!aligned_p) + { + /* We've set the unaligned SI op to NULL, so we always have to + handle the unaligned case here. */ + assemble_integer_with_op ("\t.4byte\t", value); + return true; + } + } + return default_assemble_integer (value, size, aligned_p); +} + +/* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ + +static void +bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, + tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, tree function) +{ + rtx xops[3]; + /* The this parameter is passed as the first argument. */ + rtx this_rtx = gen_rtx_REG (Pmode, REG_R0); + + /* Adjust the this parameter by a fixed constant. */ + if (delta) + { + xops[1] = this_rtx; + if (delta >= -64 && delta <= 63) + { + xops[0] = GEN_INT (delta); + output_asm_insn ("%1 += %0;", xops); + } + else if (delta >= -128 && delta < -64) + { + xops[0] = GEN_INT (delta + 64); + output_asm_insn ("%1 += -64; %1 += %0;", xops); + } + else if (delta > 63 && delta <= 126) + { + xops[0] = GEN_INT (delta - 63); + output_asm_insn ("%1 += 63; %1 += %0;", xops); + } + else + { + xops[0] = GEN_INT (delta); + output_asm_insn ("r3.l = %h0; r3.h = %d0; %1 = %1 + r3;", xops); + } + } + + /* Adjust the this parameter by a value stored in the vtable. */ + if (vcall_offset) + { + rtx p2tmp = gen_rtx_REG (Pmode, REG_P2); + rtx tmp = gen_rtx_REG (Pmode, REG_R3); + + xops[1] = tmp; + xops[2] = p2tmp; + output_asm_insn ("%2 = r0; %2 = [%2];", xops); + + /* Adjust the this parameter. 
*/ + xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, p2tmp, + vcall_offset)); + if (!memory_operand (xops[0], Pmode)) + { + rtx tmp2 = gen_rtx_REG (Pmode, REG_P1); + xops[0] = GEN_INT (vcall_offset); + xops[1] = tmp2; + output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops); + xops[0] = gen_rtx_MEM (Pmode, p2tmp); + } + xops[2] = this_rtx; + output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops); + } + + xops[0] = XEXP (DECL_RTL (function), 0); + if (1 || !flag_pic || (*targetm.binds_local_p) (function)) + output_asm_insn ("jump.l\t%P0", xops); +} + +/* Codes for all the Blackfin builtins. */ +enum bfin_builtins +{ + BFIN_BUILTIN_CSYNC, + BFIN_BUILTIN_SSYNC, + BFIN_BUILTIN_ONES, + BFIN_BUILTIN_COMPOSE_2X16, + BFIN_BUILTIN_EXTRACTLO, + BFIN_BUILTIN_EXTRACTHI, + + BFIN_BUILTIN_SSADD_2X16, + BFIN_BUILTIN_SSSUB_2X16, + BFIN_BUILTIN_SSADDSUB_2X16, + BFIN_BUILTIN_SSSUBADD_2X16, + BFIN_BUILTIN_MULT_2X16, + BFIN_BUILTIN_MULTR_2X16, + BFIN_BUILTIN_NEG_2X16, + BFIN_BUILTIN_ABS_2X16, + BFIN_BUILTIN_MIN_2X16, + BFIN_BUILTIN_MAX_2X16, + + BFIN_BUILTIN_SSADD_1X16, + BFIN_BUILTIN_SSSUB_1X16, + BFIN_BUILTIN_MULT_1X16, + BFIN_BUILTIN_MULTR_1X16, + BFIN_BUILTIN_NORM_1X16, + BFIN_BUILTIN_NEG_1X16, + BFIN_BUILTIN_ABS_1X16, + BFIN_BUILTIN_MIN_1X16, + BFIN_BUILTIN_MAX_1X16, + + BFIN_BUILTIN_SUM_2X16, + BFIN_BUILTIN_DIFFHL_2X16, + BFIN_BUILTIN_DIFFLH_2X16, + + BFIN_BUILTIN_SSADD_1X32, + BFIN_BUILTIN_SSSUB_1X32, + BFIN_BUILTIN_NORM_1X32, + BFIN_BUILTIN_ROUND_1X32, + BFIN_BUILTIN_NEG_1X32, + BFIN_BUILTIN_ABS_1X32, + BFIN_BUILTIN_MIN_1X32, + BFIN_BUILTIN_MAX_1X32, + BFIN_BUILTIN_MULT_1X32, + BFIN_BUILTIN_MULT_1X32X32, + BFIN_BUILTIN_MULT_1X32X32NS, + + BFIN_BUILTIN_MULHISILL, + BFIN_BUILTIN_MULHISILH, + BFIN_BUILTIN_MULHISIHL, + BFIN_BUILTIN_MULHISIHH, + + BFIN_BUILTIN_LSHIFT_1X16, + BFIN_BUILTIN_LSHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X16, + BFIN_BUILTIN_SSASHIFT_2X16, + BFIN_BUILTIN_SSASHIFT_1X32, + + BFIN_BUILTIN_CPLX_MUL_16, + BFIN_BUILTIN_CPLX_MAC_16, + BFIN_BUILTIN_CPLX_MSU_16, + + BFIN_BUILTIN_CPLX_MUL_16_S40, + BFIN_BUILTIN_CPLX_MAC_16_S40, + BFIN_BUILTIN_CPLX_MSU_16_S40, + + BFIN_BUILTIN_CPLX_SQU, + + BFIN_BUILTIN_LOADBYTES, + + BFIN_BUILTIN_MAX +}; + +#define def_builtin(NAME, TYPE, CODE) \ +do { \ + add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + NULL, NULL_TREE); \ +} while (0) + +/* Set up all builtin functions for this target. 
*/ +static void +bfin_init_builtins (void) +{ + tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode); + tree void_ftype_void + = build_function_type_list (void_type_node, NULL_TREE); + tree short_ftype_short + = build_function_type_list (short_integer_type_node, short_integer_type_node, + NULL_TREE); + tree short_ftype_int_int + = build_function_type_list (short_integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_int_int + = build_function_type_list (integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_int + = build_function_type_list (integer_type_node, integer_type_node, + NULL_TREE); + tree short_ftype_int + = build_function_type_list (short_integer_type_node, integer_type_node, + NULL_TREE); + tree int_ftype_v2hi_v2hi + = build_function_type_list (integer_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v2hi_ftype_int_int + = build_function_type_list (V2HI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_int + = build_function_type_list (V2HI_type_node, V2HI_type_node, + integer_type_node, NULL_TREE); + tree int_ftype_short_short + = build_function_type_list (integer_type_node, short_integer_type_node, + short_integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE); + tree short_ftype_v2hi + = build_function_type_list (short_integer_type_node, V2HI_type_node, + NULL_TREE); + tree int_ftype_pint + = build_function_type_list (integer_type_node, + build_pointer_type (integer_type_node), + NULL_TREE); + + /* Add the remaining MMX insns with somewhat more complicated types. 
*/ + def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC); + def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC); + + def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES); + + def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int, + BFIN_BUILTIN_COMPOSE_2X16); + def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi, + BFIN_BUILTIN_EXTRACTHI); + def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi, + BFIN_BUILTIN_EXTRACTLO); + + def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MIN_2X16); + def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MAX_2X16); + + def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADD_2X16); + def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUB_2X16); + def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADDSUB_2X16); + def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUBADD_2X16); + def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULT_2X16); + def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULTR_2X16); + def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi, + BFIN_BUILTIN_NEG_2X16); + def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi, + BFIN_BUILTIN_ABS_2X16); + + def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MIN_1X16); + def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MAX_1X16); + + def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSADD_1X16); + def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSSUB_1X16); + def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MULT_1X16); + def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_MULTR_1X16); + def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short, + BFIN_BUILTIN_NEG_1X16); + def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short, + BFIN_BUILTIN_ABS_1X16); + def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int, + BFIN_BUILTIN_NORM_1X16); + + def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_SUM_2X16); + def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_DIFFHL_2X16); + def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi, + BFIN_BUILTIN_DIFFLH_2X16); + + def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISILL); + def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISIHL); + def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISILH); + def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi, + BFIN_BUILTIN_MULHISIHH); + + def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_MIN_1X32); + def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_MAX_1X32); + + def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSADD_1X32); + def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSSUB_1X32); + def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int, + BFIN_BUILTIN_NEG_1X32); + def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int, + BFIN_BUILTIN_ABS_1X32); + def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int, + BFIN_BUILTIN_NORM_1X32); + 
def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int, + BFIN_BUILTIN_ROUND_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short, + BFIN_BUILTIN_MULT_1X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32); + def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int, + BFIN_BUILTIN_MULT_1X32X32NS); + + /* Shifts. */ + def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X16); + def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int, + BFIN_BUILTIN_SSASHIFT_2X16); + def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int, + BFIN_BUILTIN_LSHIFT_1X16); + def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int, + BFIN_BUILTIN_LSHIFT_2X16); + def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int, + BFIN_BUILTIN_SSASHIFT_1X32); + + /* Complex numbers. */ + def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSADD_2X16); + def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_SSSUB_2X16); + def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MUL_16); + def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MAC_16); + def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MSU_16); + def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MUL_16_S40); + def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MAC_16_S40); + def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi, + BFIN_BUILTIN_CPLX_MSU_16_S40); + def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi, + BFIN_BUILTIN_CPLX_SQU); + + /* "Unaligned" load. 
*/ + def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint, + BFIN_BUILTIN_LOADBYTES); + +} + + +struct builtin_description +{ + const enum insn_code icode; + const char *const name; + const enum bfin_builtins code; + int macflag; +}; + +static const struct builtin_description bdesc_2arg[] = +{ + { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 }, + + { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 }, + { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 }, + { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 }, + { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 }, + { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 }, + + { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 }, + { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 }, + { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 }, + { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 }, + + { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 }, + { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 }, + { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 }, + { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 }, + + { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 }, + { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 }, + { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 }, + { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 }, + { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 }, + { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 }, + + { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE }, + { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T }, + { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE }, + { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T }, + { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE }, + + { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 }, + { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 }, + { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 }, + { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 } + +}; + +static const struct builtin_description bdesc_1arg[] = +{ + { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 }, + + { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 }, + + { CODE_FOR_clrsbhi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 }, + { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 }, + { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 }, + + { CODE_FOR_clrsbsi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 }, + { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 }, + { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 }, + { 
CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 }, + + { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 }, + { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 }, + { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 }, + { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 } +}; + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (x, CONST0_RTX (SImode))); + return gen_lowpart (mode, x); +} + +/* Subroutine of bfin_expand_builtin to take care of binop insns. MACFLAG is -1 + if this is a normal binary op, or one of the MACFLAG_xxx constants. */ + +static rtx +bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target, + int macflag) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode op1mode = GET_MODE (op1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode) + { + op1mode = HImode; + op1 = gen_lowpart (HImode, op1); + } + /* In case the insn wants input operands in modes different from + the result, abort. */ + gcc_assert ((op0mode == mode0 || op0mode == VOIDmode) + && (op1mode == mode1 || op1mode == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + if (macflag == -1) + pat = GEN_FCN (icode) (target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag)); + if (! pat) + return 0; + + emit_insn (pat); + return target; +} + +/* Subroutine of bfin_expand_builtin to take care of unop insns. */ + +static rtx +bfin_expand_unop_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (op0mode == SImode && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + gcc_assert (op0mode == mode0 || op0mode == VOIDmode); + + if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + size_t i; + enum insn_code icode; + const struct builtin_description *d; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arg0, arg1, arg2; + rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg; + enum machine_mode tmode, mode0; + + switch (fcode) + { + case BFIN_BUILTIN_CSYNC: + emit_insn (gen_csync ()); + return 0; + case BFIN_BUILTIN_SSYNC: + emit_insn (gen_ssync ()); + return 0; + + case BFIN_BUILTIN_DIFFHL_2X16: + case BFIN_BUILTIN_DIFFLH_2X16: + case BFIN_BUILTIN_SUM_2X16: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3 + : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3 + : CODE_FOR_ssaddhilov2hi3); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case BFIN_BUILTIN_MULT_1X32X32: + case BFIN_BUILTIN_MULT_1X32X32NS: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + if (! target + || !register_operand (target, SImode)) + target = gen_reg_rtx (SImode); + if (! register_operand (op0, SImode)) + op0 = copy_to_mode_reg (SImode, op0); + if (! register_operand (op1, SImode)) + op1 = copy_to_mode_reg (SImode, op1); + + a1reg = gen_rtx_REG (PDImode, REG_A1); + a0reg = gen_rtx_REG (PDImode, REG_A0); + tmp1 = gen_lowpart (V2HImode, op0); + tmp2 = gen_lowpart (V2HImode, op1); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, op0), + gen_lowpart (HImode, op1), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + + if (fcode == BFIN_BUILTIN_MULT_1X32X32) + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + else + { + /* For saturating multiplication, there's exactly one special case + to be handled: multiplying the smallest negative value with + itself. Due to shift correction in fractional multiplies, this + can overflow. Iff this happens, OP2 will contain 1, which, when + added in 32 bits to the smallest negative, wraps to the largest + positive, which is the result we want. 
*/ + op2 = gen_reg_rtx (V2HImode); + emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx)); + emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC), + gen_lowpart (SImode, op2))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2, + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_NONE), + GEN_INT (MACFLAG_M))); + op2 = gen_reg_rtx (SImode); + emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC))); + } + emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1, + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15))); + emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg)); + if (fcode == BFIN_BUILTIN_MULT_1X32X32NS) + emit_insn (gen_addsi3 (target, target, op2)); + return target; + + case BFIN_BUILTIN_CPLX_MUL_16: + case BFIN_BUILTIN_CPLX_MUL_16_S40: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_macv2hi_parts; + tmode = insn_data[icode].operand[0].mode; + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (tmode); + if (! register_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + if (! register_operand (op1, GET_MODE (op1))) + op1 = copy_to_mode_reg (GET_MODE (op1), op1); + + if (fcode == BFIN_BUILTIN_CPLX_MUL_16) + emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, GEN_INT (MACFLAG_W32))); + else + emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, GEN_INT (MACFLAG_NONE))); + emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx, + const1_rtx, const1_rtx, + const0_rtx, accvec, const1_rtx, const0_rtx, + GEN_INT (MACFLAG_NONE), accvec)); + + return target; + + case BFIN_BUILTIN_CPLX_MAC_16: + case BFIN_BUILTIN_CPLX_MSU_16: + case BFIN_BUILTIN_CPLX_MAC_16_S40: + case BFIN_BUILTIN_CPLX_MSU_16_S40: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_macv2hi_parts; + tmode = insn_data[icode].operand[0].mode; + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (tmode); + if (! register_operand (op1, GET_MODE (op1))) + op1 = copy_to_mode_reg (GET_MODE (op1), op1); + if (! 
register_operand (op2, GET_MODE (op2))) + op2 = copy_to_mode_reg (GET_MODE (op2), op2); + + tmp1 = gen_reg_rtx (SImode); + tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16))); + emit_move_insn (tmp2, gen_lowpart (SImode, op0)); + emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx)); + emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2)); + if (fcode == BFIN_BUILTIN_CPLX_MAC_16 + || fcode == BFIN_BUILTIN_CPLX_MSU_16) + emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, accvec, const0_rtx, + const0_rtx, + GEN_INT (MACFLAG_W32))); + else + emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx, + const0_rtx, const0_rtx, + const1_rtx, accvec, const0_rtx, + const0_rtx, + GEN_INT (MACFLAG_NONE))); + if (fcode == BFIN_BUILTIN_CPLX_MAC_16 + || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40) + { + tmp1 = const1_rtx; + tmp2 = const0_rtx; + } + else + { + tmp1 = const0_rtx; + tmp2 = const1_rtx; + } + emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx, + const1_rtx, const1_rtx, + const0_rtx, accvec, tmp1, tmp2, + GEN_INT (MACFLAG_NONE), accvec)); + + return target; + + case BFIN_BUILTIN_CPLX_SQU: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + accvec = gen_reg_rtx (V2PDImode); + icode = CODE_FOR_flag_mulv2hi; + tmp1 = gen_reg_rtx (V2HImode); + tmp2 = gen_reg_rtx (V2HImode); + + if (! target + || GET_MODE (target) != V2HImode + || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode)) + target = gen_reg_rtx (V2HImode); + if (! register_operand (op0, GET_MODE (op0))) + op0 = copy_to_mode_reg (GET_MODE (op0), op0); + + emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE))); + + emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0, + const0_rtx, const1_rtx, + GEN_INT (MACFLAG_NONE))); + + emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx, + const0_rtx)); + emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1, + const0_rtx, const1_rtx)); + + return target; + + default: + break; + } + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + return bfin_expand_binop_builtin (d->icode, exp, target, + d->macflag); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == fcode) + return bfin_expand_unop_builtin (d->icode, exp, target); + + gcc_unreachable (); +} + +static void +bfin_conditional_register_usage (void) +{ + /* initialize condition code flag register rtx */ + bfin_cc_rtx = gen_rtx_REG (BImode, REG_CC); + bfin_rets_rtx = gen_rtx_REG (Pmode, REG_RETS); + if (TARGET_FDPIC) + call_used_regs[FDPIC_REGNO] = 1; + if (!TARGET_FDPIC && flag_pic) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } +} + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS bfin_init_builtins + +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN bfin_expand_builtin + +#undef TARGET_ASM_GLOBALIZE_LABEL +#define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START output_file_start + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE bfin_attribute_table + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES bfin_comp_type_attributes + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS bfin_rtx_costs + +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST 
bfin_address_cost + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST bfin_register_move_cost + +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST bfin_memory_move_cost + +#undef TARGET_ASM_INTEGER +#define TARGET_ASM_INTEGER bfin_assemble_integer + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL bfin_function_ok_for_sibcall + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST bfin_adjust_cost + +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE bfin_issue_rate + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes + +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG bfin_function_arg + +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE bfin_function_arg_advance + +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE bfin_pass_by_reference + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX bfin_struct_value_rtx + +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P bfin_vector_mode_supported_p + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE bfin_option_override + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD bfin_secondary_reload + +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P bfin_class_likely_spilled_p + +#undef TARGET_DELEGITIMIZE_ADDRESS +#define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P bfin_legitimate_constant_p + +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem + +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY bfin_return_in_memory + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P bfin_legitimate_address_p + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE bfin_can_eliminate + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE bfin_conditional_register_usage + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE bfin_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT bfin_trampoline_init + +#undef TARGET_EXTRA_LIVE_ON_ENTRY +#define TARGET_EXTRA_LIVE_ON_ENTRY bfin_extra_live_on_entry + +/* Passes after sched2 can break the helpful TImode annotations that + haifa-sched puts on every insn. Just do scheduling in reorg. */ +#undef TARGET_DELAY_SCHED2 +#define TARGET_DELAY_SCHED2 true + +/* Variable tracking should be run after all optimizations which + change order of insns. It also needs a valid CFG. 
*/ +#undef TARGET_DELAY_VARTRACK +#define TARGET_DELAY_VARTRACK true + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P bfin_can_use_doloop_p + +struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc-4.9/gcc/config/bfin/bfin.h b/gcc-4.9/gcc/config/bfin/bfin.h new file mode 100644 index 000000000..d6f4c610d --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin.h @@ -0,0 +1,1156 @@ +/* Definitions for the Blackfin port. + Copyright (C) 2005-2014 Free Software Foundation, Inc. + Contributed by Analog Devices. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef _BFIN_CONFIG +#define _BFIN_CONFIG + +#ifndef BFIN_OPTS_H +#include "config/bfin/bfin-opts.h" +#endif + +#define OBJECT_FORMAT_ELF + +#define BRT 1 +#define BRF 0 + +/* Predefinition in the preprocessor for this target machine */ +#ifndef TARGET_CPU_CPP_BUILTINS +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("bfin"); \ + builtin_define_std ("BFIN"); \ + builtin_define ("__ADSPBLACKFIN__"); \ + builtin_define ("__ADSPLPBLACKFIN__"); \ + \ + switch (bfin_cpu_type) \ + { \ + case BFIN_CPU_BF512: \ + builtin_define ("__ADSPBF512__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF514: \ + builtin_define ("__ADSPBF514__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF516: \ + builtin_define ("__ADSPBF516__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF518: \ + builtin_define ("__ADSPBF518__"); \ + builtin_define ("__ADSPBF51x__"); \ + break; \ + case BFIN_CPU_BF522: \ + builtin_define ("__ADSPBF522__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF523: \ + builtin_define ("__ADSPBF523__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF524: \ + builtin_define ("__ADSPBF524__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF525: \ + builtin_define ("__ADSPBF525__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF526: \ + builtin_define ("__ADSPBF526__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF527: \ + builtin_define ("__ADSPBF527__"); \ + builtin_define ("__ADSPBF52x__"); \ + break; \ + case BFIN_CPU_BF531: \ + builtin_define ("__ADSPBF531__"); \ + break; \ + case BFIN_CPU_BF532: \ + builtin_define ("__ADSPBF532__"); \ + break; \ + case BFIN_CPU_BF533: \ + builtin_define ("__ADSPBF533__"); \ + break; \ + case BFIN_CPU_BF534: \ + builtin_define ("__ADSPBF534__"); \ + break; \ + case BFIN_CPU_BF536: \ + builtin_define ("__ADSPBF536__"); \ + break; \ + case BFIN_CPU_BF537: \ + builtin_define ("__ADSPBF537__"); \ + break; \ + case BFIN_CPU_BF538: \ + builtin_define ("__ADSPBF538__"); \ + break; \ + case BFIN_CPU_BF539: \ + builtin_define ("__ADSPBF539__"); \ + break; \ + case BFIN_CPU_BF542M: \ + builtin_define ("__ADSPBF542M__"); \ + case BFIN_CPU_BF542: \ + builtin_define ("__ADSPBF542__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + 
case BFIN_CPU_BF544M: \ + builtin_define ("__ADSPBF544M__"); \ + case BFIN_CPU_BF544: \ + builtin_define ("__ADSPBF544__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF547M: \ + builtin_define ("__ADSPBF547M__"); \ + case BFIN_CPU_BF547: \ + builtin_define ("__ADSPBF547__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF548M: \ + builtin_define ("__ADSPBF548M__"); \ + case BFIN_CPU_BF548: \ + builtin_define ("__ADSPBF548__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF549M: \ + builtin_define ("__ADSPBF549M__"); \ + case BFIN_CPU_BF549: \ + builtin_define ("__ADSPBF549__"); \ + builtin_define ("__ADSPBF54x__"); \ + break; \ + case BFIN_CPU_BF561: \ + builtin_define ("__ADSPBF561__"); \ + break; \ + case BFIN_CPU_BF592: \ + builtin_define ("__ADSPBF592__"); \ + builtin_define ("__ADSPBF59x__"); \ + break; \ + } \ + \ + if (bfin_si_revision != -1) \ + { \ + /* space of 0xnnnn and a NUL */ \ + char *buf = XALLOCAVEC (char, 7); \ + \ + sprintf (buf, "0x%04x", bfin_si_revision); \ + builtin_define_with_value ("__SILICON_REVISION__", buf, 0); \ + } \ + \ + if (bfin_workarounds) \ + builtin_define ("__WORKAROUNDS_ENABLED"); \ + if (ENABLE_WA_SPECULATIVE_LOADS) \ + builtin_define ("__WORKAROUND_SPECULATIVE_LOADS"); \ + if (ENABLE_WA_SPECULATIVE_SYNCS) \ + builtin_define ("__WORKAROUND_SPECULATIVE_SYNCS"); \ + if (ENABLE_WA_INDIRECT_CALLS) \ + builtin_define ("__WORKAROUND_INDIRECT_CALLS"); \ + if (ENABLE_WA_RETS) \ + builtin_define ("__WORKAROUND_RETS"); \ + \ + if (TARGET_FDPIC) \ + { \ + builtin_define ("__BFIN_FDPIC__"); \ + builtin_define ("__FDPIC__"); \ + } \ + if (TARGET_ID_SHARED_LIBRARY \ + && !TARGET_SEP_DATA) \ + builtin_define ("__ID_SHARED_LIB__"); \ + if (flag_no_builtin) \ + builtin_define ("__NO_BUILTIN"); \ + if (TARGET_MULTICORE) \ + builtin_define ("__BFIN_MULTICORE"); \ + if (TARGET_COREA) \ + builtin_define ("__BFIN_COREA"); \ + if (TARGET_COREB) \ + builtin_define ("__BFIN_COREB"); \ + if (TARGET_SDRAM) \ + builtin_define ("__BFIN_SDRAM"); \ + } \ + while (0) +#endif + +#define DRIVER_SELF_SPECS SUBTARGET_DRIVER_SELF_SPECS "\ + %{mleaf-id-shared-library:%{!mid-shared-library:-mid-shared-library}} \ + %{mfdpic:%{!fpic:%{!fpie:%{!fPIC:%{!fPIE:\ + %{!fno-pic:%{!fno-pie:%{!fno-PIC:%{!fno-PIE:-fpie}}}}}}}}} \ +" +#ifndef SUBTARGET_DRIVER_SELF_SPECS +# define SUBTARGET_DRIVER_SELF_SPECS +#endif + +#define LINK_GCC_C_SEQUENCE_SPEC "\ + %{mfast-fp:-lbffastfp} %G %L %{mfast-fp:-lbffastfp} %G \ +" + +#undef ASM_SPEC +#define ASM_SPEC "\ + %{mno-fdpic:-mnopic} %{mfdpic}" + +#define LINK_SPEC "\ +%{h*} %{v:-V} \ +%{mfdpic:-melf32bfinfd -z text} \ +%{static:-dn -Bstatic} \ +%{shared:-G -Bdynamic} \ +%{symbolic:-Bsymbolic} \ +-init __init -fini __fini " + +/* Generate DSP instructions, like DSP halfword loads */ +#define TARGET_DSP (1) + +#define TARGET_DEFAULT 0 + +/* Maximum number of library ids we permit */ +#define MAX_LIBRARY_ID 255 + +extern const char *bfin_library_id_string; + +#define FUNCTION_MODE SImode +#define Pmode SImode + +/* store-condition-codes instructions store 0 for false + This is the value stored for true. */ +#define STORE_FLAG_VALUE 1 + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. 
*/ +#define STACK_GROWS_DOWNWARD + +#define STACK_PUSH_CODE PRE_DEC + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* We define a dummy ARGP register; the parameters start at offset 0 from + it. */ +#define FIRST_PARM_OFFSET(DECL) 0 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET 0 + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM REG_P6 + +/* Base register for access to local variables of the function. */ +#define FRAME_POINTER_REGNUM REG_P7 + +/* A dummy register that will be eliminated to either FP or SP. */ +#define ARG_POINTER_REGNUM REG_ARGP + +/* `PIC_OFFSET_TABLE_REGNUM' + The register number of the register used to address a table of + static data addresses in memory. In some cases this register is + defined by a processor's "application binary interface" (ABI). + When this macro is defined, RTL is generated for this register + once, as with the stack pointer and frame pointer registers. If + this macro is not defined, it is up to the machine-dependent files + to allocate such a register (if necessary). */ +#define PIC_OFFSET_TABLE_REGNUM (REG_P5) + +#define FDPIC_FPTR_REGNO REG_P1 +#define FDPIC_REGNO REG_P3 +#define OUR_FDPIC_REG get_hard_reg_initial_val (SImode, FDPIC_REGNO) + +/* A static chain register for nested functions. We need to use a + call-clobbered register for this. */ +#define STATIC_CHAIN_REGNUM REG_P2 + +/* Define this if functions should assume that stack space has been + allocated for arguments even when their values are passed in + registers. + + The value of this macro is the size, in bytes, of the area reserved for + arguments passed in registers. + + This space can either be allocated by the caller or be a part of the + machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE' + says which. */ +#define FIXED_STACK_AREA 12 +#define REG_PARM_STACK_SPACE(FNDECL) FIXED_STACK_AREA + +/* Define this if the above stack space is to be considered part of the + * space allocated by the caller. */ +#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1 + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/*#define DATA_ALIGNMENT(TYPE, BASIC-ALIGN) for arrays.. */ + +/* If defined, a C expression to compute the alignment for a local + variable. TYPE is the data type, and ALIGN is the alignment that + the object would ordinarily have. The value of this macro is used + instead of that alignment to align the object. + + If this macro is not defined, then ALIGN is used. + + One use of this macro is to increase alignment of medium-size + data to make it all fit in fewer cache lines. */ + +#define LOCAL_ALIGNMENT(TYPE, ALIGN) bfin_local_alignment ((TYPE), (ALIGN)) + +/* Make strings word-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define TRAMPOLINE_SIZE (TARGET_FDPIC ? 
30 : 18) + +/* Definitions for register eliminations. + + This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. + + There are two registers that can always be eliminated on the i386. + The frame pointer and the arg pointer can be replaced by either the + hard frame pointer or to the stack pointer, depending upon the + circumstances. The hard frame pointer is not used before reload and + so it is not eligible for elimination. */ + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} \ + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = bfin_initial_elimination_offset ((FROM), (TO))) + +/* This processor has + 8 data register for doing arithmetic + 8 pointer register for doing addressing, including + 1 stack pointer P6 + 1 frame pointer P7 + 4 sets of indexing registers (I0-3, B0-3, L0-3, M0-3) + 1 condition code flag register CC + 5 return address registers RETS/I/X/N/E + 1 arithmetic status register (ASTAT). */ + +#define FIRST_PSEUDO_REGISTER 50 + +#define D_REGNO_P(X) ((X) <= REG_R7) +#define P_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_P7) +#define I_REGNO_P(X) ((X) >= REG_I0 && (X) <= REG_I3) +#define DP_REGNO_P(X) (D_REGNO_P (X) || P_REGNO_P (X)) +#define ADDRESS_REGNO_P(X) ((X) >= REG_P0 && (X) <= REG_M3) +#define DREG_P(X) (REG_P (X) && D_REGNO_P (REGNO (X))) +#define PREG_P(X) (REG_P (X) && P_REGNO_P (REGNO (X))) +#define IREG_P(X) (REG_P (X) && I_REGNO_P (REGNO (X))) +#define DPREG_P(X) (REG_P (X) && DP_REGNO_P (REGNO (X))) + +#define REGISTER_NAMES { \ + "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", \ + "P0", "P1", "P2", "P3", "P4", "P5", "SP", "FP", \ + "I0", "I1", "I2", "I3", "B0", "B1", "B2", "B3", \ + "L0", "L1", "L2", "L3", "M0", "M1", "M2", "M3", \ + "A0", "A1", \ + "CC", \ + "RETS", "RETI", "RETX", "RETN", "RETE", "ASTAT", "SEQSTAT", "USP", \ + "ARGP", \ + "LT0", "LT1", "LC0", "LC1", "LB0", "LB1" \ +} + +#define SHORT_REGISTER_NAMES { \ + "R0.L", "R1.L", "R2.L", "R3.L", "R4.L", "R5.L", "R6.L", "R7.L", \ + "P0.L", "P1.L", "P2.L", "P3.L", "P4.L", "P5.L", "SP.L", "FP.L", \ + "I0.L", "I1.L", "I2.L", "I3.L", "B0.L", "B1.L", "B2.L", "B3.L", \ + "L0.L", "L1.L", "L2.L", "L3.L", "M0.L", "M1.L", "M2.L", "M3.L", } + +#define HIGH_REGISTER_NAMES { \ + "R0.H", "R1.H", "R2.H", "R3.H", "R4.H", "R5.H", "R6.H", "R7.H", \ + "P0.H", "P1.H", "P2.H", "P3.H", "P4.H", "P5.H", "SP.H", "FP.H", \ + "I0.H", "I1.H", "I2.H", "I3.H", "B0.H", "B1.H", "B2.H", "B3.H", \ + "L0.H", "L1.H", "L2.H", "L3.H", "M0.H", "M1.H", "M2.H", "M3.H", } + +#define DREGS_PAIR_NAMES { \ + "R1:0.p", 0, "R3:2.p", 0, "R5:4.p", 0, "R7:6.p", 0, } + +#define BYTE_REGISTER_NAMES { \ + "R0.B", "R1.B", "R2.B", "R3.B", "R4.B", "R5.B", "R6.B", "R7.B", } + + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. 
*/ + +#define FIXED_REGISTERS \ +/*r0 r1 r2 r3 r4 r5 r6 r7 p0 p1 p2 p3 p4 p5 p6 p7 */ \ +{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, \ +/*i0 i1 i2 i3 b0 b1 b2 b3 l0 l1 l2 l3 m0 m1 m2 m3 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, \ +/*a0 a1 cc rets/i/x/n/e astat seqstat usp argp lt0/1 lc0/1 */ \ + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*lb0/1 */ \ + 1, 1 \ +} + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ + +#define CALL_USED_REGISTERS \ +/*r0 r1 r2 r3 r4 r5 r6 r7 p0 p1 p2 p3 p4 p5 p6 p7 */ \ +{ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, \ +/*i0 i1 i2 i3 b0 b1 b2 b3 l0 l1 l2 l3 m0 m1 m2 m3 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*a0 a1 cc rets/i/x/n/e astat seqstat usp argp lt0/1 lc0/1 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ +/*lb0/1 */ \ + 1, 1 \ +} + +/* Order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. List frame pointer + late and fixed registers last. Note that, in general, we prefer + registers listed in CALL_USED_REGISTERS, keeping the others + available for storage of persistent values. */ + +#define REG_ALLOC_ORDER \ +{ REG_R0, REG_R1, REG_R2, REG_R3, REG_R7, REG_R6, REG_R5, REG_R4, \ + REG_P2, REG_P1, REG_P0, REG_P5, REG_P4, REG_P3, REG_P6, REG_P7, \ + REG_A0, REG_A1, \ + REG_I0, REG_I1, REG_I2, REG_I3, REG_B0, REG_B1, REG_B2, REG_B3, \ + REG_L0, REG_L1, REG_L2, REG_L3, REG_M0, REG_M1, REG_M2, REG_M3, \ + REG_RETS, REG_RETI, REG_RETX, REG_RETN, REG_RETE, \ + REG_ASTAT, REG_SEQSTAT, REG_USP, \ + REG_CC, REG_ARGP, \ + REG_LT0, REG_LT1, REG_LC0, REG_LC1, REG_LB0, REG_LB1 \ +} + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + + +enum reg_class +{ + NO_REGS, + IREGS, + BREGS, + LREGS, + MREGS, + CIRCREGS, /* Circular buffering registers, Ix, Bx, Lx together form. See Automatic Circular Buffering. */ + DAGREGS, + EVEN_AREGS, + ODD_AREGS, + AREGS, + CCREGS, + EVEN_DREGS, + ODD_DREGS, + D0REGS, + D1REGS, + D2REGS, + D3REGS, + D4REGS, + D5REGS, + D6REGS, + D7REGS, + DREGS, + P0REGS, + FDPIC_REGS, + FDPIC_FPTR_REGS, + PREGS_CLOBBERED, + PREGS, + IPREGS, + DPREGS, + MOST_REGS, + LT_REGS, + LC_REGS, + LB_REGS, + PROLOGUE_REGS, + NON_A_CC_REGS, + ALL_REGS, LIM_REG_CLASSES +}; + +#define N_REG_CLASSES ((int)LIM_REG_CLASSES) + +#define GENERAL_REGS DPREGS + +/* Give names of register classes as strings for dump file. 
*/ + +#define REG_CLASS_NAMES \ +{ "NO_REGS", \ + "IREGS", \ + "BREGS", \ + "LREGS", \ + "MREGS", \ + "CIRCREGS", \ + "DAGREGS", \ + "EVEN_AREGS", \ + "ODD_AREGS", \ + "AREGS", \ + "CCREGS", \ + "EVEN_DREGS", \ + "ODD_DREGS", \ + "D0REGS", \ + "D1REGS", \ + "D2REGS", \ + "D3REGS", \ + "D4REGS", \ + "D5REGS", \ + "D6REGS", \ + "D7REGS", \ + "DREGS", \ + "P0REGS", \ + "FDPIC_REGS", \ + "FDPIC_FPTR_REGS", \ + "PREGS_CLOBBERED", \ + "PREGS", \ + "IPREGS", \ + "DPREGS", \ + "MOST_REGS", \ + "LT_REGS", \ + "LC_REGS", \ + "LB_REGS", \ + "PROLOGUE_REGS", \ + "NON_A_CC_REGS", \ + "ALL_REGS" } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. + + When the machine has more than 32 registers, an integer does not suffice. + Then the integers are replaced by sub-initializers, braced groupings + containing several integers. Each sub-initializer must be suitable as an + initializer for the type `HARD_REG_SET' which is defined in + `hard-reg-set.h'. */ + +/* NOTE: DSP registers, IREGS - AREGS, are not GENERAL_REGS. We use + MOST_REGS as the union of DPREGS and DAGREGS. */ + +#define REG_CLASS_CONTENTS \ + /* 31 - 0 63-32 */ \ +{ { 0x00000000, 0 }, /* NO_REGS */ \ + { 0x000f0000, 0 }, /* IREGS */ \ + { 0x00f00000, 0 }, /* BREGS */ \ + { 0x0f000000, 0 }, /* LREGS */ \ + { 0xf0000000, 0 }, /* MREGS */ \ + { 0x0fff0000, 0 }, /* CIRCREGS */ \ + { 0xffff0000, 0 }, /* DAGREGS */ \ + { 0x00000000, 0x1 }, /* EVEN_AREGS */ \ + { 0x00000000, 0x2 }, /* ODD_AREGS */ \ + { 0x00000000, 0x3 }, /* AREGS */ \ + { 0x00000000, 0x4 }, /* CCREGS */ \ + { 0x00000055, 0 }, /* EVEN_DREGS */ \ + { 0x000000aa, 0 }, /* ODD_DREGS */ \ + { 0x00000001, 0 }, /* D0REGS */ \ + { 0x00000002, 0 }, /* D1REGS */ \ + { 0x00000004, 0 }, /* D2REGS */ \ + { 0x00000008, 0 }, /* D3REGS */ \ + { 0x00000010, 0 }, /* D4REGS */ \ + { 0x00000020, 0 }, /* D5REGS */ \ + { 0x00000040, 0 }, /* D6REGS */ \ + { 0x00000080, 0 }, /* D7REGS */ \ + { 0x000000ff, 0 }, /* DREGS */ \ + { 0x00000100, 0x000 }, /* P0REGS */ \ + { 0x00000800, 0x000 }, /* FDPIC_REGS */ \ + { 0x00000200, 0x000 }, /* FDPIC_FPTR_REGS */ \ + { 0x00004700, 0x800 }, /* PREGS_CLOBBERED */ \ + { 0x0000ff00, 0x800 }, /* PREGS */ \ + { 0x000fff00, 0x800 }, /* IPREGS */ \ + { 0x0000ffff, 0x800 }, /* DPREGS */ \ + { 0xffffffff, 0x800 }, /* MOST_REGS */\ + { 0x00000000, 0x3000 }, /* LT_REGS */\ + { 0x00000000, 0xc000 }, /* LC_REGS */\ + { 0x00000000, 0x30000 }, /* LB_REGS */\ + { 0x00000000, 0x3f7f8 }, /* PROLOGUE_REGS */\ + { 0xffffffff, 0x3fff8 }, /* NON_A_CC_REGS */\ + { 0xffffffff, 0x3ffff }} /* ALL_REGS */ + +#define IREG_POSSIBLE_P(OUTER) \ + ((OUTER) == POST_INC || (OUTER) == PRE_INC \ + || (OUTER) == POST_DEC || (OUTER) == PRE_DEC \ + || (OUTER) == MEM || (OUTER) == ADDRESS) + +#define MODE_CODE_BASE_REG_CLASS(MODE, AS, OUTER, INDEX) \ + ((MODE) == HImode && IREG_POSSIBLE_P (OUTER) ? 
IPREGS : PREGS) + +#define INDEX_REG_CLASS PREGS + +#define REGNO_OK_FOR_BASE_STRICT_P(X, MODE, OUTER, INDEX) \ + (P_REGNO_P (X) || (X) == REG_ARGP \ + || (IREG_POSSIBLE_P (OUTER) && (MODE) == HImode \ + && I_REGNO_P (X))) + +#define REGNO_OK_FOR_BASE_NONSTRICT_P(X, MODE, OUTER, INDEX) \ + ((X) >= FIRST_PSEUDO_REGISTER \ + || REGNO_OK_FOR_BASE_STRICT_P (X, MODE, OUTER, INDEX)) + +#ifdef REG_OK_STRICT +#define REGNO_MODE_CODE_OK_FOR_BASE_P(X, MODE, AS, OUTER, INDEX) \ + REGNO_OK_FOR_BASE_STRICT_P (X, MODE, OUTER, INDEX) +#else +#define REGNO_MODE_CODE_OK_FOR_BASE_P(X, MODE, AS, OUTER, INDEX) \ + REGNO_OK_FOR_BASE_NONSTRICT_P (X, MODE, OUTER, INDEX) +#endif + +#define REGNO_OK_FOR_INDEX_P(X) 0 + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ +((REGNO) == REG_R0 ? D0REGS \ + : (REGNO) == REG_R1 ? D1REGS \ + : (REGNO) == REG_R2 ? D2REGS \ + : (REGNO) == REG_R3 ? D3REGS \ + : (REGNO) == REG_R4 ? D4REGS \ + : (REGNO) == REG_R5 ? D5REGS \ + : (REGNO) == REG_R6 ? D6REGS \ + : (REGNO) == REG_R7 ? D7REGS \ + : (REGNO) == REG_P0 ? P0REGS \ + : (REGNO) < REG_I0 ? PREGS \ + : (REGNO) == REG_ARGP ? PREGS \ + : (REGNO) >= REG_I0 && (REGNO) <= REG_I3 ? IREGS \ + : (REGNO) >= REG_L0 && (REGNO) <= REG_L3 ? LREGS \ + : (REGNO) >= REG_B0 && (REGNO) <= REG_B3 ? BREGS \ + : (REGNO) >= REG_M0 && (REGNO) <= REG_M3 ? MREGS \ + : (REGNO) == REG_A0 || (REGNO) == REG_A1 ? AREGS \ + : (REGNO) == REG_LT0 || (REGNO) == REG_LT1 ? LT_REGS \ + : (REGNO) == REG_LC0 || (REGNO) == REG_LC1 ? LC_REGS \ + : (REGNO) == REG_LB0 || (REGNO) == REG_LB1 ? LB_REGS \ + : (REGNO) == REG_CC ? CCREGS \ + : (REGNO) >= REG_RETS ? PROLOGUE_REGS \ + : NO_REGS) + +/* When this hook returns true for MODE, the compiler allows + registers explicitly used in the rtl to be used as spill registers + but prevents the compiler from extending the lifetime of these + registers. */ +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +/* Do not allow to store a value in REG_CC for any mode */ +/* Do not allow to store value in pregs if mode is not SI*/ +#define HARD_REGNO_MODE_OK(REGNO, MODE) hard_regno_mode_ok((REGNO), (MODE)) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == V2PDImode && (CLASS) == AREGS ? 2 \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((MODE) == PDImode && ((REGNO) == REG_A0 || (REGNO) == REG_A1) ? 1 \ + : (MODE) == V2PDImode && ((REGNO) == REG_A0 || (REGNO) == REG_A1) ? 2 \ + : CLASS_MAX_NREGS (GENERAL_REGS, MODE)) + +/* A C expression that is nonzero if hard register TO can be + considered for use as a rename register for FROM register */ +#define HARD_REGNO_RENAME_OK(FROM, TO) bfin_hard_regno_rename_ok (FROM, TO) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. 
*/ +#define MODES_TIEABLE_P(MODE1, MODE2) \ + ((MODE1) == (MODE2) \ + || ((GET_MODE_CLASS (MODE1) == MODE_INT \ + || GET_MODE_CLASS (MODE1) == MODE_FLOAT) \ + && (GET_MODE_CLASS (MODE2) == MODE_INT \ + || GET_MODE_CLASS (MODE2) == MODE_FLOAT) \ + && (MODE1) != BImode && (MODE2) != BImode \ + && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \ + && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD)) + +/* `PREFERRED_RELOAD_CLASS (X, CLASS)' + A C expression that places additional restrictions on the register + class to use when it is necessary to copy value X into a register + in class CLASS. The value is a register class; perhaps CLASS, or + perhaps another, smaller class. */ +#define PREFERRED_RELOAD_CLASS(X, CLASS) \ + (GET_CODE (X) == POST_INC \ + || GET_CODE (X) == POST_DEC \ + || GET_CODE (X) == PRE_DEC ? PREGS : (CLASS)) + +/* Function Calling Conventions. */ + +/* The type of the current function; normal functions are of type + SUBROUTINE. */ +typedef enum { + SUBROUTINE, INTERRUPT_HANDLER, EXCPT_HANDLER, NMI_HANDLER +} e_funkind; +#define FUNCTION_RETURN_REGISTERS { REG_RETS, REG_RETI, REG_RETX, REG_RETN } + +#define FUNCTION_ARG_REGISTERS { REG_R0, REG_R1, REG_R2, -1 } + +/* Flags for the call/call_value rtl operations set up by function_arg */ +#define CALL_NORMAL 0x00000000 /* no special processing */ +#define CALL_LONG 0x00000001 /* always call indirect */ +#define CALL_SHORT 0x00000002 /* always call by symbol */ + +typedef struct { + int words; /* # words passed so far */ + int nregs; /* # registers available for passing */ + int *arg_regs; /* array of register -1 terminated */ + int call_cookie; /* Do special things for this call */ +} CUMULATIVE_ARGS; + +#define FUNCTION_ARG_REGNO_P(REGNO) function_arg_regno_p (REGNO) + + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT, N_NAMED_ARGS) \ + (init_cumulative_args (&CUM, FNTYPE, LIBNAME)) + +/* Define how to find the value returned by a function. + VALTYPE is the data type of the value (as a tree). + If the precise function being called is known, FUNC is its FUNCTION_DECL; + otherwise, FUNC is 0. +*/ + +#define VALUE_REGNO(MODE) (REG_R0) + +#define FUNCTION_VALUE(VALTYPE, FUNC) \ + gen_rtx_REG (TYPE_MODE (VALTYPE), \ + VALUE_REGNO(TYPE_MODE(VALTYPE))) + +/* Define how to find the value returned by a library function + assuming the value has mode MODE. */ + +#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, VALUE_REGNO(MODE)) + +#define FUNCTION_VALUE_REGNO_P(N) ((N) == REG_R0) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Before the prologue, the return address is in the RETS register. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, REG_RETS) + +#define RETURN_ADDR_RTX(COUNT, FRAME) bfin_return_addr_rtx (COUNT) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (REG_RETS) + +/* Call instructions don't modify the stack pointer on the Blackfin. */ +#define INCOMING_FRAME_SP_OFFSET 0 + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, REG_P2) +#define EH_RETURN_HANDLER_RTX \ + gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx, \ + UNITS_PER_WORD)) + +/* Addressing Modes */ + +/* A number, the maximum number of registers that can appear in a + valid memory address. 
Note that it is up to you to specify a + value equal to the maximum number that `TARGET_LEGITIMATE_ADDRESS_P' + would ever accept. */ +#define MAX_REGS_PER_ADDRESS 1 + +#define LEGITIMATE_MODE_FOR_AUTOINC_P(MODE) \ + (GET_MODE_SIZE (MODE) <= 4 || (MODE) == PDImode) + +#define HAVE_POST_INCREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_DECREMENT 1 + +/* `LEGITIMATE_PIC_OPERAND_P (X)' + A C expression that is nonzero if X is a legitimate immediate + operand on the target machine when generating position independent + code. You can assume that X satisfies `CONSTANT_P', so you need + not check this. You can also assume FLAG_PIC is true, so you need + not check it either. You need not define this macro if all + constants (including `SYMBOL_REF') can be immediate operands when + generating position independent code. */ +#define LEGITIMATE_PIC_OPERAND_P(X) ! SYMBOLIC_CONST (X) + +#define SYMBOLIC_CONST(X) \ +(GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == LABEL_REF \ + || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X))) + +#define NOTICE_UPDATE_CC(EXPR, INSN) 0 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX UNITS_PER_WORD + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. */ + +#define MOVE_RATIO(speed) 5 + +/* STORAGE LAYOUT: target machine storage layout + Define this macro as a C expression which is nonzero if accessing + less than a word of memory (i.e. a `char' or a `short') is no + faster than accessing a word of memory, i.e., if such access + require more than one instruction or if there is no difference in + cost between byte and (aligned) word loads. + + When this macro is not defined, the compiler will access a field by + finding the smallest containing object; when it is defined, a + fullword load will be used if alignment permits. Unless bytes + accesses are faster than word accesses, using word accesses is + preferable since it may eliminate subsequent memory access if + subsequent accesses occur to other fields in the same word of the + structure, but to different bytes. */ +#define SLOW_BYTE_ACCESS 0 +#define SLOW_SHORT_ACCESS 0 + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + We can't access bytes but if we could we would in the Big Endian order. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is numbered. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width in bits of a "word", which is the contents of a machine register. + Note that this is not necessarily the width of data type `int'; + if using 16-bit ints on a 68000, this would still be 32. + But on a machine with 16-bit registers, this would be 16. */ +#define BITS_PER_WORD 32 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 + +/* Width in bits of a pointer. + See also the macro `Pmode1' defined below. */ +#define POINTER_SIZE 32 + +/* Allocation boundary (in *bits*) for storing pointers in memory. */ +#define POINTER_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for storing arguments in argument list. 
*/ +#define PARM_BOUNDARY 32 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 32 + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY BITS_PER_WORD + +/* No data type wants to be aligned rounder than this. */ +#define BIGGEST_ALIGNMENT 32 + +/* Define this if move instructions will actually fail to work + when given unaligned data. */ +#define STRICT_ALIGNMENT 1 + +/* (shell-command "rm c-decl.o stor-layout.o") + * never define PCC_BITFIELD_TYPE_MATTERS + * really cause some alignment problem + */ + +#define UNITS_PER_FLOAT ((FLOAT_TYPE_SIZE + BITS_PER_UNIT - 1) / \ + BITS_PER_UNIT) + +#define UNITS_PER_DOUBLE ((DOUBLE_TYPE_SIZE + BITS_PER_UNIT - 1) / \ + BITS_PER_UNIT) + + +/* what is the 'type' of size_t */ +#define SIZE_TYPE "long unsigned int" + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 +#define FLOAT_TYPE_SIZE BITS_PER_WORD +#define SHORT_TYPE_SIZE 16 +#define CHAR_TYPE_SIZE 8 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 + +/* Note: Fix this to depend on target switch. -- lev */ + +/* Note: Try to implement double and force long double. -- tonyko + * #define __DOUBLES_ARE_FLOATS__ + * #define DOUBLE_TYPE_SIZE FLOAT_TYPE_SIZE + * #define LONG_DOUBLE_TYPE_SIZE DOUBLE_TYPE_SIZE + * #define DOUBLES_ARE_FLOATS 1 + */ + +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* `PROMOTE_MODE (M, UNSIGNEDP, TYPE)' + A macro to update M and UNSIGNEDP when an object whose type is + TYPE and which has the specified mode and signedness is to be + stored in a register. This macro is only called when TYPE is a + scalar type. + + On most RISC machines, which only have operations that operate on + a full register, define this macro to set M to `word_mode' if M is + an integer mode narrower than `BITS_PER_WORD'. In most cases, + only integer modes should be widened because wider-precision + floating-point operations are usually more expensive than their + narrower counterparts. + + For most machines, the macro definition does not change UNSIGNEDP. + However, some machines, have instructions that preferentially + handle either signed or unsigned quantities of certain modes. For + example, on the DEC Alpha, 32-bit loads from memory and 32-bit add + instructions sign-extend the result to 64 bits. On such machines, + set UNSIGNEDP according to which kind of extension is more + efficient. + + Do not define this macro if it would never modify M.*/ + +#define BFIN_PROMOTE_MODE_P(MODE) \ + (!TARGET_DSP && GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (BFIN_PROMOTE_MODE_P(MODE)) \ + { \ + if (MODE == QImode) \ + UNSIGNEDP = 1; \ + else if (MODE == HImode) \ + UNSIGNEDP = 0; \ + (MODE) = SImode; \ + } + +/* Describing Relative Costs of Operations */ + +/* Do not put function addr into constant pool */ +#define NO_FUNCTION_CSE 1 + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +#define JUMP_TABLES_IN_TEXT_SECTION flag_pic + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. 
+#define WORD_REGISTER_OPERATIONS +*/ + +/* Evaluates to true if A and B are mac flags that can be used + together in a single multiply insn. That is the case if they are + both the same flag not involving M, or if one is a combination of + the other with M. */ +#define MACFLAGS_MATCH_P(A, B) \ + ((A) == (B) \ + || ((A) == MACFLAG_NONE && (B) == MACFLAG_M) \ + || ((A) == MACFLAG_M && (B) == MACFLAG_NONE) \ + || ((A) == MACFLAG_IS && (B) == MACFLAG_IS_M) \ + || ((A) == MACFLAG_IS_M && (B) == MACFLAG_IS)) + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#define PRINT_OPERAND(FILE, RTX, CODE) print_operand (FILE, RTX, CODE) +#define PRINT_OPERAND_ADDRESS(FILE, RTX) print_address_operand (FILE, RTX) + +typedef enum sections { + CODE_DIR, + DATA_DIR, + LAST_SECT_NM +} SECT_ENUM_T; + +typedef enum directives { + LONG_CONST_DIR, + SHORT_CONST_DIR, + BYTE_CONST_DIR, + SPACE_DIR, + INIT_DIR, + LAST_DIR_NM +} DIR_ENUM_T; + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) \ + ((C) == ';' \ + || ((C) == '|' && (STR)[1] == '|')) + +#define TEXT_SECTION_ASM_OP ".text;" +#define DATA_SECTION_ASM_OP ".data;" + +#define ASM_APP_ON "" +#define ASM_APP_OFF "" + +#define ASM_GLOBALIZE_LABEL1(FILE, NAME) \ + do { fputs (".global ", FILE); \ + assemble_name (FILE, NAME); \ + fputc (';',FILE); \ + fputc ('\n',FILE); \ + } while (0) + +#define ASM_DECLARE_FUNCTION_NAME(FILE,NAME,DECL) \ + do { \ + fputs (".type ", FILE); \ + assemble_name (FILE, NAME); \ + fputs (", STT_FUNC", FILE); \ + fputc (';',FILE); \ + fputc ('\n',FILE); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + } while (0) + +#define ASM_OUTPUT_LABEL(FILE, NAME) \ + do { assemble_name (FILE, NAME); \ + fputs (":\n",FILE); \ + } while (0) + +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + do { fprintf (FILE, "_%s", NAME); \ + } while (0) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { char __buf[256]; \ + fprintf (FILE, "\t.dd\t"); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \ + assemble_name (FILE, __buf); \ + fputc (';', FILE); \ + fputc ('\n', FILE); \ + } while (0) + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + MY_ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL) + +#define MY_ASM_OUTPUT_ADDR_DIFF_ELT(FILE, VALUE, REL) \ + do { \ + char __buf[256]; \ + fprintf (FILE, "\t.dd\t"); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \ + assemble_name (FILE, __buf); \ + fputs (" - ", FILE); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", REL); \ + assemble_name (FILE, __buf); \ + fputc (';', FILE); \ + fputc ('\n', FILE); \ + } while (0) + +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + do { \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", 1 << (LOG)); \ + } while (0) + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + do { \ + asm_output_skip (FILE, SIZE); \ + } while (0) + +#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \ +do { \ + switch_to_section (data_section); \ + if ((SIZE) >= (unsigned int) 4 ) ASM_OUTPUT_ALIGN(FILE,2); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + fprintf (FILE, "%s %ld;\n", ASM_SPACE, \ + (ROUNDED) > (unsigned int) 1 ? 
(ROUNDED) : 1); \ +} while (0) + +#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \ + do { \ + ASM_GLOBALIZE_LABEL1(FILE,NAME); \ + ASM_OUTPUT_LOCAL (FILE, NAME, SIZE, ROUNDED); } while(0) + +#define ASM_COMMENT_START "//" + +#define PROFILE_BEFORE_PROLOGUE +#define FUNCTION_PROFILER(FILE, LABELNO) \ + do { \ + fprintf (FILE, "\t[--SP] = RETS;\n"); \ + if (TARGET_LONG_CALLS) \ + { \ + fprintf (FILE, "\tP2.h = __mcount;\n"); \ + fprintf (FILE, "\tP2.l = __mcount;\n"); \ + fprintf (FILE, "\tCALL (P2);\n"); \ + } \ + else \ + fprintf (FILE, "\tCALL __mcount;\n"); \ + fprintf (FILE, "\tRETS = [SP++];\n"); \ + } while(0) + +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 1 + +#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) fprintf (FILE, "\t[--SP] = %s;\n", reg_names[REGNO]) +#define ASM_OUTPUT_REG_POP(FILE, REGNO) fprintf (FILE, "\t%s = [SP++];\n", reg_names[REGNO]) + +extern rtx bfin_cc_rtx, bfin_rets_rtx; + +/* This works for GAS and some other assemblers. */ +#define SET_ASM_OP ".set " + +/* DBX register number for a given compiler register number */ +#define DBX_REGISTER_NUMBER(REGNO) (REGNO) + +#define SIZE_ASM_OP "\t.size\t" + +extern int splitting_for_sched, splitting_loops; + +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) ((CHAR) == '!') + +#ifndef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 0 +#endif + +struct bfin_cpu +{ + const char *name; + bfin_cpu_t type; + int si_revision; + unsigned int workarounds; +}; + +extern const struct bfin_cpu bfin_cpus[]; + +#endif /* _BFIN_CONFIG */ diff --git a/gcc-4.9/gcc/config/bfin/bfin.md b/gcc-4.9/gcc/config/bfin/bfin.md new file mode 100644 index 000000000..f5e64d3ef --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin.md @@ -0,0 +1,4202 @@ +;;- Machine description for Blackfin for GNU compiler +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; Contributed by Analog Devices. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; operand punctuation marks: +; +; X -- integer value printed as log2 +; Y -- integer value printed as log2(~value) - for bitclear +; h -- print half word register, low part +; d -- print half word register, high part +; D -- print operand as dregs pairs +; w -- print operand as accumulator register word (a0w, a1w) +; H -- high part of double mode operand +; T -- byte register representation Oct. 02 2001 + +; constant operand classes +; +; J 2**N 5bit imm scaled +; Ks7 -64 .. 63 signed 7bit imm +; Ku5 0..31 unsigned 5bit imm +; Ks4 -8 .. 7 signed 4bit imm +; Ks3 -4 .. 3 signed 3bit imm +; Ku3 0 .. 7 unsigned 3bit imm +; Pn 0, 1, 2 constants 0, 1 or 2, corresponding to n +; +; register operands +; d (r0..r7) +; a (p0..p5,fp,sp) +; e (a0, a1) +; b (i0..i3) +; f (m0..m3) +; v (b0..b3) +; c (i0..i3,m0..m3) CIRCREGS +; C (CC) CCREGS +; t (lt0,lt1) +; k (lc0,lc1) +; u (lb0,lb1) +; + +;; Define constants for hard registers. 
+ +(define_constants + [(REG_R0 0) + (REG_R1 1) + (REG_R2 2) + (REG_R3 3) + (REG_R4 4) + (REG_R5 5) + (REG_R6 6) + (REG_R7 7) + + (REG_P0 8) + (REG_P1 9) + (REG_P2 10) + (REG_P3 11) + (REG_P4 12) + (REG_P5 13) + (REG_P6 14) + (REG_P7 15) + + (REG_SP 14) + (REG_FP 15) + + (REG_I0 16) + (REG_I1 17) + (REG_I2 18) + (REG_I3 19) + + (REG_B0 20) + (REG_B1 21) + (REG_B2 22) + (REG_B3 23) + + (REG_L0 24) + (REG_L1 25) + (REG_L2 26) + (REG_L3 27) + + (REG_M0 28) + (REG_M1 29) + (REG_M2 30) + (REG_M3 31) + + (REG_A0 32) + (REG_A1 33) + + (REG_CC 34) + (REG_RETS 35) + (REG_RETI 36) + (REG_RETX 37) + (REG_RETN 38) + (REG_RETE 39) + + (REG_ASTAT 40) + (REG_SEQSTAT 41) + (REG_USP 42) + + (REG_ARGP 43) + + (REG_LT0 44) + (REG_LT1 45) + (REG_LC0 46) + (REG_LC1 47) + (REG_LB0 48) + (REG_LB1 49)]) + +;; Constants used in UNSPECs and UNSPEC_VOLATILEs. + +(define_constants + [(UNSPEC_CBRANCH_TAKEN 0) + (UNSPEC_CBRANCH_NOPS 1) + (UNSPEC_RETURN 2) + (UNSPEC_MOVE_PIC 3) + (UNSPEC_LIBRARY_OFFSET 4) + (UNSPEC_PUSH_MULTIPLE 5) + ;; Multiply or MAC with extra CONST_INT operand specifying the macflag + (UNSPEC_MUL_WITH_FLAG 6) + (UNSPEC_MAC_WITH_FLAG 7) + (UNSPEC_MOVE_FDPIC 8) + (UNSPEC_FUNCDESC_GOT17M4 9) + (UNSPEC_LSETUP_END 10) + ;; Distinguish a 32-bit version of an insn from a 16-bit version. + (UNSPEC_32BIT 11) + (UNSPEC_NOP 12) + (UNSPEC_ONES 13) + (UNSPEC_ATOMIC 14)]) + +(define_constants + [(UNSPEC_VOLATILE_CSYNC 1) + (UNSPEC_VOLATILE_SSYNC 2) + (UNSPEC_VOLATILE_LOAD_FUNCDESC 3) + (UNSPEC_VOLATILE_STORE_EH_HANDLER 4) + (UNSPEC_VOLATILE_DUMMY 5) + (UNSPEC_VOLATILE_STALL 6)]) + +(define_constants + [(MACFLAG_NONE 0) + (MACFLAG_T 1) + (MACFLAG_FU 2) + (MACFLAG_TFU 3) + (MACFLAG_IS 4) + (MACFLAG_IU 5) + (MACFLAG_W32 6) + (MACFLAG_M 7) + (MACFLAG_IS_M 8) + (MACFLAG_S2RND 9) + (MACFLAG_ISS2 10) + (MACFLAG_IH 11)]) + +(define_attr "type" + "move,movcc,mvi,mcld,mcst,dsp32,dsp32shiftimm,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy,stall" + (const_string "misc")) + +(define_attr "addrtype" "32bit,preg,spreg,ireg" + (cond [(and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_p_address_operand" ""))) + (const_string "preg") + (and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_spfp_address_operand" ""))) + (const_string "spreg") + (and (eq_attr "type" "mcld") + (and (match_operand 0 "dp_register_operand" "") + (match_operand 1 "mem_i_address_operand" ""))) + (const_string "ireg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_p_address_operand" ""))) + (const_string "preg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_spfp_address_operand" ""))) + (const_string "spreg") + (and (eq_attr "type" "mcst") + (and (match_operand 1 "dp_register_operand" "") + (match_operand 0 "mem_i_address_operand" ""))) + (const_string "ireg")] + (const_string "32bit"))) + +(define_attr "storereg" "preg,other" + (cond [(and (eq_attr "type" "mcst") + (match_operand 1 "p_register_operand" "")) + (const_string "preg")] + (const_string "other"))) + +;; Scheduling definitions + +(define_automaton "bfin") + +(define_cpu_unit "slot0" "bfin") +(define_cpu_unit "slot1" "bfin") +(define_cpu_unit "slot2" "bfin") + +;; Three units used to enforce parallel issue restrictions: +;; only one of the 16-bit slots can use a P register in an address, +;; and only one them can be a store. 
+(define_cpu_unit "store" "bfin") +(define_cpu_unit "pregs" "bfin") + +;; A dummy unit used to delay scheduling of loads after a conditional +;; branch. +(define_cpu_unit "load" "bfin") + +;; A logical unit used to work around anomaly 05000074. +(define_cpu_unit "anomaly_05000074" "bfin") + +(define_reservation "core" "slot0+slot1+slot2") + +(define_insn_reservation "alu" 1 + (eq_attr "type" "move,movcc,mvi,alu0,shft,brcc,br,call,misc,sync,compare") + "core") + +(define_insn_reservation "imul" 3 + (eq_attr "type" "mult") + "core*3") + +(define_insn_reservation "dsp32" 1 + (eq_attr "type" "dsp32") + "slot0") + +(define_insn_reservation "dsp32shiftimm" 1 + (and (eq_attr "type" "dsp32shiftimm") + (not (match_test "ENABLE_WA_05000074"))) + "slot0") + +(define_insn_reservation "dsp32shiftimm_anomaly_05000074" 1 + (and (eq_attr "type" "dsp32shiftimm") + (match_test "ENABLE_WA_05000074")) + "slot0+anomaly_05000074") + +(define_insn_reservation "load32" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "32bit"))) + "core+load") + +(define_insn_reservation "loadp" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "preg"))) + "slot1+pregs+load") + +(define_insn_reservation "loadsp" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "spreg"))) + "slot1+pregs") + +(define_insn_reservation "loadi" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcld") (eq_attr "addrtype" "ireg"))) + "(slot1|slot2)+load") + +(define_insn_reservation "store32" 1 + (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "32bit"))) + "core") + +(define_insn_reservation "storep" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") + (ior (eq_attr "addrtype" "preg") + (eq_attr "addrtype" "spreg")))) + (ior (not (match_test "ENABLE_WA_05000074")) + (eq_attr "storereg" "other"))) + "slot1+pregs+store") + +(define_insn_reservation "storep_anomaly_05000074" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") + (ior (eq_attr "addrtype" "preg") + (eq_attr "addrtype" "spreg")))) + (and (match_test "ENABLE_WA_05000074") + (eq_attr "storereg" "preg"))) + "slot1+anomaly_05000074+pregs+store") + +(define_insn_reservation "storei" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "ireg"))) + (ior (not (match_test "ENABLE_WA_05000074")) + (eq_attr "storereg" "other"))) + "(slot1|slot2)+store") + +(define_insn_reservation "storei_anomaly_05000074" 1 + (and (and (not (eq_attr "seq_insns" "multi")) + (and (eq_attr "type" "mcst") (eq_attr "addrtype" "ireg"))) + (and (match_test "ENABLE_WA_05000074") + (eq_attr "storereg" "preg"))) + "((slot1+anomaly_05000074)|slot2)+store") + +(define_insn_reservation "multi" 2 + (eq_attr "seq_insns" "multi") + "core") + +(define_insn_reservation "load_stall1" 1 + (and (eq_attr "type" "stall") + (match_operand 0 "const1_operand" "")) + "core+load*2") + +(define_insn_reservation "load_stall3" 1 + (and (eq_attr "type" "stall") + (match_operand 0 "const3_operand" "")) + "core+load*4") + +(absence_set "slot0" "slot1,slot2") +(absence_set "slot1" "slot2") + +;; Make sure genautomata knows about the maximum latency that can be produced +;; by the adjust_cost function. 
+(define_insn_reservation "dummy" 5 + (eq_attr "type" "dummy") + "core") + +;; Operand and operator predicates + +(include "predicates.md") +(include "constraints.md") + +;;; FRIO branches have been optimized for code density +;;; this comes at a slight cost of complexity when +;;; a compiler needs to generate branches in the general +;;; case. In order to generate the correct branching +;;; mechanisms the compiler needs keep track of instruction +;;; lengths. The follow table describes how to count instructions +;;; for the FRIO architecture. +;;; +;;; unconditional br are 12-bit imm pcrelative branches *2 +;;; conditional br are 10-bit imm pcrelative branches *2 +;;; brcc 10-bit: +;;; 1024 10-bit imm *2 is 2048 (-1024..1022) +;;; br 12-bit : +;;; 4096 12-bit imm *2 is 8192 (-4096..4094) +;;; NOTE : For brcc we generate instructions such as +;;; if cc jmp; jump.[sl] offset +;;; offset of jump.[sl] is from the jump instruction but +;;; gcc calculates length from the if cc jmp instruction +;;; furthermore gcc takes the end address of the branch instruction +;;; as (pc) for a forward branch +;;; hence our range is (-4094, 4092) instead of (-4096, 4094) for a br +;;; +;;; The way the (pc) rtx works in these calculations is somewhat odd; +;;; for backward branches it's the address of the current instruction, +;;; for forward branches it's the previously known address of the following +;;; instruction - we have to take this into account by reducing the range +;;; for a forward branch. + +;; Lengths for type "mvi" insns are always defined by the instructions +;; themselves. +(define_attr "length" "" + (cond [(eq_attr "type" "mcld") + (if_then_else (match_operand 1 "effective_address_32bit_p" "") + (const_int 4) (const_int 2)) + + (eq_attr "type" "mcst") + (if_then_else (match_operand 0 "effective_address_32bit_p" "") + (const_int 4) (const_int 2)) + + (eq_attr "type" "move") (const_int 2) + + (eq_attr "type" "dsp32") (const_int 4) + (eq_attr "type" "dsp32shiftimm") (const_int 4) + (eq_attr "type" "call") (const_int 4) + + (eq_attr "type" "br") + (if_then_else (and + (le (minus (match_dup 0) (pc)) (const_int 4092)) + (ge (minus (match_dup 0) (pc)) (const_int -4096))) + (const_int 2) + (const_int 4)) + + (eq_attr "type" "brcc") + (cond [(and + (le (minus (match_dup 3) (pc)) (const_int 1020)) + (ge (minus (match_dup 3) (pc)) (const_int -1024))) + (const_int 2) + (and + (le (minus (match_dup 3) (pc)) (const_int 4092)) + (ge (minus (match_dup 3) (pc)) (const_int -4094))) + (const_int 4)] + (const_int 6)) + ] + + (const_int 2))) + +;; Classify the insns into those that are one instruction and those that +;; are more than one in sequence. +(define_attr "seq_insns" "single,multi" + (const_string "single")) + +;; Describe a user's asm statement. 
+(define_asm_attributes + [(set_attr "type" "misc") + (set_attr "seq_insns" "multi") + (set_attr "length" "4")]) + +;; Conditional moves + +(define_mode_iterator CCMOV [QI HI SI]) + +(define_expand "movcc" + [(set (match_operand:CCMOV 0 "register_operand" "") + (if_then_else:CCMOV (match_operand 1 "comparison_operator" "") + (match_operand:CCMOV 2 "register_operand" "") + (match_operand:CCMOV 3 "register_operand" "")))] + "" +{ + operands[1] = bfin_gen_compare (operands[1], mode); +}) + +(define_insn "*movcc_insn1" + [(set (match_operand:CCMOV 0 "register_operand" "=da,da,da") + (if_then_else:CCMOV + (eq:BI (match_operand:BI 3 "register_operand" "C,C,C") + (const_int 0)) + (match_operand:CCMOV 1 "register_operand" "da,0,da") + (match_operand:CCMOV 2 "register_operand" "0,da,da")))] + "" + "@ + if !cc %0 = %1; + if cc %0 = %2; + if !cc %0 = %1; if cc %0 = %2;" + [(set_attr "length" "2,2,4") + (set_attr "type" "movcc") + (set_attr "seq_insns" "*,*,multi")]) + +(define_insn "*movcc_insn2" + [(set (match_operand:CCMOV 0 "register_operand" "=da,da,da") + (if_then_else:CCMOV + (ne:BI (match_operand:BI 3 "register_operand" "C,C,C") + (const_int 0)) + (match_operand:CCMOV 1 "register_operand" "0,da,da") + (match_operand:CCMOV 2 "register_operand" "da,0,da")))] + "" + "@ + if !cc %0 = %2; + if cc %0 = %1; + if cc %0 = %1; if !cc %0 = %2;" + [(set_attr "length" "2,2,4") + (set_attr "type" "movcc") + (set_attr "seq_insns" "*,*,multi")]) + +;; Insns to load HIGH and LO_SUM + +(define_insn "movsi_high" + [(set (match_operand:SI 0 "register_operand" "=x") + (high:SI (match_operand:SI 1 "immediate_operand" "i")))] + "reload_completed" + "%d0 = %d1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movstricthi_high" + [(set (match_operand:SI 0 "register_operand" "+x") + (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (match_operand:SI 1 "immediate_operand" "i")))] + "reload_completed" + "%d0 = %d1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_low" + [(set (match_operand:SI 0 "register_operand" "=x") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "reload_completed" + "%h0 = %h2;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_high_pic" + [(set (match_operand:SI 0 "register_operand" "=x") + (high:SI (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_MOVE_PIC)))] + "" + "%d0 = %1@GOT_LOW;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +(define_insn "movsi_low_pic" + [(set (match_operand:SI 0 "register_operand" "=x") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (unspec:SI [(match_operand:SI 2 "" "")] + UNSPEC_MOVE_PIC)))] + "" + "%h0 = %h2@GOT_HIGH;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +;;; Move instructions + +(define_insn_and_split "movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_operand" "=x,mx,r") + (match_operand:DI 1 "general_operand" "iFx,r,mx"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx lo_half[2], hi_half[2]; + split_di (operands, 2, lo_half, hi_half); + + if (reg_overlap_mentioned_p (lo_half[0], hi_half[1])) + { + operands[2] = hi_half[0]; + operands[3] = hi_half[1]; + operands[4] = lo_half[0]; + operands[5] = lo_half[1]; + } + else + { + operands[2] = lo_half[0]; + operands[3] = lo_half[1]; + operands[4] = hi_half[0]; + operands[5] = hi_half[1]; + } +}) + 
+(define_insn "movbi" + [(set (match_operand:BI 0 "nonimmediate_operand" "=x,x,d,md,C,d,C,P1") + (match_operand:BI 1 "general_operand" "x,xKs3,md,d,d,C,P0,P1"))] + + "" + "@ + %0 = %1; + %0 = %1 (X); + %0 = B %1 (Z)%! + B %0 = %1; + CC = %1; + %0 = CC; + CC = R0 < R0; + CC = R0 == R0;" + [(set_attr "type" "move,mvi,mcld,mcst,compare,compare,compare,compare") + (set_attr "length" "2,2,*,*,2,2,2,2") + (set_attr "seq_insns" "*,*,*,*,*,*,*,*")]) + +(define_insn "movpdi" + [(set (match_operand:PDI 0 "nonimmediate_operand" "=e,<,e") + (match_operand:PDI 1 "general_operand" " e,e,>"))] + "" + "@ + %0 = %1; + %0 = %x1; %0 = %w1; + %w0 = %1; %x0 = %1;" + [(set_attr "type" "move,mcst,mcld") + (set_attr "seq_insns" "*,multi,multi")]) + +(define_insn "load_accumulator" + [(set (match_operand:PDI 0 "register_operand" "=e") + (sign_extend:PDI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = %1;" + [(set_attr "type" "move")]) + +(define_insn_and_split "load_accumulator_pair" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (sign_extend:V2PDI (vec_concat:V2SI + (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d"))))] + "" + "#" + "reload_completed" + [(set (match_dup 3) (sign_extend:PDI (match_dup 1))) + (set (match_dup 4) (sign_extend:PDI (match_dup 2)))] +{ + operands[3] = gen_rtx_REG (PDImode, REGNO (operands[0])); + operands[4] = gen_rtx_REG (PDImode, REGNO (operands[0]) + 1); +}) + +(define_insn "*pushsi_insn" + [(set (mem:SI (pre_dec:SI (reg:SI REG_SP))) + (match_operand:SI 0 "register_operand" "xy"))] + "" + "[--SP] = %0;" + [(set_attr "type" "mcst") + (set_attr "addrtype" "32bit") + (set_attr "length" "2")]) + +(define_insn "*popsi_insn" + [(set (match_operand:SI 0 "register_operand" "=d,xy") + (mem:SI (post_inc:SI (reg:SI REG_SP))))] + "" + "%0 = [SP++]%!" + [(set_attr "type" "mcld") + (set_attr "addrtype" "preg,32bit") + (set_attr "length" "2")]) + +;; The first alternative is used to make reload choose a limited register +;; class when faced with a movsi_insn that had its input operand replaced +;; with a PLUS. We generally require fewer secondary reloads this way. + +(define_insn "*movsi_insn" + [(set (match_operand:SI 0 "nonimmediate_operand" "=da,x,da,y,da,x,x,x,da,mr") + (match_operand:SI 1 "general_operand" "da,x,y,da,xKs7,xKsh,xKuh,ix,mr,da"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + %0 = %1; + %0 = %1; + %0 = %1; + %0 = %1 (X); + %0 = %1 (X); + %0 = %1 (Z); + # + %0 = %1%! + %0 = %1%!" + [(set_attr "type" "move,move,move,move,mvi,mvi,mvi,*,mcld,mcst") + (set_attr "length" "2,2,2,2,2,4,4,*,*,*")]) + +(define_insn "*movsi_insn32" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "d,P0")] UNSPEC_32BIT))] + "" + "@ + %0 = ROT %1 BY 0%! + %0 = %0 -|- %0%!" 
+ [(set_attr "type" "dsp32shiftimm,dsp32")]) + +(define_split + [(set (match_operand:SI 0 "d_register_operand" "") + (const_int 0))] + "splitting_for_sched && !optimize_size" + [(set (match_dup 0) (unspec:SI [(const_int 0)] UNSPEC_32BIT))]) + +(define_split + [(set (match_operand:SI 0 "d_register_operand" "") + (match_operand:SI 1 "d_register_operand" ""))] + "splitting_for_sched && !optimize_size" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_32BIT))]) + +(define_insn_and_split "*movv2hi_insn" + [(set (match_operand:V2HI 0 "nonimmediate_operand" "=da,da,d,dm") + (match_operand:V2HI 1 "general_operand" "i,di,md,d"))] + + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + # + %0 = %1; + %0 = %1%! + %0 = %1%!" + "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR" + [(set (match_dup 0) (high:SI (match_dup 2))) + (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 3)))] +{ + HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; + intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; + + operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + operands[2] = operands[3] = GEN_INT (trunc_int_for_mode (intval, SImode)); +} + [(set_attr "type" "move,move,mcld,mcst") + (set_attr "length" "2,2,*,*")]) + +(define_insn "*movhi_insn" + [(set (match_operand:HI 0 "nonimmediate_operand" "=x,da,x,d,mr") + (match_operand:HI 1 "general_operand" "x,xKs7,xKsh,mr,d"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" +{ + static const char *templates[] = { + "%0 = %1;", + "%0 = %1 (X);", + "%0 = %1 (X);", + "%0 = W %1 (X)%!", + "W %0 = %1%!", + "%h0 = W %1%!", + "W %0 = %h1%!" + }; + int alt = which_alternative; + rtx mem = (MEM_P (operands[0]) ? operands[0] + : MEM_P (operands[1]) ? operands[1] : NULL_RTX); + if (mem && bfin_dsp_memref_p (mem)) + alt += 2; + return templates[alt]; +} + [(set_attr "type" "move,mvi,mvi,mcld,mcst") + (set_attr "length" "2,2,4,*,*")]) + +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "nonimmediate_operand" "=x,da,x,d,mr") + (match_operand:QI 1 "general_operand" "x,xKs7,xKsh,mr,d"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + %0 = %1 (X); + %0 = %1 (X); + %0 = B %1 (X)%! + B %0 = %1%!" + [(set_attr "type" "move,mvi,mvi,mcld,mcst") + (set_attr "length" "2,2,4,*,*")]) + +(define_insn "*movsf_insn" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,x,da,mr") + (match_operand:SF 1 "general_operand" "x,Fx,mr,da"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %0 = %1; + # + %0 = %1%! + %0 = %1%!" + [(set_attr "type" "move,*,mcld,mcst")]) + +(define_insn_and_split "movdf_insn" + [(set (match_operand:DF 0 "nonimmediate_operand" "=x,mx,r") + (match_operand:DF 1 "general_operand" "iFx,r,mx"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "#" + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx lo_half[2], hi_half[2]; + split_di (operands, 2, lo_half, hi_half); + + if (reg_overlap_mentioned_p (lo_half[0], hi_half[1])) + { + operands[2] = hi_half[0]; + operands[3] = hi_half[1]; + operands[4] = lo_half[0]; + operands[5] = lo_half[1]; + } + else + { + operands[2] = lo_half[0]; + operands[3] = lo_half[1]; + operands[4] = hi_half[0]; + operands[5] = hi_half[1]; + } +}) + +;; Storing halfwords. 
+(define_insn "*movsi_insv" + [(set (zero_extract:SI (match_operand 0 "register_operand" "+d,x") + (const_int 16) + (const_int 16)) + (match_operand:SI 1 "nonmemory_operand" "d,n"))] + "" + "@ + %d0 = %h1 << 0%! + %d0 = %1;" + [(set_attr "type" "dsp32shiftimm,mvi")]) + +(define_expand "insv" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")) + (match_operand:SI 3 "nonmemory_operand" ""))] + "" +{ + if (INTVAL (operands[1]) != 16 || INTVAL (operands[2]) != 16) + FAIL; + + /* From mips.md: insert_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! register_operand (operands[0], VOIDmode)) + FAIL; +}) + +;; This is the main "hook" for PIC code. When generating +;; PIC, movsi is responsible for determining when the source address +;; needs PIC relocation and appropriately calling legitimize_pic_address +;; to perform the actual relocation. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" +{ + if (expand_move (operands, SImode)) + DONE; +}) + +(define_expand "movv2hi" + [(set (match_operand:V2HI 0 "nonimmediate_operand" "") + (match_operand:V2HI 1 "general_operand" ""))] + "" + "expand_move (operands, V2HImode);") + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + "expand_move (operands, DImode);") + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "expand_move (operands, SFmode);") + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + "expand_move (operands, DFmode);") + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + "expand_move (operands, HImode);") + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " expand_move (operands, QImode); ") + +;; Some define_splits to break up SI/SFmode loads of immediate constants. + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "symbolic_or_const_operand" ""))] + "reload_completed + /* Always split symbolic operands; split integer constants that are + too large for a single instruction. */ + && (GET_CODE (operands[1]) != CONST_INT + || (INTVAL (operands[1]) < -32768 + || INTVAL (operands[1]) >= 65536 + || (INTVAL (operands[1]) >= 32768 && PREG_P (operands[0]))))" + [(set (match_dup 0) (high:SI (match_dup 1))) + (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 1)))] +{ + if (GET_CODE (operands[1]) == CONST_INT + && split_load_immediate (operands)) + DONE; + /* ??? Do something about TARGET_LOW_64K. 
*/ +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "immediate_operand" ""))] + "reload_completed" + [(set (match_dup 2) (high:SI (match_dup 3))) + (set (match_dup 2) (lo_sum:SI (match_dup 2) (match_dup 3)))] +{ + long values; + REAL_VALUE_TYPE value; + + gcc_assert (GET_CODE (operands[1]) == CONST_DOUBLE); + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (value, values); + + operands[2] = gen_rtx_REG (SImode, true_regnum (operands[0])); + operands[3] = GEN_INT (trunc_int_for_mode (values, SImode)); + if (values >= -32768 && values < 65536) + { + emit_move_insn (operands[2], operands[3]); + DONE; + } + if (split_load_immediate (operands + 2)) + DONE; +}) + +;; Sadly, this can't be a proper named movstrict pattern, since the compiler +;; expects to be able to use registers for operand 1. +;; Note that the asm instruction is defined by the manual to take an unsigned +;; constant, but it doesn't matter to the assembler, and the compiler only +;; deals with sign-extended constants. Hence "Ksh". +(define_insn "movstricthi_1" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+x")) + (match_operand:HI 1 "immediate_operand" "Ksh"))] + "" + "%h0 = %1;" + [(set_attr "type" "mvi") + (set_attr "length" "4")]) + +;; Sign and zero extensions + +(define_insn_and_split "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d, m")))] + "" + "@ + %0 = %h1 (X); + %0 = W %h1 (X)%!" + "reload_completed && bfin_dsp_memref_p (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] +{ + operands[2] = gen_lowpart (HImode, operands[0]); +} + [(set_attr "type" "alu0,mcld")]) + +(define_insn_and_split "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "d, m")))] + "" + "@ + %0 = %h1 (Z); + %0 = W %h1 (Z)%!" + "reload_completed && bfin_dsp_memref_p (operands[1])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (zero_extend:SI (match_dup 2)))] +{ + operands[2] = gen_lowpart (HImode, operands[0]); +} + [(set_attr "type" "alu0,mcld")]) + +(define_insn "zero_extendbisi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extend:SI (match_operand:BI 1 "nonimmediate_operand" "C")))] + "" + "%0 = %1;" + [(set_attr "type" "compare")]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=d, d") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (X)%! + %0 = %T1 (X);" + [(set_attr "type" "mcld,alu0")]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (X)%! + %0 = %T1 (X);" + [(set_attr "type" "mcld,alu0")]) + + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=d, d") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (Z)%! + %0 = %T1 (Z);" + [(set_attr "type" "mcld,alu0")]) + + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=d, d") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "m, d")))] + "" + "@ + %0 = B %1 (Z)%! 
+ %0 = %T1 (Z);"
+ [(set_attr "type" "mcld,alu0")])
+
+;; DImode logical operations
+
+(define_code_iterator any_logical [and ior xor])
+(define_code_attr optab [(and "and")
+ (ior "ior")
+ (xor "xor")])
+(define_code_attr op [(and "&")
+ (ior "|")
+ (xor "^")])
+(define_code_attr high_result [(and "0")
+ (ior "%H1")
+ (xor "%H1")])
+
+;; Keep this pattern around to avoid generating NO_CONFLICT blocks.
+(define_expand "<optab>di3"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (any_logical:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "general_operand" "d")))]
+ ""
+{
+ rtx hi_half[3], lo_half[3];
+ enum insn_code icode = CODE_FOR_<optab>si3;
+ if (!reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2]))
+ emit_clobber (operands[0]);
+ split_di (operands, 3, lo_half, hi_half);
+ if (!(*insn_data[icode].operand[2].predicate) (lo_half[2], SImode))
+ lo_half[2] = force_reg (SImode, lo_half[2]);
+ emit_insn (GEN_FCN (icode) (lo_half[0], lo_half[1], lo_half[2]));
+ if (!(*insn_data[icode].operand[2].predicate) (hi_half[2], SImode))
+ hi_half[2] = force_reg (SImode, hi_half[2]);
+ emit_insn (GEN_FCN (icode) (hi_half[0], hi_half[1], hi_half[2]));
+ DONE;
+})
+
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (match_operand:QI 1 "register_operand" "d")))]
+ ""
+ "%0 = %T1 (Z);\\n\\t%H0 = 0;"
+ [(set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "%0 = %h1 (Z);\\n\\t%H0 = 0;"
+ [(set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn_and_split "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+ if (REGNO (operands[0]) != REGNO (operands[1]))
+ emit_move_insn (operands[2], operands[1]);
+})
+
+(define_insn_and_split "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:QI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+})
+
+(define_insn_and_split "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI (match_operand:HI 1 "register_operand" "d")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 2) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 3) (ashiftrt:SI (match_dup 3) (const_int 31)))]
+{
+ split_di (operands, 1, operands + 2, operands + 3);
+})
+
+;; DImode arithmetic operations
+
+(define_insn "add_with_carry"
+ [(set (match_operand:SI 0 "register_operand" "=d,d")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,d")
+ (match_operand:SI 2 "nonmemory_operand" "Ks7,d")))
+ (set (match_operand:BI 3 "register_operand" "=C,C")
+ (ltu:BI (not:SI (match_dup 1)) (match_dup 2)))]
+ ""
+ "@
+ %0 += %2; cc = ac0;
+ %0 = %1 + %2; cc = ac0;"
+ [(set_attr "type" "alu0")
+ (set_attr "length" "4")
+ (set_attr "seq_insns" "multi")])
+
+(define_insn
"sub_with_carry" + [(set (match_operand:SI 0 "register_operand" "=d") + (minus:SI (match_operand:SI 1 "register_operand" "%d") + (match_operand:SI 2 "nonmemory_operand" "d"))) + (set (match_operand:BI 3 "register_operand" "=C") + (leu:BI (match_dup 2) (match_dup 1)))] + "" + "%0 = %1 - %2; cc = ac0;" + [(set_attr "type" "alu0") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" ""))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC 34))] + "" +{ + rtx xops[8]; + xops[0] = gen_lowpart (SImode, operands[0]); + xops[1] = simplify_gen_subreg (SImode, operands[0], DImode, 4); + xops[2] = gen_lowpart (SImode, operands[1]); + xops[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4); + xops[4] = gen_lowpart (SImode, operands[2]); + xops[5] = simplify_gen_subreg (SImode, operands[2], DImode, 4); + xops[6] = gen_reg_rtx (SImode); + xops[7] = gen_rtx_REG (BImode, REG_CC); + if (!register_operand (xops[4], SImode) + && (GET_CODE (xops[4]) != CONST_INT + || !satisfies_constraint_Ks7 (xops[4]))) + xops[4] = force_reg (SImode, xops[4]); + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])) + emit_clobber (operands[0]); + emit_insn (gen_add_with_carry (xops[0], xops[2], xops[4], xops[7])); + emit_insn (gen_movbisi (xops[6], xops[7])); + if (!register_operand (xops[5], SImode) + && (GET_CODE (xops[5]) != CONST_INT + || !satisfies_constraint_Ks7 (xops[5]))) + xops[5] = force_reg (SImode, xops[5]); + if (xops[5] != const0_rtx) + emit_insn (gen_addsi3 (xops[1], xops[3], xops[5])); + else + emit_move_insn (xops[1], xops[3]); + emit_insn (gen_addsi3 (xops[1], xops[1], xops[6])); + DONE; +}) + +(define_expand "subdi3" + [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" ""))) + (clobber (reg:CC 34))] + "" +{ + rtx xops[8]; + xops[0] = gen_lowpart (SImode, operands[0]); + xops[1] = simplify_gen_subreg (SImode, operands[0], DImode, 4); + xops[2] = gen_lowpart (SImode, operands[1]); + xops[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4); + xops[4] = gen_lowpart (SImode, operands[2]); + xops[5] = simplify_gen_subreg (SImode, operands[2], DImode, 4); + xops[6] = gen_reg_rtx (SImode); + xops[7] = gen_rtx_REG (BImode, REG_CC); + if (!reg_overlap_mentioned_p (operands[0], operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[2])) + emit_clobber (operands[0]); + emit_insn (gen_sub_with_carry (xops[0], xops[2], xops[4], xops[7])); + emit_insn (gen_notbi (xops[7], xops[7])); + emit_insn (gen_movbisi (xops[6], xops[7])); + emit_insn (gen_subsi3 (xops[1], xops[3], xops[5])); + emit_insn (gen_subsi3 (xops[1], xops[1], xops[6])); + DONE; +}) + +;; Combined shift/add instructions + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a,d") + (ashift:SI (plus:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "register_operand" "a,d")) + (match_operand:SI 3 "pos_scale_operand" "P1P2,P1P2")))] + "" + "%0 = (%0 + %2) << %3;" /* "shadd %0,%2,%3;" */ + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (match_operand:SI 1 "register_operand" "a") + (mult:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "scale_by_operand" "i"))))] + "" + "%0 = %1 + (%2 << 
%X3);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (match_operand:SI 1 "register_operand" "a") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "pos_scale_operand" "i"))))] + "" + "%0 = %1 + (%2 << %3);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "scale_by_operand" "i")) + (match_operand:SI 3 "register_operand" "a")))] + "" + "%0 = %3 + (%1 << %X2);" + [(set_attr "type" "alu0")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "pos_scale_operand" "i")) + (match_operand:SI 3 "register_operand" "a")))] + "" + "%0 = %3 + (%1 << %2);" + [(set_attr "type" "alu0")]) + +(define_insn "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%d")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "d"))))] + "" + "%0 = %h1 * %h2 (IS)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%d")) + (zero_extend:SI (match_operand:HI 2 "register_operand" "d"))))] + "" + "%0 = %h1 * %h2 (FU)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi3" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "W")) + (sign_extend:SI (match_operand:HI 2 "register_operand" "W"))))] + "" + "%0 = %h2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; The alternative involving IREGS requires that the corresponding L register +;; is zero. + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=ad,a,d,b") + (plus:SI (match_operand:SI 1 "register_operand" "%0, a,d,0") + (match_operand:SI 2 "reg_or_7bit_operand" "Ks7, a,d,fP2P4")))] + "" + "@ + %0 += %2; + %0 = %1 + %2; + %0 = %1 + %2; + %0 += %2;" + [(set_attr "type" "alu0") + (set_attr "length" "2,2,2,2")]) + +(define_insn "ssaddsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_plus:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = %1 + %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=da,d,a") + (minus:SI (match_operand:SI 1 "register_operand" "0,d,0") + (match_operand:SI 2 "reg_or_neg7bit_operand" "KN7,d,a")))] + "" +{ + static const char *const strings_subsi3[] = { + "%0 += -%2;", + "%0 = %1 - %2;", + "%0 -= %2;", + }; + + if (CONSTANT_P (operands[2]) && INTVAL (operands[2]) < 0) { + rtx tmp_op = operands[2]; + operands[2] = GEN_INT (-INTVAL (operands[2])); + output_asm_insn ("%0 += %2;", operands); + operands[2] = tmp_op; + return ""; + } + + return strings_subsi3[which_alternative]; +} + [(set_attr "type" "alu0")]) + +(define_insn "sssubsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_minus:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = %1 - %2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +;; Accumulator addition + +(define_insn "addpdi3" + [(set (match_operand:PDI 0 "register_operand" "=A") + (ss_plus:PDI (match_operand:PDI 1 "register_operand" "%0") + (match_operand:PDI 2 "nonmemory_operand" "B")))] + "" + "A0 += A1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sum_of_accumulators" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_truncate:SI + (ss_plus:PDI (match_operand:PDI 2 "register_operand" "1") + (match_operand:PDI 3 "register_operand" "B")))) + (set (match_operand:PDI 1 "register_operand" "=A") + (ss_plus:PDI (match_dup 2) (match_dup 3)))] + "" + "%0 = (A0 += A1)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "us_truncpdisi2" + [(set (match_operand:SI 0 "register_operand" "=D,W") + (us_truncate:SI (match_operand:PDI 1 "register_operand" "A,B")))] + "" + "%0 = %1 (FU)%!" + [(set_attr "type" "dsp32")]) + +;; Bit test instructions + +(define_insn "*not_bittst" + [(set (match_operand:BI 0 "register_operand" "=C") + (eq:BI (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5")) + (const_int 0)))] + "" + "cc = !BITTST (%1,%2);" + [(set_attr "type" "alu0")]) + +(define_insn "*bittst" + [(set (match_operand:BI 0 "register_operand" "=C") + (ne:BI (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5")) + (const_int 0)))] + "" + "cc = BITTST (%1,%2);" + [(set_attr "type" "alu0")]) + +(define_insn_and_split "*bit_extract" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extract:SI (match_operand:SI 1 "register_operand" "d") + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5"))) + (clobber (reg:BI REG_CC))] + "" + "#" + "" + [(set (reg:BI REG_CC) + (ne:BI (zero_extract:SI (match_dup 1) (const_int 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (ne:SI (reg:BI REG_CC) (const_int 0)))]) + +(define_insn_and_split "*not_bit_extract" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extract:SI (not:SI (match_operand:SI 1 "register_operand" "d")) + (const_int 1) + (match_operand:SI 2 "immediate_operand" "Ku5"))) + (clobber (reg:BI REG_CC))] + "" + "#" + "" + [(set (reg:BI REG_CC) + (eq:BI (zero_extract:SI (match_dup 1) (const_int 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (ne:SI (reg:BI REG_CC) (const_int 0)))]) + +(define_insn "*andsi_insn" + [(set (match_operand:SI 0 "register_operand" "=d,d,d,d") + (and:SI (match_operand:SI 1 "register_operand" "%0,d,d,d") + (match_operand:SI 2 "rhs_andsi3_operand" "L,M1,M2,d")))] + "" + "@ + BITCLR (%0,%Y2); + %0 = %T1 (Z); + %0 = %h1 (Z); + %0 = %1 & %2;" + [(set_attr "type" "alu0")]) + +(define_expand "andsi3" + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (highbits_operand (operands[2], SImode)) + { + operands[2] = GEN_INT (exact_log2 (-INTVAL (operands[2]))); + emit_insn (gen_ashrsi3 (operands[0], operands[1], operands[2])); + emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2])); + DONE; + } + if (! 
rhs_andsi3_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (ior:SI (match_operand:SI 1 "register_operand" "%0,d") + (match_operand:SI 2 "regorlog2_operand" "J,d")))] + "" + "@ + BITSET (%0, %X2); + %0 = %1 | %2;" + [(set_attr "type" "alu0")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (xor:SI (match_operand:SI 1 "register_operand" "%0,d") + (match_operand:SI 2 "regorlog2_operand" "J,d")))] + "" + "@ + BITTGL (%0, %X2); + %0 = %1 ^ %2;" + [(set_attr "type" "alu0")]) + +(define_insn "ones" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(match_operand:SI 1 "register_operand" "d")] + UNSPEC_ONES))] + "" + "%h0 = ONES %1;" + [(set_attr "type" "alu0")]) + +(define_insn "smaxsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (smax:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = max(%1,%2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (smin:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 = min(%1,%2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (abs:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = abs %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssabssi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_abs:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = abs %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (neg:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = -%1;" + [(set_attr "type" "alu0")]) + +(define_insn "ssnegsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (ss_neg:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = -%1 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (not:SI (match_operand:SI 1 "register_operand" "d")))] + "" + "%0 = ~%1;" + [(set_attr "type" "alu0")]) + +(define_expand "clrsbsi2" + [(set (match_dup 2) + (truncate:HI (clrsb:SI (match_operand:SI 1 "register_operand" "d")))) + (set (match_operand:SI 0 "register_operand") + (zero_extend:SI (match_dup 2)))] + "" +{ + operands[2] = gen_reg_rtx (HImode); +}) + +(define_insn "signbitssi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (truncate:HI (clrsb:SI (match_operand:SI 1 "register_operand" "d"))))] + "" + "%h0 = signbits %1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssroundsi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (truncate:HI + (lshiftrt:SI (ss_plus:SI (match_operand:SI 1 "register_operand" "d") + (const_int 32768)) + (const_int 16))))] + "" + "%h0 = %1 (RND)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "smaxhi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (smax:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%0 = max(%1,%2) (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminhi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (smin:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%0 = min(%1,%2) (V)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "abshi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (abs:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = abs %1 (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "neghi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (neg:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = -%1;" + [(set_attr "type" "alu0")]) + +(define_insn "ssneghi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_neg:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%0 = -%1 (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "clrsbhi2" + [(set (match_operand:HI 0 "register_operand" "=d") + (clrsb:HI (match_operand:HI 1 "register_operand" "d")))] + "" + "%h0 = signbits %h1%!" + [(set_attr "type" "dsp32")]) + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "d")))] + "" + "%0 *= %2;" + [(set_attr "type" "mult")]) + +(define_expand "umulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (reg:PDI REG_A0)) + (clobber (reg:PDI REG_A1))])] + "" +{ + if (!optimize_size) + { + rtx a1reg = gen_rtx_REG (PDImode, REG_A1); + rtx a0reg = gen_rtx_REG (PDImode, REG_A0); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, + gen_lowpart (V2HImode, operands[1]), + gen_lowpart (V2HImode, operands[2]), + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_FU), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_flag_machi_parts_acconly (a1reg, + gen_lowpart (V2HImode, operands[2]), + gen_lowpart (V2HImode, operands[1]), + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_addpdi3 (a0reg, a0reg, a1reg)); + emit_insn (gen_us_truncpdisi2 (operands[0], a0reg)); + } + else + { + rtx umulsi3_highpart_libfunc + = init_one_libfunc ("__umulsi3_highpart"); + + emit_library_call_value (umulsi3_highpart_libfunc, + operands[0], LCT_NORMAL, SImode, + 2, operands[1], SImode, operands[2], SImode); + } + DONE; +}) + +(define_expand "smulsi3_highpart" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (reg:PDI REG_A0)) + (clobber (reg:PDI REG_A1))])] + "" +{ + if (!optimize_size) + { + rtx a1reg = gen_rtx_REG (PDImode, REG_A1); + rtx a0reg = gen_rtx_REG (PDImode, REG_A0); + emit_insn (gen_flag_macinit1hi (a1reg, + gen_lowpart (HImode, operands[1]), + gen_lowpart (HImode, operands[2]), + GEN_INT (MACFLAG_FU))); + emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, + gen_lowpart (V2HImode, operands[1]), + gen_lowpart (V2HImode, operands[2]), + const1_rtx, const1_rtx, + const1_rtx, const0_rtx, a1reg, + const0_rtx, GEN_INT (MACFLAG_IS), + GEN_INT (MACFLAG_IS_M))); + emit_insn 
(gen_flag_machi_parts_acconly (a1reg, + gen_lowpart (V2HImode, operands[2]), + gen_lowpart (V2HImode, operands[1]), + const1_rtx, const0_rtx, + a1reg, const0_rtx, GEN_INT (MACFLAG_IS_M))); + emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (16))); + emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg)); + } + else + { + rtx smulsi3_highpart_libfunc + = init_one_libfunc ("__smulsi3_highpart"); + + emit_library_call_value (smulsi3_highpart_libfunc, + operands[0], LCT_NORMAL, SImode, + 2, operands[1], SImode, operands[2], SImode); + } + DONE; +}) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))] + "" +{ + if (GET_CODE (operands[2]) == CONST_INT + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) + { + emit_insn (gen_movsi (operands[0], const0_rtx)); + DONE; + } +}) + +(define_insn_and_split "*ashlsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=d,d,a,a,a") + (ashift:SI (match_operand:SI 1 "register_operand" "0,d,a,a,a") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5,P1,P2,?P3P4")))] + "" + "@ + %0 <<= %2; + %0 = %1 << %2%! + %0 = %1 + %1; + %0 = %1 << %2; + #" + "PREG_P (operands[0]) && INTVAL (operands[2]) > 2" + [(set (match_dup 0) (ashift:SI (match_dup 1) (const_int 2))) + (set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3)))] + "operands[3] = GEN_INT (INTVAL (operands[2]) - 2);" + [(set_attr "type" "shft,dsp32shiftimm,shft,shft,*")]) + +(define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,d") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5")))] + "" + "@ + %0 >>>= %2; + %0 = %1 >>> %2%!" + [(set_attr "type" "shft,dsp32shiftimm")]) + +(define_insn "rotl16" + [(set (match_operand:SI 0 "register_operand" "=d") + (rotate:SI (match_operand:SI 1 "register_operand" "d") + (const_int 16)))] + "" + "%0 = PACK (%h1, %d1)%!" + [(set_attr "type" "dsp32")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "") + (rotate:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "" +{ + if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 16) + FAIL; +}) + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "register_operand" "") + (rotatert:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "" +{ + if (GET_CODE (operands[2]) != CONST_INT || INTVAL (operands[2]) != 16) + FAIL; + emit_insn (gen_rotl16 (operands[0], operands[1])); + DONE; +}) + + +(define_insn "ror_one" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "d") (const_int 1)) + (ashift:SI (zero_extend:SI (reg:BI REG_CC)) (const_int 31)))) + (set (reg:BI REG_CC) + (zero_extract:BI (match_dup 1) (const_int 1) (const_int 0)))] + "" + "%0 = ROT %1 BY -1%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "rol_one" + [(set (match_operand:SI 0 "register_operand" "+d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") (const_int 1)) + (zero_extend:SI (reg:BI REG_CC)))) + (set (reg:BI REG_CC) + (zero_extract:BI (match_dup 1) (const_int 31) (const_int 0)))] + "" + "%0 = ROT %1 BY 1%!" 
+ [(set_attr "type" "dsp32shiftimm")]) + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_move_insn (bfin_cc_rtx, const0_rtx); + emit_insn (gen_ror_one (hi_half[0], hi_half[0])); + emit_insn (gen_ror_one (lo_half[0], lo_half[0])); + DONE; +}) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_insn (gen_compare_lt (gen_rtx_REG (BImode, REG_CC), + hi_half[1], const0_rtx)); + emit_insn (gen_ror_one (hi_half[0], hi_half[0])); + emit_insn (gen_ror_one (lo_half[0], lo_half[0])); + DONE; +}) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "general_operand" "")))] + "" +{ + rtx lo_half[2], hi_half[2]; + + if (operands[2] != const1_rtx) + FAIL; + if (! rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + + split_di (operands, 2, lo_half, hi_half); + + emit_move_insn (bfin_cc_rtx, const0_rtx); + emit_insn (gen_rol_one (lo_half[0], lo_half[0])); + emit_insn (gen_rol_one (hi_half[0], hi_half[0])); + DONE; +}) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d,a") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,d,a") + (match_operand:SI 2 "nonmemory_operand" "dKu5,Ku5,P1P2")))] + "" + "@ + %0 >>= %2; + %0 = %1 >> %2%! + %0 = %1 >> %2;" + [(set_attr "type" "shft,dsp32shiftimm,shft")]) + +(define_insn "lshrpdi3" + [(set (match_operand:PDI 0 "register_operand" "=e") + (lshiftrt:PDI (match_operand:PDI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Ku5")))] + "" + "%0 = %1 >> %2%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "ashrpdi3" + [(set (match_operand:PDI 0 "register_operand" "=e") + (ashiftrt:PDI (match_operand:PDI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Ku5")))] + "" + "%0 = %1 >>> %2%!" 
+ [(set_attr "type" "dsp32shiftimm")])
+
+;; A pattern to reload the equivalent of
+;; (set (Dreg) (plus (FP) (large_constant)))
+;; or
+;; (set (dagreg) (plus (FP) (arbitrary_constant)))
+;; using a scratch register
+(define_expand "reload_insi"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "=w")
+ (match_operand:SI 1 "fp_plus_const_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" "=&a"))])]
+ ""
+{
+ rtx fp_op = XEXP (operands[1], 0);
+ rtx const_op = XEXP (operands[1], 1);
+ rtx primary = operands[0];
+ rtx scratch = operands[2];
+
+ emit_move_insn (scratch, const_op);
+ emit_insn (gen_addsi3 (scratch, scratch, fp_op));
+ emit_move_insn (primary, scratch);
+ DONE;
+})
+
+(define_mode_iterator AREG [PDI V2PDI])
+
+(define_insn "reload_in<mode>"
+ [(set (match_operand:AREG 0 "register_operand" "=e")
+ (match_operand:AREG 1 "memory_operand" "m"))
+ (clobber (match_operand:SI 2 "register_operand" "=d"))]
+ ""
+{
+ rtx xops[4];
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ split_di (operands + 1, 1, xops + 2, xops + 3);
+ output_asm_insn ("%1 = %2;", xops);
+ output_asm_insn ("%w0 = %1;", xops);
+ output_asm_insn ("%1 = %3;", xops);
+ output_asm_insn ("%x0 = %1;", xops);
+ return "";
+}
+ [(set_attr "seq_insns" "multi")
+ (set_attr "type" "mcld")
+ (set_attr "length" "12")])
+
+(define_insn "reload_out<mode>"
+ [(set (match_operand:AREG 0 "memory_operand" "=m")
+ (match_operand:AREG 1 "register_operand" "e"))
+ (clobber (match_operand:SI 2 "register_operand" "=d"))]
+ ""
+{
+ rtx xops[4];
+ xops[0] = operands[1];
+ xops[1] = operands[2];
+ split_di (operands, 1, xops + 2, xops + 3);
+ output_asm_insn ("%1 = %w0;", xops);
+ output_asm_insn ("%2 = %1;", xops);
+ output_asm_insn ("%1 = %x0;", xops);
+ output_asm_insn ("%3 = %1;", xops);
+ return "";
+}
+ [(set_attr "seq_insns" "multi")
+ (set_attr "type" "mcld")
+ (set_attr "length" "12")])
+
+;; Jump instructions
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+{
+ if (get_attr_length (insn) == 2)
+ return "jump.s %0;";
+ else
+ return "jump.l %0;";
+}
+ [(set_attr "type" "br")])
+
+(define_insn "indirect_jump"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "a"))]
+ ""
+ "jump (%0);"
+ [(set_attr "type" "misc")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "a"))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+ /* In PIC mode, the table entries are stored PC relative.
+ Convert the relative address to an absolute address. */
+ if (flag_pic)
+ {
+ rtx op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
+
+ operands[0] = expand_simple_binop (Pmode, PLUS, operands[0],
+ op1, NULL_RTX, 0, OPTAB_DIRECT);
+ }
+})
+
+(define_insn "*tablejump_internal"
+ [(set (pc) (match_operand:SI 0 "register_operand" "a"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jump (%0);"
+ [(set_attr "type" "misc")])
+
+;; Hardware loop
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+ [(parallel [(set (pc) (if_then_else
+ (ne (match_operand:SI 0 "" "")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (unspec [(const_int 0)] UNSPEC_LSETUP_END)
+ (clobber (match_dup 2))])] ; match_scratch
+ ""
+{
+ /* The loop optimizer doesn't check the predicates...
*/ + if (GET_MODE (operands[0]) != SImode) + FAIL; + bfin_hardware_loop (); + operands[2] = gen_rtx_SCRATCH (SImode); +}) + +(define_insn "loop_end" + [(set (pc) + (if_then_else (ne (match_operand:SI 2 "nonimmediate_operand" "0,0,0") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=a*d,*b*v*f,m") + (plus (match_dup 2) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 3 "=X,&r,&r"))] + "" + "@ + /* loop end %0 %l1 */ + # + #" + [(set_attr "length" "6,10,14")]) + +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "nondp_reg_or_memory_operand" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus (match_dup 0) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END) + (clobber (match_scratch:SI 2 "=&r"))] + "memory_operand (operands[0], SImode) || splitting_loops" + [(set (match_dup 2) (match_dup 0)) + (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1))) + (set (match_dup 0) (match_dup 2)) + (set (reg:BI REG_CC) (eq:BI (match_dup 2) (const_int 0))) + (set (pc) + (if_then_else (eq (reg:BI REG_CC) + (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "") + +(define_insn "lsetup_with_autoinit" + [(set (match_operand:SI 0 "lt_register_operand" "=t") + (label_ref (match_operand 1 "" ""))) + (set (match_operand:SI 2 "lb_register_operand" "=u") + (label_ref (match_operand 3 "" ""))) + (set (match_operand:SI 4 "lc_register_operand" "=k") + (match_operand:SI 5 "register_operand" "a"))] + "" + "LSETUP (%1, %3) %4 = %5;" + [(set_attr "length" "4")]) + +(define_insn "lsetup_without_autoinit" + [(set (match_operand:SI 0 "lt_register_operand" "=t") + (label_ref (match_operand 1 "" ""))) + (set (match_operand:SI 2 "lb_register_operand" "=u") + (label_ref (match_operand 3 "" ""))) + (use (match_operand:SI 4 "lc_register_operand" "k"))] + "" + "LSETUP (%1, %3) %4;" + [(set_attr "length" "4")]) + +;; Call instructions.. + +;; The explicit MEM inside the UNSPEC prevents the compiler from moving +;; the load before a branch after a NULL test, or before a store that +;; initializes a function descriptor. 
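+;;
+;; As a minimal illustration (hypothetical user code, not from these
+;; sources), the situation being guarded against is an indirect call such
+;; as:
+;;
+;;   void maybe_call (void (*fn) (void))
+;;   {
+;;     if (fn)
+;;       fn ();   /* descriptor load must stay below the NULL test */
+;;   }
+;;
+;; If the load from the function descriptor were hoisted above the test,
+;; it could read through a NULL or not-yet-initialized descriptor; keeping
+;; the MEM inside the unspec_volatile until the insn is split after reload
+;; prevents that motion.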
+ +(define_insn_and_split "load_funcdescsi" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec_volatile:SI [(mem:SI (match_operand:SI 1 "address_operand" "p"))] + UNSPEC_VOLATILE_LOAD_FUNCDESC))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (mem:SI (match_dup 1)))]) + +(define_expand "call" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))])] + "" +{ + bfin_expand_call (NULL_RTX, operands[0], operands[1], operands[2], 0); + DONE; +}) + +(define_expand "sibcall" + [(parallel [(call (match_operand:SI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (return)])] + "" +{ + bfin_expand_call (NULL_RTX, operands[0], operands[1], operands[2], 1); + DONE; +}) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (match_operand:SI 1 "" "") + (match_operand 2 "" ""))) + (use (match_operand 3 "" ""))])] + "" +{ + bfin_expand_call (operands[0], operands[1], operands[2], operands[3], 0); + DONE; +}) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0 "register_operand" "") + (call (match_operand:SI 1 "" "") + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (return)])] + "" +{ + bfin_expand_call (operands[0], operands[1], operands[2], operands[3], 1); + DONE; +}) + +(define_insn "*call_symbol_fdpic" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[3]))" + "call %0;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_symbol_fdpic" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[3]))" + "jump.l %0;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_value_symbol_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[4]))" + "call %1;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_symbol_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[4]))" + "jump.l %1;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_insn_fdpic" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "Y")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! 
SIBLING_CALL_P (insn)" + "call (%0);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_insn_fdpic" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "Y")) + (match_operand 1 "general_operand" "g")) + (use (match_operand:SI 2 "register_operand" "Z")) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%0);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_value_insn_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "Y")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn)" + "call (%1);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_value_insn_fdpic" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "Y")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand:SI 3 "register_operand" "Z")) + (use (match_operand 4 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%1);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_symbol" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[2]))" + "call %0;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_symbol" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "Q")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[0]) == SYMBOL_REF + && !bfin_longcall_p (operands[0], INTVAL (operands[2]))" + "jump.l %0;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_value_symbol" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[3]))" + "call %1;" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*sibcall_value_symbol" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "Q")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn) + && (!TARGET_ID_SHARED_LIBRARY || TARGET_LEAF_ID_SHARED_LIBRARY) + && GET_CODE (operands[1]) == SYMBOL_REF + && !bfin_longcall_p (operands[1], INTVAL (operands[3]))" + "jump.l %1;" + [(set_attr "type" "br") + (set_attr "length" "4")]) + +(define_insn "*call_insn" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "a")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (clobber (reg:SI REG_RETS))] + "! 
SIBLING_CALL_P (insn)" + "call (%0);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_insn" + [(call (mem:SI (match_operand:SI 0 "register_no_elim_operand" "z")) + (match_operand 1 "general_operand" "g")) + (use (match_operand 2 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%0);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +(define_insn "*call_value_insn" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "a")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (clobber (reg:SI REG_RETS))] + "! SIBLING_CALL_P (insn)" + "call (%1);" + [(set_attr "type" "call") + (set_attr "length" "2")]) + +(define_insn "*sibcall_value_insn" + [(set (match_operand 0 "register_operand" "=d") + (call (mem:SI (match_operand:SI 1 "register_no_elim_operand" "z")) + (match_operand 2 "general_operand" "g"))) + (use (match_operand 3 "" "")) + (return)] + "SIBLING_CALL_P (insn)" + "jump (%1);" + [(set_attr "type" "br") + (set_attr "length" "2")]) + +;; Block move patterns + +;; We cheat. This copies one more word than operand 2 indicates. + +(define_insn "rep_movsi" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI (plus:SI (match_operand:SI 3 "register_operand" "0") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (const_int 2))) + (const_int 4))) + (set (match_operand:SI 1 "register_operand" "=&b") + (plus:SI (plus:SI (match_operand:SI 4 "register_operand" "1") + (ashift:SI (match_dup 2) (const_int 2))) + (const_int 4))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 2)) + (clobber (match_scratch:HI 5 "=&d")) + (clobber (reg:SI REG_LT1)) + (clobber (reg:SI REG_LC1)) + (clobber (reg:SI REG_LB1))] + "" + "%5 = [%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || [%3++] = %5 || %5 = [%4++]; [%3++] = %5;" + [(set_attr "type" "misc") + (set_attr "length" "16") + (set_attr "seq_insns" "multi")]) + +(define_insn "rep_movhi" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI (plus:SI (match_operand:SI 3 "register_operand" "0") + (ashift:SI (match_operand:SI 2 "register_operand" "a") + (const_int 1))) + (const_int 2))) + (set (match_operand:SI 1 "register_operand" "=&b") + (plus:SI (plus:SI (match_operand:SI 4 "register_operand" "1") + (ashift:SI (match_dup 2) (const_int 1))) + (const_int 2))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 2)) + (clobber (match_scratch:HI 5 "=&d")) + (clobber (reg:SI REG_LT1)) + (clobber (reg:SI REG_LC1)) + (clobber (reg:SI REG_LB1))] + "" + "%h5 = W[%4++]; lsetup (1f, 1f) LC1 = %2; 1: MNOP || W [%3++] = %5 || %h5 = W [%4++]; W [%3++] = %5;" + [(set_attr "type" "misc") + (set_attr "length" "16") + (set_attr "seq_insns" "multi")]) + +(define_expand "movmemsi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" +{ + if (bfin_expand_movmem (operands[0], operands[1], operands[2], operands[3])) + DONE; + FAIL; +}) + +;; Conditional branch patterns +;; The Blackfin has only few condition codes: eq, lt, lte, ltu, leu + +(define_insn "compare_eq" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (eq:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1==%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_ne" + [(set (match_operand:BI 0 
"register_operand" "=C,C") + (ne:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "0" + "cc =%1!=%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_lt" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (lt:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1<%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_le" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (le:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKs3,aKs3")))] + "" + "cc =%1<=%2;" + [(set_attr "type" "compare")]) + +(define_insn "compare_leu" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (leu:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKu3,aKu3")))] + "" + "cc =%1<=%2 (iu);" + [(set_attr "type" "compare")]) + +(define_insn "compare_ltu" + [(set (match_operand:BI 0 "register_operand" "=C,C") + (ltu:BI (match_operand:SI 1 "register_operand" "d,a") + (match_operand:SI 2 "reg_or_const_int_operand" "dKu3,aKu3")))] + "" + "cc =%1<%2 (iu);" + [(set_attr "type" "compare")]) + +;; Same as above, but and CC with the overflow bit generated by the first +;; multiplication. +(define_insn "flag_mul_macv2hi_parts_acconly_andcc0" + [(set (match_operand:PDI 0 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d,d,d") + (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand 10 "const_int_operand" "PB,PA,PA")] + UNSPEC_MUL_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_dup 2) + (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_dup 3) + (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand:PDI 8 "register_operand" "1,1,1") + (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 11 "const_int_operand" "PA,PB,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (reg:BI REG_CC) + (and:BI (reg:BI REG_CC) + (unspec:BI [(vec_select:HI (match_dup 2) (parallel [(match_dup 4)])) + (vec_select:HI (match_dup 3) (parallel [(match_dup 6)])) + (match_dup 10)] + UNSPEC_MUL_WITH_FLAG)))] + "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))" +{ + rtx xops[6]; + const char *templates[] = { + "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;", + "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5;\n\tCC &= %v0;" }; + int alt = (INTVAL 
(operands[4]) + (INTVAL (operands[5]) << 1) + + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3)); + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = operands[3]; + xops[4] = operands[9]; + xops[5] = which_alternative == 0 ? operands[10] : operands[11]; + output_asm_insn (templates[alt], xops); + return ""; +} + [(set_attr "type" "misc") + (set_attr "length" "6") + (set_attr "seq_insns" "multi")]) + +(define_expand "cbranchsi4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "reg_or_const_int_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx bi_compare = bfin_gen_compare (operands[0], SImode); + emit_jump_insn (gen_cbranchbi4 (bi_compare, bfin_cc_rtx, CONST0_RTX (BImode), + operands[3])); + DONE; +}) + +(define_insn "cbranchbi4" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + asm_conditional_branch (insn, operands, 0, 0); + return ""; +} + [(set_attr "type" "brcc")]) + +;; Special cbranch patterns to deal with the speculative load problem - see +;; bfin_reorg for details. + +(define_insn "cbranch_predicted_taken" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (unspec [(const_int 0)] UNSPEC_CBRANCH_TAKEN)] + "" +{ + asm_conditional_branch (insn, operands, 0, 1); + return ""; +} + [(set_attr "type" "brcc")]) + +(define_insn "cbranch_with_nops" + [(set (pc) + (if_then_else + (match_operator 0 "bfin_bimode_comparison_operator" + [(match_operand:BI 1 "register_operand" "C") + (match_operand:BI 2 "immediate_operand" "P0")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (unspec [(match_operand 4 "immediate_operand" "")] UNSPEC_CBRANCH_NOPS)] + "reload_completed" +{ + asm_conditional_branch (insn, operands, INTVAL (operands[4]), 0); + return ""; +} + [(set_attr "type" "brcc") + (set_attr "length" "8")]) + +;; setcc insns. + +(define_expand "cstorebi4" + [(set (match_dup 4) + (match_operator:BI 1 "bfin_bimode_comparison_operator" + [(match_operand:BI 2 "register_operand" "") + (match_operand:BI 3 "reg_or_const_int_operand" "")])) + (set (match_operand:SI 0 "register_operand" "") + (ne:SI (match_dup 4) (const_int 0)))] + "" +{ + /* It could be expanded as a movbisi instruction, but the portable + alternative produces better code. 
*/ + if (GET_CODE (operands[1]) == NE) + FAIL; + + operands[4] = bfin_cc_rtx; +}) + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "reg_or_const_int_operand" "")]))] + "" +{ + rtx bi_compare, test; + + if (!bfin_direct_comparison_operator (operands[1], SImode)) + { + if (!register_operand (operands[3], SImode) + || GET_CODE (operands[1]) == NE) + FAIL; + test = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])), + SImode, operands[3], operands[2]); + } + else + test = operands[1]; + + bi_compare = bfin_gen_compare (test, SImode); + gcc_assert (GET_CODE (bi_compare) == NE); + emit_insn (gen_movbisi (operands[0], bfin_cc_rtx)); + DONE; +}) + +(define_insn "nop" + [(const_int 0)] + "" + "nop;") + +;; A nop which stays there when emitted. +(define_insn "forced_nop" + [(unspec [(const_int 0)] UNSPEC_NOP)] + "" + "nop;") + +(define_insn "mnop" + [(unspec [(const_int 0)] UNSPEC_32BIT)] + "" + "mnop%!" + [(set_attr "type" "dsp32")]) + +;;;;;;;;;;;;;;;;;;;; CC2dreg ;;;;;;;;;;;;;;;;;;;;;;;;; +(define_insn "movsibi" + [(set (match_operand:BI 0 "register_operand" "=C") + (ne:BI (match_operand:SI 1 "register_operand" "d") + (const_int 0)))] + "" + "CC = %1;" + [(set_attr "length" "2")]) + +(define_insn_and_split "movbisi" + [(set (match_operand:SI 0 "register_operand" "=d") + (ne:SI (match_operand:BI 1 "register_operand" "C") + (const_int 0)))] + "" + "#" + "" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:BI 1 "register_operand" "")))] + "") + +(define_insn "notbi" + [(set (match_operand:BI 0 "register_operand" "=C") + (eq:BI (match_operand:BI 1 "register_operand" " 0") + (const_int 0)))] + "" + "%0 = ! %0;" /* NOT CC;" */ + [(set_attr "type" "compare")]) + +;; Vector and DSP insns + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 24)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 8))))] + "" + "%0 = ALIGN8(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 16)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 16))))] + "" + "%0 = ALIGN16(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=d") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "d") + (const_int 8)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "d") + (const_int 24))))] + "" + "%0 = ALIGN24(%1, %2)%!" + [(set_attr "type" "dsp32")]) + +;; Prologue and epilogue. + +(define_expand "prologue" + [(const_int 1)] + "" + "bfin_expand_prologue (); DONE;") + +(define_expand "epilogue" + [(const_int 1)] + "" + "bfin_expand_epilogue (1, 0, 0); DONE;") + +(define_expand "sibcall_epilogue" + [(const_int 1)] + "" + "bfin_expand_epilogue (0, 0, 1); DONE;") + +(define_expand "eh_return" + [(use (match_operand:SI 0 "register_operand" ""))] + "" +{ + emit_insn (gen_eh_store_handler (EH_RETURN_HANDLER_RTX, operands[0])); + emit_jump_insn (gen_eh_return_internal ()); + emit_barrier (); + DONE; +}) + +(define_insn "eh_store_handler" + [(unspec_volatile [(match_operand:SI 1 "register_operand" "da")] + UNSPEC_VOLATILE_STORE_EH_HANDLER) + (clobber (match_operand:SI 0 "memory_operand" "=m"))] + "" + "%0 = %1%!" 
+ [(set_attr "type" "mcst")]) + +(define_insn_and_split "eh_return_internal" + [(eh_return)] + "" + "#" + "epilogue_completed" + [(const_int 1)] + "bfin_expand_epilogue (1, 1, 0); DONE;") + +(define_insn "link" + [(set (mem:SI (plus:SI (reg:SI REG_SP) (const_int -4))) (reg:SI REG_RETS)) + (set (mem:SI (plus:SI (reg:SI REG_SP) (const_int -8))) (reg:SI REG_FP)) + (set (reg:SI REG_FP) + (plus:SI (reg:SI REG_SP) (const_int -8))) + (set (reg:SI REG_SP) + (plus:SI (reg:SI REG_SP) (match_operand:SI 0 "immediate_operand" "i")))] + "" + "LINK %Z0;" + [(set_attr "length" "4")]) + +(define_insn "unlink" + [(set (reg:SI REG_FP) (mem:SI (reg:SI REG_FP))) + (set (reg:SI REG_RETS) (mem:SI (plus:SI (reg:SI REG_FP) (const_int 4)))) + (set (reg:SI REG_SP) (plus:SI (reg:SI REG_FP) (const_int 8)))] + "" + "UNLINK;" + [(set_attr "length" "4")]) + +;; This pattern is slightly clumsy. The stack adjust must be the final SET in +;; the pattern, otherwise dwarf2out becomes very confused about which reg goes +;; where on the stack, since it goes through all elements of the parallel in +;; sequence. +(define_insn "push_multiple" + [(match_parallel 0 "push_multiple_operation" + [(unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_PUSH_MULTIPLE)])] + "" +{ + output_push_multiple (insn, operands); + return ""; +}) + +(define_insn "pop_multiple" + [(match_parallel 0 "pop_multiple_operation" + [(set (reg:SI REG_SP) + (plus:SI (reg:SI REG_SP) (match_operand:SI 1 "immediate_operand" "i")))])] + "" +{ + output_pop_multiple (insn, operands); + return ""; +}) + +(define_insn "return_internal" + [(return) + (use (match_operand 0 "register_operand" ""))] + "reload_completed" +{ + switch (REGNO (operands[0])) + { + case REG_RETX: + return "rtx;"; + case REG_RETN: + return "rtn;"; + case REG_RETI: + return "rti;"; + case REG_RETS: + return "rts;"; + } + gcc_unreachable (); +}) + +;; When used at a location where CC contains 1, causes a speculative load +;; that is later cancelled. This is used for certain workarounds in +;; interrupt handler prologues. +(define_insn "dummy_load" + [(unspec_volatile [(match_operand 0 "register_operand" "a") + (match_operand 1 "register_operand" "C")] + UNSPEC_VOLATILE_DUMMY)] + "" + "if cc jump 4;\n\tr7 = [%0];" + [(set_attr "type" "misc") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +;; A placeholder insn inserted before the final scheduling pass. It is used +;; to improve scheduling of loads when workarounds for speculative loads are +;; needed, by not placing them in the first few cycles after a conditional +;; branch. 
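+;;
+;; As a rough illustration (schematic, not actual compiler output), the
+;; sequence being avoided is a load scheduled in the shadow of a
+;; conditional branch, e.g.
+;;
+;;   if cc jump .Lskip;
+;;   r0 = [p0];
+;;
+;; On parts that need the workaround such a load can be issued
+;; speculatively even when the branch is taken, so "stall" markers keep
+;; the scheduler from placing loads in those slots.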
+(define_insn "stall" + [(unspec_volatile [(match_operand 0 "const_int_operand" "P1P3")] + UNSPEC_VOLATILE_STALL)] + "" + "" + [(set_attr "type" "stall")]) + +(define_insn "csync" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_CSYNC)] + "" + "csync;" + [(set_attr "type" "sync")]) + +(define_insn "ssync" + [(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_SSYNC)] + "" + "ssync;" + [(set_attr "type" "sync")]) + +(define_insn "trap" + [(trap_if (const_int 1) (const_int 3))] + "" + "excpt 3;" + [(set_attr "type" "misc") + (set_attr "length" "2")]) + +(define_insn "trapifcc" + [(trap_if (reg:BI REG_CC) (const_int 3))] + "" + "if !cc jump 4 (bp); excpt 3;" + [(set_attr "type" "misc") + (set_attr "length" "4") + (set_attr "seq_insns" "multi")]) + +;;; Vector instructions + +;; First, all sorts of move variants + +(define_insn "movhiv2hi_low" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (match_operand:HI 2 "register_operand" "d") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h2 << 0%!" + [(set_attr "type" "dsp32shiftimm")]) + +(define_insn "movhiv2hi_high" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (match_operand:HI 2 "register_operand" "d")))] + "" + "%d0 = %h2 << 0%!" + [(set_attr "type" "dsp32shiftimm")]) + +;; No earlyclobber on alternative two since our sequence ought to be safe. +;; The order of operands is intentional to match the VDSP builtin (high word +;; is passed first). +(define_insn_and_split "composev2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d,d") + (vec_concat:V2HI (match_operand:HI 2 "register_operand" "0,d") + (match_operand:HI 1 "register_operand" "d,d")))] + "" + "@ + %d0 = %h1 << 0%! + #" + "reload_completed" + [(set (match_dup 0) + (vec_concat:V2HI + (vec_select:HI (match_dup 0) (parallel [(const_int 0)])) + (match_dup 1))) + (set (match_dup 0) + (vec_concat:V2HI + (match_dup 2) + (vec_select:HI (match_dup 0) (parallel [(const_int 1)]))))] + "" + [(set_attr "type" "dsp32shiftimm")]) + +; Like composev2hi, but operating on elements of V2HI vectors. +; Useful on its own, and as a combiner bridge for the multiply and +; mac patterns. +(define_insn "packv2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d,d,d,d,d,d") + (vec_concat:V2HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "0,0,d,d,d,d,d,d") + (parallel [(match_operand 3 "const01_operand" "P0,P0,P0,P1,P0,P1,P0,P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,0,0,d,d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0,P1,P1,P1,P0,P0,P1,P1")]))))] + "" + "@ + %d0 = %h2 << 0%! + %d0 = %d2 << 0%! + %h0 = %h1 << 0%! + %h0 = %d1 << 0%! + %0 = PACK (%h2,%h1)%! + %0 = PACK (%h2,%d1)%! + %0 = PACK (%d2,%h1)%! + %0 = PACK (%d2,%d1)%!" + [(set_attr "type" "dsp32shiftimm,dsp32shiftimm,dsp32shiftimm,dsp32shiftimm,dsp32,dsp32,dsp32,dsp32")]) + +(define_insn "movv2hi_hi" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,d,d") + (parallel [(match_operand 2 "const01_operand" "P0,P0,P1")])))] + "" + "@ + /* optimized out */ + %h0 = %h1 << 0%! + %h0 = %d1 << 0%!" 
+ [(set_attr "type" "dsp32shiftimm")]) + +(define_expand "movv2hi_hi_low" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "") + (parallel [(const_int 0)])))] + "" + "") + +(define_expand "movv2hi_hi_high" + [(set (match_operand:HI 0 "register_operand" "") + (vec_select:HI (match_operand:V2HI 1 "register_operand" "") + (parallel [(const_int 1)])))] + "" + "") + +;; Unusual arithmetic operations on 16-bit registers. + +(define_code_iterator sp_or_sm [ss_plus ss_minus]) +(define_code_attr spm_string [(ss_plus "+") (ss_minus "-")]) +(define_code_attr spm_name [(ss_plus "add") (ss_minus "sub")]) + +(define_insn "sshi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (sp_or_sm:HI (match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d")))] + "" + "%h0 = %h1 %h2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_parts" + [(set (match_operand:HI 0 "register_operand" "=d") + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")]))))] + "" +{ + const char *templates[] = { + "%h0 = %h1 %h2 (S)%!", + "%h0 = %d1 %h2 (S)%!", + "%h0 = %h1 %d2 (S)%!", + "%h0 = %d1 %d2 (S)%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_low_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 0)])) + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])))))] + "" +{ + const char *templates[] = { + "%h0 = %h2 %h3 (S)%!", + "%h0 = %d2 %h3 (S)%!", + "%h0 = %h2 %d3 (S)%!", + "%h0 = %d2 %d3 (S)%!" }; + int alt = INTVAL (operands[4]) + (INTVAL (operands[5]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "sshi3_high_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (sp_or_sm:HI (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")]))) + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0") + (parallel [(const_int 1)]))))] + "" +{ + const char *templates[] = { + "%d0 = %h2 %h3 (S)%!", + "%d0 = %d2 %h3 (S)%!", + "%d0 = %h2 %d3 (S)%!", + "%d0 = %d2 %d3 (S)%!" }; + int alt = INTVAL (operands[4]) + (INTVAL (operands[5]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; V2HI vector insns + +(define_insn "addv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (plus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 +|+ %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_plus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 +|+ %2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "subv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (minus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 -|- %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_minus:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = %1 -|- %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 +|- %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 -|+ %2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddsubv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (ss_minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 +|- %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssubaddv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (vec_concat:V2HI + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))) + (ss_plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %1 -|+ %2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sublohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 - %h2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "subhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 - %d2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sssublohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 - %h2 (S)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "sssubhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_minus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 - %d2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addlohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 + %h2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "addhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 + %d2%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddlohiv2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)]))))] + "" + "%h0 = %d1 + %h2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssaddhilov2hi3" + [(set (match_operand:HI 0 "register_operand" "=d") + (ss_plus:HI (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)]))))] + "" + "%h0 = %h1 + %d2 (S)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "sminv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (smin:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = MIN (%1, %2) (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "smaxv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (smax:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%0 = MAX (%1, %2) (V)%!" + [(set_attr "type" "dsp32")]) + +;; Multiplications. + +;; The Blackfin allows a lot of different options, and we need many patterns to +;; cover most of the hardware's abilities. +;; There are a few simple patterns using MULT rtx codes, but most of them use +;; an unspec with a const_int operand that determines which flag to use in the +;; instruction. +;; There are variants for single and parallel multiplications. +;; There are variants which just use 16-bit lowparts as inputs, and variants +;; which allow the user to choose just which halves to use as input values. +;; There are variants which set D registers, variants which set accumulators, +;; variants which set both, some of them optionally using the accumulators as +;; inputs for multiply-accumulate operations. + +(define_insn "flag_mulhi" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%h0 = %h1 * %h2 %M3%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhi_parts" + [(set (match_operand:HI 0 "register_operand" "=d") + (unspec:HI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (match_operand 5 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = %h1 * %h2 %M5%!", + "%h0 = %d1 * %h2 %M5%!", + "%h0 = %h1 * %d2 %M5%!", + "%h0 = %d1 * %d2 %M5%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhisi" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(match_operand:HI 1 "register_operand" "d") + (match_operand:HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%0 = %h1 * %h2 %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulhisi_parts" + [(set (match_operand:SI 0 "register_operand" "=d") + (unspec:SI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 4 "const01_operand" "P0P1")])) + (match_operand 5 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%0 = %h1 * %h2 %M5%!", + "%0 = %d1 * %h2 %M5%!", + "%0 = %h1 * %d2 %M5%!", + "%0 = %d1 * %d2 %M5%!" }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; Three alternatives here to cover all possible allocations: +;; 0. mac flag is usable only for accumulator 1 - use A1 and odd DREG +;; 1. mac flag is usable for accumulator 0 - use A0 and even DREG +;; 2. mac flag is usable in any accumulator - use A1 and odd DREG +;; Other patterns which don't have a DREG destination can collapse cases +;; 1 and 2 into one. +(define_insn "flag_machi" + [(set (match_operand:HI 0 "register_operand" "=W,D,W") + (unspec:HI [(match_operand:HI 2 "register_operand" "d,d,d") + (match_operand:HI 3 "register_operand" "d,d,d") + (match_operand 4 "register_operand" "1,1,1") + (match_operand 5 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 6 "const_int_operand" "PB,PA,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,A,B") + (unspec:PDI [(match_dup 1) (match_dup 2) (match_dup 3) + (match_dup 4) (match_dup 5)] + UNSPEC_MAC_WITH_FLAG))] + "" + "%h0 = (%1 %b5 %h2 * %h3) %M6%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_machi_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(match_operand:HI 1 "register_operand" "d,d") + (match_operand:HI 2 "register_operand" "d,d") + (match_operand 3 "register_operand" "0,0") + (match_operand 4 "const01_operand" "P0P1,P0P1") + (match_operand 5 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" + "%0 %b4 %h1 * %h2 %M5%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_machi_parts_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 1 "register_operand" "d,d") + (parallel [(match_operand 3 "const01_operand" "P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1")])) + (match_operand:PDI 5 "register_operand" "0,0") + (match_operand 6 "const01_operand" "P0P1,P0P1") + (match_operand 7 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%0 %b6 %h1 * %h2 %M7%!", + "%0 %b6 %d1 * %h2 %M7%!", + "%0 %b6 %h1 * %d2 %M7%!", + "%0 %b6 %d1 * %d2 %M7%!" + }; + int alt = INTVAL (operands[3]) + (INTVAL (operands[4]) << 1); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinithi" + [(set (match_operand:HI 0 "register_operand" "=W,D,W") + (unspec:HI [(match_operand:HI 1 "register_operand" "d,d,d") + (match_operand:HI 2 "register_operand" "d,d,d") + (match_operand 3 "const_int_operand" "PB,PA,PA")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:PDI 4 "register_operand" "=B,A,B") + (unspec:PDI [(match_dup 1) (match_dup 2) (match_dup 3)] + UNSPEC_MAC_WITH_FLAG))] + "" + "%h0 = (%4 = %h1 * %h2) %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinit1hi" + [(set (match_operand:PDI 0 "register_operand" "=B,e") + (unspec:PDI [(match_operand:HI 1 "register_operand" "d,d") + (match_operand:HI 2 "register_operand" "d,d") + (match_operand 3 "const_int_operand" "PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "" + "%0 = %h1 * %h2 %M3%!" + [(set_attr "type" "dsp32")]) + +(define_insn "mulv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (mult:V2HI (match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d")))] + "" + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 (IS)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulv2hi" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "d") + (match_operand:V2HI 2 "register_operand" "d") + (match_operand 3 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 %M3%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "flag_mulv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MUL_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = %h1 * %h2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %h1 * %h2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %d1 * %h2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %h1 * %h2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %d1 * %h2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %h1 * %d2 %M7%!", + "%h0 = %h1 * %d2, %d0 = %d1 * %d2 %M7%!", + "%h0 = %d1 * %d2, %d0 = %d1 * %d2 %M7%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; A slightly complicated pattern. +;; Operand 0 is the halfword output; operand 11 is the accumulator output +;; Halfword inputs are operands 1 and 2; operands 3, 4, 5 and 6 specify which +;; parts of these 2x16 bit registers to use. +;; Operand 7 is the accumulator input. +;; Operands 8/9 specify whether low/high parts are mac (0) or msu (1) +;; Operand 10 is the macflag to be used. 
+(define_insn "flag_macv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand:V2PDI 7 "register_operand" "e") + (match_operand 8 "const01_operand" "P0P1") + (match_operand 9 "const01_operand" "P0P1") + (match_operand 10 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:V2PDI 11 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI (match_dup 1) (parallel [(match_dup 3)])) + (vec_select:HI (match_dup 1) (parallel [(match_dup 4)]))) + (vec_concat:V2HI + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)])) + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)]))) + (match_dup 7) (match_dup 8) (match_dup 9) (match_dup 10)] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %h1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %d1 * %h2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %h2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %h2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %h1 * %d2) %M10%!", + "%h0 = (A0 %b8 %h1 * %d2), %d0 = (A1 %b9 %d1 * %d2) %M10%!", + "%h0 = (A0 %b8 %d1 * %d2), %d0 = (A1 %b9 %d1 * %d2) %M10%!" 
}; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macv2hi_parts_acconly" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand:V2PDI 7 "register_operand" "e") + (match_operand 8 "const01_operand" "P0P1") + (match_operand 9 "const01_operand" "P0P1") + (match_operand 10 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "A0 %b8 %h1 * %h2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %h1 * %h2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %d1 * %h2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %h1 * %h2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %d1 * %h2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %h1 * %d2 %M10%!", + "A0 %b8 %h1 * %d2, A1 %b9 %d1 * %d2 %M10%!", + "A0 %b8 %d1 * %d2, A1 %b9 %d1 * %d2 %M10%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; Same as above, but initializing the accumulators and therefore a couple fewer +;; necessary operands. 
+(define_insn "flag_macinitv2hi_parts" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (unspec:V2HI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG)) + (set (match_operand:V2PDI 8 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI (match_dup 1) (parallel [(match_dup 3)])) + (vec_select:HI (match_dup 1) (parallel [(match_dup 4)]))) + (vec_concat:V2HI + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)])) + (vec_select:HI (match_dup 2) (parallel [(match_dup 5)]))) + (match_dup 7)] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %h1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %d1 * %h2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %h2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %h2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %h1 * %d2) %M7%!", + "%h0 = (A0 = %h1 * %d2), %d0 = (A1 = %d1 * %d2) %M7%!", + "%h0 = (A0 = %d1 * %d2), %d0 = (A1 = %d1 * %d2) %M7%!" }; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +(define_insn "flag_macinit1v2hi_parts" + [(set (match_operand:V2PDI 0 "register_operand" "=e") + (unspec:V2PDI [(vec_concat:V2HI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "d") + (parallel [(match_operand 3 "const01_operand" "P0P1")])) + (vec_select:HI + (match_dup 1) + (parallel [(match_operand 4 "const01_operand" "P0P1")]))) + (vec_concat:V2HI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(match_operand 5 "const01_operand" "P0P1")])) + (vec_select:HI (match_dup 2) + (parallel [(match_operand 6 "const01_operand" "P0P1")]))) + (match_operand 7 "const_int_operand" "n")] + UNSPEC_MAC_WITH_FLAG))] + "" +{ + const char *templates[] = { + "A0 = %h1 * %h2, A1 = %h1 * %h2 %M7%!", + "A0 = %d1 * %h2, A1 = %h1 * %h2 %M7%!", + "A0 = %h1 * %h2, A1 = %d1 * %h2 %M7%!", + "A0 = %d1 * %h2, A1 = %d1 * %h2 %M7%!", + "A0 = %h1 * %d2, A1 = %h1 * %h2 %M7%!", + "A0 = %d1 * %d2, A1 = %h1 * %h2 %M7%!", + "A0 = %h1 * %d2, A1 = %d1 * %h2 %M7%!", + "A0 = %d1 * %d2, A1 = %d1 * %h2 %M7%!", + "A0 = %h1 * %h2, A1 = %h1 * %d2 %M7%!", + "A0 = %d1 * %h2, A1 = %h1 * %d2 %M7%!", + "A0 = %h1 * %h2, A1 = %d1 * %d2 %M7%!", + "A0 = %d1 * %h2, A1 = %d1 * %d2 %M7%!", + "A0 = %h1 * %d2, A1 = %h1 * %d2 %M7%!", + "A0 = %d1 * %d2, A1 = %h1 * %d2 %M7%!", + "A0 = %h1 * %d2, A1 = %d1 * %d2 %M7%!", + "A0 = %d1 * %d2, A1 = %d1 * %d2 %M7%!" 
}; + int alt = (INTVAL (operands[3]) + (INTVAL (operands[4]) << 1) + + (INTVAL (operands[5]) << 2) + (INTVAL (operands[6]) << 3)); + return templates[alt]; +} + [(set_attr "type" "dsp32")]) + +;; A mixture of multiply and multiply-accumulate for when we only want to +;; initialize one part. +(define_insn "flag_mul_macv2hi_parts_acconly" + [(set (match_operand:PDI 0 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_operand:V2HI 2 "register_operand" "d,d,d") + (parallel [(match_operand 4 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_operand:V2HI 3 "register_operand" "d,d,d") + (parallel [(match_operand 6 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand 10 "const_int_operand" "PB,PA,PA")] + UNSPEC_MUL_WITH_FLAG)) + (set (match_operand:PDI 1 "register_operand" "=B,e,e") + (unspec:PDI [(vec_select:HI + (match_dup 2) + (parallel [(match_operand 5 "const01_operand" "P0P1,P0P1,P0P1")])) + (vec_select:HI + (match_dup 3) + (parallel [(match_operand 7 "const01_operand" "P0P1,P0P1,P0P1")])) + (match_operand:PDI 8 "register_operand" "1,1,1") + (match_operand 9 "const01_operand" "P0P1,P0P1,P0P1") + (match_operand 11 "const_int_operand" "PA,PB,PA")] + UNSPEC_MAC_WITH_FLAG))] + "MACFLAGS_MATCH_P (INTVAL (operands[10]), INTVAL (operands[11]))" +{ + rtx xops[6]; + const char *templates[] = { + "%0 = %h2 * %h3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %h2 * %h3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %d2 * %h3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %h2 * %h3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %d2 * %h3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %h2 * %d3 %M5%!", + "%0 = %h2 * %d3, %1 %b4 %d2 * %d3 %M5%!", + "%0 = %d2 * %d3, %1 %b4 %d2 * %d3 %M5%!" }; + int alt = (INTVAL (operands[4]) + (INTVAL (operands[5]) << 1) + + (INTVAL (operands[6]) << 2) + (INTVAL (operands[7]) << 3)); + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = operands[2]; + xops[3] = operands[3]; + xops[4] = operands[9]; + xops[5] = which_alternative == 0 ? operands[10] : operands[11]; + output_asm_insn (templates[alt], xops); + return ""; +} + [(set_attr "type" "dsp32")]) + + +(define_code_iterator s_or_u [sign_extend zero_extend]) +(define_code_attr su_optab [(sign_extend "mul") + (zero_extend "umul")]) +(define_code_attr su_modifier [(sign_extend "IS") + (zero_extend "FU")]) + +(define_insn "hisi_ll" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2 ()%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hl" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hh" + [(set (match_operand:SI 0 "register_operand" "=d") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +;; Additional variants for signed * unsigned multiply. + +(define_insn "usmulhisi_ull" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ulh" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d2 * %h1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_uhl" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 0)])))))] + "" + "%0 = %h2 * %d1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_uhh" + [(set (match_operand:SI 0 "register_operand" "=W") + (mult:SI (zero_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "%d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d") + (parallel [(const_int 1)])))))] + "" + "%0 = %d2 * %d1 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; Parallel versions of these operations. First, normal signed or unsigned +;; multiplies. + +(define_insn "hisi_ll_lh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %d2 ()%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "hisi_ll_hl" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_ll_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh_hl" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %h2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_lh_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +(define_insn "hisi_hl_hh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (s_or_u:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (s_or_u:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (s_or_u:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %d2 ()%!" + [(set_attr "type" "dsp32")]) + +;; Special signed * unsigned variants. 
+ +(define_insn "usmulhisi_ll_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_ll_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %h2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %h1 * %h2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_lh_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %h1 * %d2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %h1 * %d2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hl_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 0)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %h2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_lul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %d2, %3 = %h1 * %h2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_luh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2, %3 = %h1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_hul" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))))] + "" + "%0 = %d1 * %d2, %3 = %d1 * %h2 (IS,M)%!" 
+ [(set_attr "type" "dsp32")]) + +(define_insn "usmulhisi_hh_huh" + [(set (match_operand:SI 0 "register_operand" "=q0,q2,q4,q6") + (mult:SI (sign_extend:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "d,d,d,d") + (parallel [(const_int 1)]))))) + (set (match_operand:SI 3 "register_operand" "=q1,q3,q5,q7") + (mult:SI (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (zero_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] + "" + "%0 = %d1 * %d2, %3 = %d1 * %d2 (IS,M)%!" + [(set_attr "type" "dsp32")]) + +;; Vector neg/abs. + +(define_insn "ssnegv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_neg:V2HI (match_operand:V2HI 1 "register_operand" "d")))] + "" + "%0 = - %1 (V)%!" + [(set_attr "type" "dsp32")]) + +(define_insn "ssabsv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=d") + (ss_abs:V2HI (match_operand:V2HI 1 "register_operand" "d")))] + "" + "%0 = ABS %1 (V)%!" + [(set_attr "type" "dsp32")]) + +;; Shifts. + +(define_insn "ssashiftv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d") + (if_then_else:V2HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (ashiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:V2HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (V, S)%! + %0 = %1 << %2 (V,S)%! + %0 = %1 >>> %N2 (V,S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "ssashifthi3" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (if_then_else:HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (ashiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (V, S)%! + %0 = %1 << %2 (V,S)%! + %0 = %1 >>> %N2 (V,S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "ssashiftsi3" + [(set (match_operand:SI 0 "register_operand" "=d,d,d") + (if_then_else:SI + (lt (match_operand:HI 2 "reg_or_const_int_operand" "d,Ku5,Ks5") (const_int 0)) + (ashiftrt:SI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ss_ashift:SI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = ASHIFT %1 BY %h2 (S)%! + %0 = %1 << %2 (S)%! + %0 = %1 >>> %N2 (S)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "lshiftv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=d,d,d") + (if_then_else:V2HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lshiftrt:V2HI (match_operand:V2HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ashift:V2HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = LSHIFT %1 BY %h2 (V)%! + %0 = %1 << %2 (V)%! + %0 = %1 >> %N2 (V)%!" + [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +(define_insn "lshifthi3" + [(set (match_operand:HI 0 "register_operand" "=d,d,d") + (if_then_else:HI + (lt (match_operand:HI 2 "vec_shift_operand" "d,Ku4,Ks4") (const_int 0)) + (lshiftrt:HI (match_operand:HI 1 "register_operand" "d,d,d") + (match_dup 2)) + (ashift:HI (match_dup 1) (match_dup 2))))] + "" + "@ + %0 = LSHIFT %1 BY %h2 (V)%! + %0 = %1 << %2 (V)%! + %0 = %1 >> %N2 (V)%!" 
+ [(set_attr "type" "dsp32,dsp32shiftimm,dsp32shiftimm")]) + +;; Load without alignment exception (masking off low bits) + +(define_insn "loadbytes" + [(set (match_operand:SI 0 "register_operand" "=d") + (mem:SI (and:SI (match_operand:SI 1 "register_operand" "b") + (const_int -4))))] + "" + "DISALGNEXCPT || %0 = [%1];" + [(set_attr "type" "mcld") + (set_attr "length" "8")]) + +(include "sync.md") diff --git a/gcc-4.9/gcc/config/bfin/bfin.opt b/gcc-4.9/gcc/config/bfin/bfin.opt new file mode 100644 index 000000000..b736d91f8 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/bfin.opt @@ -0,0 +1,118 @@ +; Options for the Blackfin port of the compiler +; +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/bfin/bfin-opts.h + +; Value of -mcpu=. +Variable +bfin_cpu_t bfin_cpu_type = BFIN_CPU_UNKNOWN + +; -msi-revision support. There are three special values: +; -1 -msi-revision=none. +; 0xffff -msi-revision=any. +Variable +int bfin_si_revision + +; The workarounds enabled. +Variable +unsigned int bfin_workarounds = 0 + +msim +Target RejectNegative +Use simulator runtime + +mcpu= +Target RejectNegative Joined +Specify the name of the target CPU + +momit-leaf-frame-pointer +Target Report Mask(OMIT_LEAF_FRAME_POINTER) +Omit frame pointer for leaf functions + +mlow64k +Target Report Mask(LOW_64K) +Program is entirely located in low 64k of memory + +mcsync-anomaly +Target Report Var(bfin_csync_anomaly) Init(-1) +Work around a hardware anomaly by adding a number of NOPs before a +CSYNC or SSYNC instruction. + +mspecld-anomaly +Target Report Var(bfin_specld_anomaly) Init(-1) +Avoid speculative loads to work around a hardware anomaly. + +mid-shared-library +Target Report Mask(ID_SHARED_LIBRARY) +Enabled ID based shared library + +mleaf-id-shared-library +Target Report Mask(LEAF_ID_SHARED_LIBRARY) +Generate code that won't be linked against any other ID shared libraries, +but may be used as a shared library. + +mshared-library-id= +Target RejectNegative Joined UInteger Var(bfin_library_id) +ID of shared library to build + +msep-data +Target Report Mask(SEP_DATA) +Enable separate data segment + +mlong-calls +Target Report Mask(LONG_CALLS) +Avoid generating pc-relative calls; use indirection + +mfast-fp +Target Report Mask(FAST_FP) +Link with the fast floating-point library + +mfdpic +Target Report Mask(FDPIC) +Enable Function Descriptor PIC mode + +minline-plt +Target Report Mask(INLINE_PLT) +Enable inlining of PLT in function calls + +mstack-check-l1 +Target Report Mask(STACK_CHECK_L1) +Do stack checking using bounds in L1 scratch memory + +mmulticore +Target Report Mask(MULTICORE) +Enable multicore support + +mcorea +Target Report Mask(COREA) +Build for Core A + +mcoreb +Target Report Mask(COREB) +Build for Core B + +msdram +Target Report Mask(SDRAM) +Build for SDRAM + +micplb +Target Report Mask(ICPLB) +Assume ICPLBs are enabled at runtime. 
diff --git a/gcc-4.9/gcc/config/bfin/constraints.md b/gcc-4.9/gcc/config/bfin/constraints.md new file mode 100644 index 000000000..7cebbbff9 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/constraints.md @@ -0,0 +1,225 @@ +;; Constraint definitions for Blackfin +;; Copyright (C) 2008-2014 Free Software Foundation, Inc. +;; Contributed by Analog Devices + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_register_constraint "a" "PREGS" + "A Pn register.") + +(define_register_constraint "d" "DREGS" + "A Rn register.") + +(define_register_constraint "z" "PREGS_CLOBBERED" + "A call clobbered Pn register.") + +(define_register_constraint "D" "EVEN_DREGS" + "An even-numbered Rn register.") + +(define_register_constraint "W" "ODD_DREGS" + "An odd-numbered Rn register.") + +(define_register_constraint "e" "AREGS" + "An accumulator register.") + +(define_register_constraint "A" "EVEN_AREGS" + "An even-numbered accumulator; A0.") + +(define_register_constraint "B" "ODD_AREGS" + "An odd-numbered accumulator; A1.") + +(define_register_constraint "b" "IREGS" + "An I register.") + +(define_register_constraint "v" "BREGS" + "A B register.") + +(define_register_constraint "f" "MREGS" + "An M register.") + +(define_register_constraint "c" "CIRCREGS" + "A register used for circular buffering, i.e. I, B, or L registers.") + +(define_register_constraint "C" "CCREGS" + "The CC register.") + +(define_register_constraint "t" "LT_REGS" + "LT0 or LT1.") + +(define_register_constraint "u" "LB_REGS" + "LB0 or LB1.") + +(define_register_constraint "k" "LC_REGS" + "LC0 or LC1.") + +(define_register_constraint "x" "MOST_REGS" + "Any R, P, B, M, I or L register.") + +(define_register_constraint "y" "PROLOGUE_REGS" + "Additional registers typically used only in prologues and epilogues: + RETS, RETN, RETI, RETX, RETE, ASTAT, SEQSTAT and USP.") + +(define_register_constraint "w" "NON_A_CC_REGS" + "Any register except accumulators or CC.") + +(define_register_constraint "Z" "FDPIC_REGS" + "@internal The FD-PIC GOT pointer; P3.") + +(define_register_constraint "Y" "FDPIC_FPTR_REGS" + "@internal The FD-PIC function pointer register; P1.") + +(define_register_constraint "q0" "D0REGS" + "The register R0.") + +(define_register_constraint "q1" "D1REGS" + "The register R1.") + +(define_register_constraint "q2" "D2REGS" + "The register R2.") + +(define_register_constraint "q3" "D3REGS" + "The register R3.") + +(define_register_constraint "q4" "D4REGS" + "The register R4.") + +(define_register_constraint "q5" "D5REGS" + "The register R5.") + +(define_register_constraint "q6" "D6REGS" + "The register R6.") + +(define_register_constraint "q7" "D7REGS" + "The register R7.") + +(define_register_constraint "qA" "P0REGS" + "The register P0.") + +;; Constant constraints. + +(define_constraint "J" + "A constant value of the form 2**N, where N 5-bit wide." 
+ (and (match_code "const_int") + (match_test "log2constp (ival)"))) + +(define_constraint "Ks3" + "A signed 3 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -4 && ival <= 3"))) + +(define_constraint "Ku3" + "An unsigned 3 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 7"))) + +(define_constraint "Ks4" + "A signed 4 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -8 && ival <= 7"))) + +(define_constraint "Ku4" + "An unsigned 4 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 15"))) + +(define_constraint "Ks5" + "A signed 5 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -16 && ival <= 15"))) + +(define_constraint "Ku5" + "An unsigned 5 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 31"))) + +(define_constraint "Ks7" + "A signed 7 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -64 && ival <= 63"))) + +(define_constraint "KN7" + "A constant that when negated is a signed 7 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -63 && ival <= 64"))) + +(define_constraint "Ksh" + "A signed 16 bit immediate." + (and (match_code "const_int") + (match_test "ival >= -32768 && ival <= 32767"))) + +(define_constraint "Kuh" + "An unsigned 16 bit immediate." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 65535"))) + +(define_constraint "L" + "A constant value of the form ~(2**N)." + (and (match_code "const_int") + (match_test "log2constp (~ival)"))) + +(define_constraint "M1" + "An integer with the value 255." + (and (match_code "const_int") + (match_test "ival == 255"))) + +(define_constraint "M2" + "An integer with the value 65535." + (and (match_code "const_int") + (match_test "ival == 65535"))) + +(define_constraint "P0" + "An integer with the value 0." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "P1" + "An integer with the value 1." + (and (match_code "const_int") + (match_test "ival == 1"))) + +(define_constraint "P2" + "An integer with the value 2." + (and (match_code "const_int") + (match_test "ival == 2"))) + +(define_constraint "P3" + "An integer with the value 3." + (and (match_code "const_int") + (match_test "ival == 3"))) + +(define_constraint "P4" + "An integer with the value 4." + (and (match_code "const_int") + (match_test "ival == 4"))) + +(define_constraint "PA" + "An integer constant describing any macflag except variants involving M." + (and (match_code "const_int") + (match_test "ival != MACFLAG_M && ival != MACFLAG_IS_M"))) + +(define_constraint "PB" + "An integer constant describing any macflag involving M." + (and (match_code "const_int") + (match_test "ival == MACFLAG_M || ival == MACFLAG_IS_M"))) + + +;; Extra constraints + +(define_constraint "Q" + "A SYMBOL_REF." + (match_code "symbol_ref")) + diff --git a/gcc-4.9/gcc/config/bfin/elf.h b/gcc-4.9/gcc/config/bfin/elf.h new file mode 100644 index 000000000..7d6c97ada --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/elf.h @@ -0,0 +1,74 @@ +/* Copyright (C) 2005-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "\ +%{msim:%{!shared:crt0%O%s}} \ +%{!msim:%{!mcpu=bf561*:%{!msdram:basiccrt%O%s} %{msdram:basiccrts%O%s};: \ + %{!msdram:basiccrt561%O%s} %{msdram:basiccrt561s%O%s}} \ + %{mcpu=bf561*:%{mmulticore:%{!mcorea:%{!mcoreb:basiccrt561b%O%s}}}}} \ +crti%O%s crtbegin%O%s crtlibid%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef LIB_SPEC +#define LIB_SPEC "--start-group -lc %{msim:-lsim}%{!msim:-lnosys} --end-group \ +%{!T*:%{!msim:%{!msdram: \ + %{mcpu=bf512*:-T bf512.ld%s}%{mcpu=bf514*:-T bf514.ld%s} \ + %{mcpu=bf516*:-T bf516.ld%s}%{mcpu=bf518*:-T bf518.ld%s} \ + %{mcpu=bf522*:-T bf522.ld%s}%{mcpu=bf523*:-T bf523.ld%s} \ + %{mcpu=bf524*:-T bf524.ld%s}%{mcpu=bf525*:-T bf525.ld%s} \ + %{mcpu=bf526*:-T bf526.ld%s}%{mcpu=bf527*:-T bf527.ld%s} \ + %{mcpu=bf531*:-T bf531.ld%s}%{mcpu=bf532*:-T bf532.ld%s} \ + %{mcpu=bf533*:-T bf533.ld%s}%{mcpu=bf534*:-T bf534.ld%s} \ + %{mcpu=bf536*:-T bf536.ld%s}%{mcpu=bf537*:-T bf537.ld%s} \ + %{mcpu=bf538*:-T bf538.ld%s}%{mcpu=bf539*:-T bf539.ld%s} \ + %{mcpu=bf542*:-T bf542.ld%s}%{mcpu=bf544*:-T bf544.ld%s} \ + %{mcpu=bf547*:-T bf547.ld%s}%{mcpu=bf548*:-T bf548.ld%s} \ + %{mcpu=bf549*:-T bf549.ld%s} \ + %{mcpu=bf561*:%{!mmulticore:-T bf561.ld%s} \ + %{mmulticore:%{mcorea:-T bf561a.ld%s}} \ + %{mmulticore:%{mcoreb:-T bf561b.ld%s}} \ + %{mmulticore:%{!mcorea:%{!mcoreb:-T bf561m.ld%s}}}} \ + %{mcpu=bf592*:-T bf592.ld%s} \ + %{!mcpu=*:%eno processor type specified for linking} \ + %{!mcpu=bf561*:-T bfin-common-sc.ld%s} \ + %{mcpu=bf561*:%{!mmulticore:-T bfin-common-sc.ld%s} \ + %{mmulticore:-T bfin-common-mc.ld%s}}}}}" + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#ifdef __BFIN_FDPIC__ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ +asm (SECTION_OP); \ +asm ("P3 = [SP + 20];\n\tcall " USER_LABEL_PREFIX #FUNC ";"); \ +asm (TEXT_SECTION_ASM_OP); +#endif + +#undef SUBTARGET_DRIVER_SELF_SPECS +#define SUBTARGET_DRIVER_SELF_SPECS \ + "%{mfdpic:-msim} %{mid-shared-library:-msim}" + +#define NO_IMPLICIT_EXTERN_C diff --git a/gcc-4.9/gcc/config/bfin/linux.h b/gcc-4.9/gcc/config/bfin/linux.h new file mode 100644 index 000000000..7e2dd455f --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/linux.h @@ -0,0 +1,52 @@ +/* Copyright (C) 2007-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#undef SUBTARGET_DRIVER_SELF_SPECS +#define SUBTARGET_DRIVER_SELF_SPECS \ + "%{!mno-fdpic:-mfdpic} -micplb", + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() GNU_USER_TARGET_OS_CPP_BUILTINS() + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} crtreloc.o%s \ + crti.o%s %{shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static:--start-group} %{mfast-fp:-lbffastfp} %G %L %{static:--end-group} \ + %{!static:%{mfast-fp:-lbffastfp} %G}" + +#undef LINK_SPEC +#define LINK_SPEC "\ + %{mfdpic: -m elf32bfinfd -z text} %{shared} %{pie} \ + %{static:-dn -Bstatic} \ + %{shared:-G -Bdynamic} \ + %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker /lib/ld-uClibc.so.0} \ + %{static}} -init __init -fini __fini" + +#undef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 1 diff --git a/gcc-4.9/gcc/config/bfin/predicates.md b/gcc-4.9/gcc/config/bfin/predicates.md new file mode 100644 index 000000000..d73480f97 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/predicates.md @@ -0,0 +1,249 @@ +;; Predicate definitions for the Blackfin. +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; Contributed by Analog Devices. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return nonzero iff OP is one of the integer constants 1 or 2. +(define_predicate "pos_scale_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 1 || INTVAL (op) == 2"))) + +;; Return nonzero iff OP is one of the integer constants 2 or 4. +(define_predicate "scale_by_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4"))) + +;; Return nonzero if OP is a constant that consists of two parts; lower +;; bits all zero and upper bits all ones. In this case, we can perform +;; an AND operation with a sequence of two shifts. Don't return nonzero +;; if the constant would be cheap to load. +(define_predicate "highbits_operand" + (and (match_code "const_int") + (match_test "log2constp (-INTVAL (op)) && !satisfies_constraint_Ks7 (op)"))) + +;; Return nonzero if OP is suitable as a right-hand side operand for an +;; andsi3 operation. +(define_predicate "rhs_andsi3_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "log2constp (~INTVAL (op)) || INTVAL (op) == 255 || INTVAL (op) == 65535")))) + +;; Return nonzero if OP is a register or a constant with exactly one bit +;; set. 
+(define_predicate "regorlog2_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "log2constp (INTVAL (op))")))) + +;; Return nonzero if OP is a register or an integer constant. +(define_predicate "reg_or_const_int_operand" + (ior (match_operand 0 "register_operand") + (match_code "const_int"))) + +(define_predicate "const01_operand" + (and (match_code "const_int") + (match_test "op == const0_rtx || op == const1_rtx"))) + +(define_predicate "const1_operand" + (and (match_code "const_int") + (match_test "op == const1_rtx"))) + +(define_predicate "const3_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 3"))) + +(define_predicate "vec_shift_operand" + (ior (and (match_code "const_int") + (match_test "INTVAL (op) >= -16 && INTVAL (op) < 15")) + (match_operand 0 "register_operand"))) + +;; Like register_operand, but make sure that hard regs have a valid mode. +(define_predicate "valid_reg_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + if (REGNO (op) < FIRST_PSEUDO_REGISTER) + return HARD_REGNO_MODE_OK (REGNO (op), mode); + return 1; +}) + +;; Return nonzero if OP is a D register. +(define_predicate "d_register_operand" + (and (match_code "reg") + (match_test "D_REGNO_P (REGNO (op))"))) + +(define_predicate "p_register_operand" + (and (match_code "reg") + (match_test "P_REGNO_P (REGNO (op))"))) + +(define_predicate "dp_register_operand" + (and (match_code "reg") + (match_test "D_REGNO_P (REGNO (op)) || P_REGNO_P (REGNO (op))"))) + +;; Return nonzero if OP is a LC register. +(define_predicate "lc_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LC0 || REGNO (op) == REG_LC1"))) + +;; Return nonzero if OP is a LT register. +(define_predicate "lt_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LT0 || REGNO (op) == REG_LT1"))) + +;; Return nonzero if OP is a LB register. +(define_predicate "lb_register_operand" + (and (match_code "reg") + (match_test "REGNO (op) == REG_LB0 || REGNO (op) == REG_LB1"))) + +;; Return nonzero if OP is a register or a 7-bit signed constant. +(define_predicate "reg_or_7bit_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "satisfies_constraint_Ks7 (op)")))) + +;; Return nonzero if OP is a register other than DREG and PREG. +(define_predicate "nondp_register_operand" + (match_operand 0 "register_operand") +{ + unsigned int regno; + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + + regno = REGNO (op); + return (regno >= FIRST_PSEUDO_REGISTER || !DP_REGNO_P (regno)); +}) + +;; Return nonzero if OP is a register other than DREG and PREG, or MEM. +(define_predicate "nondp_reg_or_memory_operand" + (ior (match_operand 0 "nondp_register_operand") + (match_operand 0 "memory_operand"))) + +;; Return nonzero if OP is a register or, when negated, a 7-bit signed +;; constant. +(define_predicate "reg_or_neg7bit_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "satisfies_constraint_KN7 (op)")))) + +;; Used for secondary reloads, this function returns 1 if OP is of the +;; form (plus (fp) (const_int)). 
+(define_predicate "fp_plus_const_operand" + (match_code "plus") +{ + rtx op1, op2; + + op1 = XEXP (op, 0); + op2 = XEXP (op, 1); + return (REG_P (op1) + && (REGNO (op1) == FRAME_POINTER_REGNUM + || REGNO (op1) == STACK_POINTER_REGNUM) + && GET_CODE (op2) == CONST_INT); +}) + +;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, +;; possibly with an offset. +(define_predicate "symbolic_operand" + (ior (match_code "symbol_ref,label_ref") + (and (match_code "const") + (match_test "GET_CODE (XEXP (op,0)) == PLUS + && (GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (op, 0), 0)) == LABEL_REF) + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT")))) + +;; Returns 1 if OP is a plain constant or matched by symbolic_operand. +(define_predicate "symbolic_or_const_operand" + (ior (match_code "const_int,const_double") + (match_operand 0 "symbolic_operand"))) + +;; Returns 1 if OP is a SYMBOL_REF. +(define_predicate "symbol_ref_operand" + (match_code "symbol_ref")) + +;; True for any non-virtual or eliminable register. Used in places where +;; instantiation of such a register may cause the pattern to not be recognized. +(define_predicate "register_no_elim_operand" + (match_operand 0 "register_operand") +{ + if (GET_CODE (op) == SUBREG) + op = SUBREG_REG (op); + return !(op == arg_pointer_rtx + || op == frame_pointer_rtx + || (REGNO (op) >= FIRST_PSEUDO_REGISTER + && REGNO (op) <= LAST_VIRTUAL_REGISTER)); +}) + +;; Test for an operator valid in a BImode conditional branch +(define_predicate "bfin_bimode_comparison_operator" + (match_code "eq,ne")) + +;; Test for an operator whose result is accessible with movbisi. +(define_predicate "bfin_direct_comparison_operator" + (match_code "eq,lt,le,leu,ltu")) + +;; The following three are used to compute the addrtype attribute. They return +;; true if passed a memory address usable for a 16-bit load or store using a +;; P or I register, respectively. If neither matches, we know we have a +;; 32-bit instruction. +;; We subdivide the P case into normal P registers, and SP/FP. We can assume +;; that speculative loads through SP and FP are no problem, so this has +;; an effect on the anomaly workaround code. 
+ +(define_predicate "mem_p_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return PREG_P (op) && op != stack_pointer_rtx && op != frame_pointer_rtx; +}) + +(define_predicate "mem_spfp_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return op == stack_pointer_rtx || op == frame_pointer_rtx; +}) + +(define_predicate "mem_i_address_operand" + (match_code "mem") +{ + if (effective_address_32bit_p (op, mode)) + return 0; + op = XEXP (op, 0); + if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC) + op = XEXP (op, 0); + gcc_assert (REG_P (op)); + return IREG_P (op); +}) + +(define_predicate "push_multiple_operation" + (and (match_code "parallel") + (match_test "analyze_push_multiple_operation (op)"))) + +(define_predicate "pop_multiple_operation" + (and (match_code "parallel") + (match_test "analyze_pop_multiple_operation (op)"))) diff --git a/gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh b/gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh new file mode 100644 index 000000000..36a71b114 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/print-sysroot-suffix.sh @@ -0,0 +1,81 @@ +#!/bin/sh +# Copyright (C) 2007-2014 Free Software Foundation, Inc. +# This file is part of GCC. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# This script takes the following arguments: +# +# - the target sysroot +# - the value of $(MULTILIB_MATCHES) +# - the value of $(MULTILIB_OPTIONS) +# +# It uses these arguments to construct a definition of SYSROOT_SUFFIX_SPEC, +# which it prints to the standard output. For each multilib directory FOO, +# the script checks whether $sysroot has a subdirectory FOO, and if so will +# use /FOO for all compatible command-line options. It will not add a +# suffix for /FOO's options otherwise. These suffixes are concatenated, +# with one subspec for each space-separated entry in $(MULTILIB_OPTIONS). +set -e +sysroot=$1 +matches=$2 +options=$3 + +# For each multilib option OPT, add to $substs a sed command of the +# form "-e 's/OPT/OPT/'". +substs="" +for option in `echo "$options" | tr '/' ' '` +do + substs="$substs -e 's/$option/$option/g'" +done + +# For each ALIAS=CANONICAL entry in $MULTILIB_MATCHES, look for sed +# arguments in $substs of the form "-e 's/CANONICAL/.../'". Replace +# such entries with "-e 's/CANONICAL/ALIAS|.../'". Both the ALIAS and +# CANONICAL parts of $MULTILIB_MATCHES use '?' to stand for '='. +# +# After this loop, a command of the form "echo FOO | eval sed $substs" +# will replace a canonical option FOO with a %{...}-style spec pattern. 
+for match in $matches +do + canonical=`echo "$match" | sed -e 's/=.*//' -e 's/?/=/g'` + alias=`echo "$match" | sed -e 's/.*=//' -e 's/?/=/g'` + substs=`echo "$substs" | sed -e "s,s/$canonical/,&$alias|,"` +done + +# Build up the final SYSROOT_SUFFIX_SPEC in $spec. +spec= +for combo in $options +do + # See which option alternatives in $combo have their own sysroot + # directory. Create a subspec of the form "%{PAT1:/DIR1;...;PATn:DIRn}" + # from each such option OPTi, where DIRi is the directory associated + # with OPTi and PATi is the result of passing OPTi through $substs. + subspec= + for option in `echo "$combo" | tr '/' ' '` + do + dir=`echo "$option" | sed 's/mcpu=//'` + if test -d "$sysroot/$dir"; then + test -z "$subspec" || subspec="$subspec;" + subspec="$subspec"`echo "$option" | eval sed $substs`":/$dir" + fi + done + # Concatenate all the subspecs. + test -z "$subspec" || spec="$spec%{$subspec}" +done +if test -n "$spec"; then + echo "#undef SYSROOT_SUFFIX_SPEC" + echo "#define SYSROOT_SUFFIX_SPEC \"$spec\"" +fi diff --git a/gcc-4.9/gcc/config/bfin/rtems.h b/gcc-4.9/gcc/config/bfin/rtems.h new file mode 100644 index 000000000..0897e26b8 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/rtems.h @@ -0,0 +1,28 @@ +/* Definitions for rtems targeting a bfin + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by Ralf Corsépius (ralf.corsepius@rtems.org). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target OS preprocessor built-ins. */ +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__rtems__"); \ + builtin_assert ("system=rtems"); \ + } \ + while (0) diff --git a/gcc-4.9/gcc/config/bfin/sync.md b/gcc-4.9/gcc/config/bfin/sync.md new file mode 100644 index 000000000..62a87b7e1 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/sync.md @@ -0,0 +1,178 @@ +;; GCC machine description for Blackfin synchronization instructions. +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. +;; Contributed by Analog Devices. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
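The call-based synchronization patterns that follow rely on the Blackfin/uClinux "fixed code" area: the kernel exposes small atomic helper routines at fixed user-space addresses, and the fetchop_addr values defined below (1072-1136, i.e. 0x430-0x470) together with the GEN_INT (0x420) used by the compare-and-swap expander pick the helper to call. As a rough, hedged illustration of how C code reaches these patterns, the sketch below uses the generic GCC __sync builtins; the function and variable names are hypothetical, and the addresses in the comments are assumptions derived from those constants, not a statement about any particular kernel build.

#include <stdint.h>

static int32_t counter;

/* Fetch-and-add: expected to expand through the sync_<fetchop_name>si
   patterns below, i.e. a short call to a kernel-provided helper
   (assumed to live at 0x430 for the "add" variant).  */
int32_t
bump_counter (int32_t n)
{
  return __sync_fetch_and_add (&counter, n);
}

/* Compare-and-swap: expected to expand through sync_compare_and_swapsi,
   i.e. a call through a pointer register loaded with 0x420; returns the
   value that was previously stored in counter.  */
int32_t
swap_if_equal (int32_t expected, int32_t desired)
{
  return __sync_val_compare_and_swap (&counter, expected, desired);
}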
+ +(define_code_iterator FETCHOP [plus minus ior and xor]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "ior") (and "and") (xor "xor")]) +(define_code_attr fetchop_addr + [(plus "1072") (minus "1088") (ior "1104") (and "1120") (xor "1136")]) + +(define_insn "sync_<fetchop_name>si_internal" + [(set (mem:SI (match_operand:SI 0 "register_operand" "qA")) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 0)) + (match_operand:SI 1 "register_operand" "q0")) + (match_operand:SI 2 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=q0")) + (clobber (match_scratch:SI 4 "=q1")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%2);" + [(set_attr "type" "call")]) + +(define_expand "sync_<fetchop_name>si" + [(parallel + [(set (match_operand:SI 0 "memory_operand" "+m") + (unspec:SI + [(FETCHOP:SI (match_dup 0) + (match_operand:SI 1 "register_operand" "q0")) + (match_dup 2)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[0], 0))) + { + operands[0] = shallow_copy_rtx (operands[0]); + XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0)); + } + operands[2] = force_reg (Pmode, GEN_INT (<fetchop_addr>)); +}) + +(define_insn "sync_old_<fetchop_name>si_internal" + [(set (match_operand:SI 0 "register_operand" "=q1") + (mem:SI (match_operand:SI 1 "register_operand" "qA"))) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 1)) + (match_operand:SI 2 "register_operand" "q0")) + (match_operand:SI 3 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "=q0")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%3);" + [(set_attr "type" "call")]) + +(define_expand "sync_old_<fetchop_name>si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_operand:SI 2 "register_operand" "")) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); + } + operands[3] = force_reg (Pmode, GEN_INT (<fetchop_addr>)); +}) + +(define_insn "sync_new_<fetchop_name>si_internal" + [(set (match_operand:SI 0 "register_operand" "=q0") + (unspec:SI + [(FETCHOP:SI + (mem:SI (match_operand:SI 1 "register_operand" "qA")) + (match_operand:SI 2 "register_operand" "q0")) + (match_operand:SI 3 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(FETCHOP:SI (mem:SI (match_dup 1)) (match_dup 2)) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "=q1")) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%3);" + [(set_attr "type" "call")]) + +(define_expand "sync_new_<fetchop_name>si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(FETCHOP:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "register_operand" "")) + (match_dup 3)] + UNSPEC_ATOMIC)) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) (match_dup 2)) + (match_dup 3)] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg
(Pmode, XEXP (operands[1], 0)); + } + operands[3] = force_reg (Pmode, GEN_INT (<fetchop_addr>)); +}) + +(define_insn "sync_compare_and_swapsi_internal" + [(set (match_operand:SI 0 "register_operand" "=q0") + (mem:SI (match_operand:SI 1 "register_operand" "qA"))) + (set (mem:SI (match_dup 1)) + (unspec:SI + [(mem:SI (match_dup 1)) + (match_operand:SI 2 "register_operand" "q1") + (match_operand:SI 3 "register_operand" "q2") + (match_operand:SI 4 "register_no_elim_operand" "a")] + UNSPEC_ATOMIC)) + (clobber (reg:SI REG_RETS))] + "TARGET_SUPPORTS_SYNC_CALLS" + "call (%4);" + [(set_attr "type" "call")]) + +(define_expand "sync_compare_and_swapsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(match_dup 1) + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "") + (match_dup 4)] + UNSPEC_ATOMIC)) + (clobber (reg:SI REG_RETS))])] + "TARGET_SUPPORTS_SYNC_CALLS" +{ + if (!REG_P (XEXP (operands[1], 0))) + { + operands[1] = shallow_copy_rtx (operands[1]); + XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); + } + operands[4] = force_reg (Pmode, GEN_INT (0x420)); +}) diff --git a/gcc-4.9/gcc/config/bfin/t-bfin-elf b/gcc-4.9/gcc/config/bfin/t-bfin-elf new file mode 100644 index 000000000..1e08c98b7 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/t-bfin-elf @@ -0,0 +1,49 @@ +# Copyright (C) 2005-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>.
+ +## Target part of the Makefile + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_OPTIONS+=mid-shared-library/msep-data/mfdpic mleaf-id-shared-library +MULTILIB_DIRNAMES=bf532-none mid-shared-library msep-data mfdpic mleaf-id-shared-library + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf592-none + +MULTILIB_EXCEPTIONS=mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=mcpu=bf532-none/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*mfdpic/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*msep-data/mleaf-id-shared-library* diff --git a/gcc-4.9/gcc/config/bfin/t-bfin-linux b/gcc-4.9/gcc/config/bfin/t-bfin-linux new file mode 100644 index 000000000..8726d8fd6 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/t-bfin-linux @@ -0,0 +1,52 @@ +# Copyright (C) 2007-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
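The t-bfin-linux fragment below ends with a rule that runs print-sysroot-suffix.sh over MULTILIB_MATCHES and MULTILIB_OPTIONS to generate linux-sysroot-suffix.h. As a hedged sketch only, assuming the target sysroot contains a bf532-none subdirectory, the generated header could look roughly like the following; the real alternation lists every alias from MULTILIB_MATCHES and is abbreviated here, and if no such subdirectory exists the script emits nothing and the default sysroot is used unchanged.

/* Hypothetical output of print-sysroot-suffix.sh for a sysroot that
   provides a bf532-none subdirectory (abbreviated alias list).  */
#undef SYSROOT_SUFFIX_SPEC
#define SYSROOT_SUFFIX_SPEC \
  "%{mcpu=bf512-none|mcpu=bf561-none|mcpu=bf592-none|mcpu=bf532-none:/bf532-none}"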
+ +## Target part of the Makefile + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_DIRNAMES=bf532-none + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf592-none + +# This rule uses MULTILIB_MATCHES to generate a definition of +# SYSROOT_SUFFIX_SPEC. +linux-sysroot-suffix.h: $(srcdir)/config/bfin/print-sysroot-suffix.sh + $(SHELL) $(srcdir)/config/bfin/print-sysroot-suffix.sh \ + "$(SYSTEM_HEADER_DIR)/../.." "$(MULTILIB_MATCHES)" \ + "$(MULTILIB_OPTIONS)" > $@ + +generated_files += linux-sysroot-suffix.h diff --git a/gcc-4.9/gcc/config/bfin/t-bfin-uclinux b/gcc-4.9/gcc/config/bfin/t-bfin-uclinux new file mode 100644 index 000000000..158ca3766 --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/t-bfin-uclinux @@ -0,0 +1,48 @@ +# Copyright (C) 2007-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +## Target part of the Makefile + +MULTILIB_OPTIONS=mcpu=bf532-none +MULTILIB_OPTIONS+=mid-shared-library/msep-data mleaf-id-shared-library +MULTILIB_DIRNAMES=bf532-none mid-shared-library msep-data mleaf-id-shared-library + +MULTILIB_MATCHES=mcpu?bf532-none=mcpu?bf512-none mcpu?bf532-none=mcpu?bf514-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf516-none mcpu?bf532-none=mcpu?bf518-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf522-none mcpu?bf532-none=mcpu?bf523-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf524-none mcpu?bf532-none=mcpu?bf525-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf526-none mcpu?bf532-none=mcpu?bf527-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf531-none mcpu?bf532-none=mcpu?bf533-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf534-none mcpu?bf532-none=mcpu?bf536-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf537-none mcpu?bf532-none=mcpu?bf538-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf539-none mcpu?bf532-none=mcpu?bf542-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf542m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf544m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf547m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf548m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf549m-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf561-none +MULTILIB_MATCHES+=mcpu?bf532-none=mcpu?bf592-none + +MULTILIB_EXCEPTIONS=mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=mcpu=bf532-none/mleaf-id-shared-library* +MULTILIB_EXCEPTIONS+=*msep-data/mleaf-id-shared-library* diff --git a/gcc-4.9/gcc/config/bfin/t-rtems b/gcc-4.9/gcc/config/bfin/t-rtems new file mode 100644 index 000000000..728ab1c4f --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/t-rtems @@ -0,0 +1,6 @@ +# Multilibs for fbin RTEMS targets. + +MULTILIB_OPTIONS = +MULTILIB_DIRNAMES = +MULTILIB_EXTRA_OPTS = +MULTILIB_EXCEPTIONS = diff --git a/gcc-4.9/gcc/config/bfin/uclinux.h b/gcc-4.9/gcc/config/bfin/uclinux.h new file mode 100644 index 000000000..0ae03b28e --- /dev/null +++ b/gcc-4.9/gcc/config/bfin/uclinux.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2005-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared: crt1%O%s} crti%O%s crtbegin%O%s crtlibid%O%s" + +#define TARGET_OS_CPP_BUILTINS() GNU_USER_TARGET_OS_CPP_BUILTINS() + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC "\ + %{mfast-fp:-lbffastfp} %G %L %{mfast-fp:-lbffastfp} %G \ +" + +#undef TARGET_SUPPORTS_SYNC_CALLS +#define TARGET_SUPPORTS_SYNC_CALLS 1 + +#define SUBTARGET_FDPIC_NOT_SUPPORTED diff --git a/gcc-4.9/gcc/config/c6x/c6x-isas.def b/gcc-4.9/gcc/config/c6x/c6x-isas.def new file mode 100644 index 000000000..1447a5d14 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-isas.def @@ -0,0 +1,37 @@ +/* C6X ISA names. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Define ISAs for the -march option, used both in C6X.c and to + generate c6x-tables.opt. Before including this file, define a + macro: + + C6X_ISA (NAME, ENUM_VALUE, FLAGS) + + where NAME is the name for use with -march=, ENUM_VALUE is an enum + corresponding to this arch, and FLAGS is a combination of flags + that together specify the available instructions. */ + +C6X_ISA("c62x", C6X_CPU_C62X, C6X_INSNS_C62X) +C6X_ISA("c64x", C6X_CPU_C64X, C6X_INSNS_C62X | C6X_INSNS_C64X) +C6X_ISA("c64x+", C6X_CPU_C64XP, C6X_INSNS_C62X | C6X_INSNS_C64X | C6X_INSNS_C64XP) +C6X_ISA("c67x", C6X_CPU_C67X, C6X_INSNS_C62X | C6X_INSNS_C67X) +C6X_ISA("c67x+", C6X_CPU_C67XP, C6X_INSNS_C62X | C6X_INSNS_C67X | C6X_INSNS_C67XP) +C6X_ISA("c674x", C6X_CPU_C674X, + (C6X_INSNS_C62X | C6X_INSNS_C64X | C6X_INSNS_C64XP | C6X_INSNS_C67X + | C6X_INSNS_C67XP | C6X_INSNS_C674X)) diff --git a/gcc-4.9/gcc/config/c6x/c6x-modes.def b/gcc-4.9/gcc/config/c6x/c6x-modes.def new file mode 100644 index 000000000..a438e2808 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-modes.def @@ -0,0 +1,24 @@ +/* Definitions of target machine for GNU compiler, for TI C6x. + Copyright (C) 2010-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ + +VECTOR_MODE (FRACT, SQ, 2); /* V2SQ. */ +VECTOR_MODE (FRACT, HQ, 2); /* V2HQ. 
*/ diff --git a/gcc-4.9/gcc/config/c6x/c6x-mult.md b/gcc-4.9/gcc/config/c6x/c6x-mult.md new file mode 100644 index 000000000..d8e262652 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-mult.md @@ -0,0 +1,844 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically from c6x-mult.md.in by genmult.sh +;; Multiplication patterns for TI C6X. +;; This file is processed by genmult.sh to produce two variants of each +;; pattern, a normal one and a real_mult variant for modulo scheduling. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; ------------------------------------------------------------------------- +;; Miscellaneous insns that execute on the M units +;; ------------------------------------------------------------------------- + +(define_insn "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (rotate:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5")))] + "TARGET_INSNS_64" + "%|%.\\trotl\\t%$\\t%1, %2, %0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "bitrevsi2" + [(set (match_operand:SI 0 "register_operand" "=a,a,b,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,?b,b,?a")] + UNSPEC_BITREV))] + "TARGET_INSNS_64" + "%|%.\\tbitr\\t%$\\t%1, %0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,y,n,y")]) + +;; Vector average. 
+ +(define_insn "avgv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "a,b,?b,?a") + (match_operand:V2HI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG))] + "TARGET_INSNS_64" + "%|%.\\tavg2\\t%$\\t%1, %2, %0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "uavgv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=a,b,a,b") + (unspec:V4QI [(match_operand:V4QI 1 "register_operand" "a,b,?b,?a") + (match_operand:V4QI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG))] + "TARGET_INSNS_64" + "%|%.\\tavgu4\\t%$\\t%1, %2, %0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Multiplication +;; ------------------------------------------------------------------------- + +(define_insn "mulhi3" + [(set (match_operand:HI 0 "register_operand" "=a,b,a,b") + (mult:HI (match_operand:HI 1 "register_operand" "a,b,?b,?a") + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,aIs5,bIs5")))] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "sxs") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_const" + [(set (match_operand:SI 0 "register_operand" "=a,b,ab") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:HI 2 "scst5_operand" "Is5,Is5,Is5")))] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*mulhisi3_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpy\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "ssx") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_lh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpylh\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hl" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpyhl\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpyh\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpyu\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + 
(set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_lh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpylhu\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hl" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpyhlu\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpyhu\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_const" + [(set (match_operand:SI 0 "register_operand" "=a,b,ab") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:SI 2 "scst5_operand" "Is5,Is5,Is5")))] + "" + "%|%.\\tmpysu\\t%$\\t%2, %1, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*usmulhisi3_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,bIs5,aIs5"))))] + "" + "%|%.\\tmpyus\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_lh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpyluhs\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hl" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tmpyhuls\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hh" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16))))] + "" + "%|%.\\tmpyhus\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsi3_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (mult:SI (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (match_operand:SI 2 "register_operand" "a,b,b,a")))] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" 
"=a,b,a,b") + (mult:DI (any_ext:DI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b")) + (any_ext:DI + (match_operand:SI 2 "register_operand" "a,b,b,a"))))] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulsidi3" + [(set (match_operand:DI 0 "register_operand" "=a,b,a,b") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "a,b,b,a"))))] + "TARGET_MPY32" + "%|%.\\tmpy32us\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Widening vector multiply and dot product + +(define_insn "mulv2hiv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=a,b,a,b") + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" "a,b,a,b")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" "a,b,?b,?a"))))] + "TARGET_INSNS_64" + "%|%.\\tmpy2\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulv4qiv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=a,b,a,b") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,a,b")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,?b,?a"))))] + "TARGET_INSNS_64" + "%|%.\\tmpyu4\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulv4qiv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=a,b,a,b") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,?b,?a")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,a,b"))))] + "TARGET_INSNS_64" + "%|%.\\tmpyus4\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "dotv2hi" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (plus:SI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))))] + "TARGET_INSNS_64" + "%|%.\\tdotp2\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Fractional multiply + +(define_insn "mulv2hqv2sq3" + [(set (match_operand:V2SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:V2SQ + (fract_convert:V2SQ + (match_operand:V2HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:V2SQ + (match_operand:V2HQ 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tsmpy2\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3" + [(set (match_operand:SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tsmpy\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_lh" + [(set (match_operand:SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" 
"a,b,?a,?b")) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a")))))] + "" + "%|%.\\tsmpylh\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hl" + [(set (match_operand:SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a"))))] + "" + "%|%.\\tsmpyhl\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hh" + [(set (match_operand:SQ 0 "register_operand" "=a,b,a,b") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a")))))] + "" + "%|%.\\tsmpyh\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) +;; Multiplication patterns for TI C6X. +;; This file is processed by genmult.sh to produce two variants of each +;; pattern, a normal one and a real_mult variant for modulo scheduling. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; ------------------------------------------------------------------------- +;; Miscellaneous insns that execute on the M units +;; ------------------------------------------------------------------------- + +(define_insn "rotlsi3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (rotate:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5"))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\trotl\\t%$\\t%1, %2, %k0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "bitrevsi2_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JA,JB,JB") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,?b,b,?a")] + UNSPEC_BITREV)] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tbitr\\t%$\\t%1, %k0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,y,n,y")]) + +;; Vector average. 
+ +(define_insn "avgv2hi3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "a,b,?b,?a") + (match_operand:V2HI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG)] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tavg2\\t%$\\t%1, %2, %k0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "uavgv4qi3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (unspec:V4QI [(match_operand:V4QI 1 "register_operand" "a,b,?b,?a") + (match_operand:V4QI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG)] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tavgu4\\t%$\\t%1, %2, %k0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Multiplication +;; ------------------------------------------------------------------------- + +(define_insn "mulhi3_real" + [(unspec [(match_operand:HI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:HI (match_operand:HI 1 "register_operand" "a,b,?b,?a") + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,aIs5,bIs5"))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "sxs") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_const_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JAJB") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:HI 2 "scst5_operand" "Is5,Is5,Is5"))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*mulhisi3_insn_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpy\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "ssx") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_lh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpylh\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hl_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhl\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyh\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_real" + [(unspec [(match_operand:SI 0 
"const_int_operand" "=JA,JB,JA,JB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyu\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_lh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpylhu\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hl_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhlu\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhu\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_const_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JAJB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:SI 2 "scst5_operand" "Is5,Is5,Is5"))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpysu\\t%$\\t%2, %1, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*usmulhisi3_insn_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,bIs5,aIs5")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyus\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_lh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyluhs\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hl_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhuls\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))] 
UNSPEC_REAL_MULT)] + "" + "%|%.\\tmpyhus\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsi3_insn_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:SI (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (match_operand:SI 2 "register_operand" "a,b,b,a"))] UNSPEC_REAL_MULT)] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsidi3_real" + [(unspec [(match_operand:DI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:DI (any_ext:DI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b")) + (any_ext:DI + (match_operand:SI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %K0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulsidi3_real" + [(unspec [(match_operand:DI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "TARGET_MPY32" + "%|%.\\tmpy32us\\t%$\\t%1, %2, %K0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Widening vector multiply and dot product + +(define_insn "mulv2hiv2si3_real" + [(unspec [(match_operand:V2SI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" "a,b,a,b")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tmpy2\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulv4qiv4hi3_real" + [(unspec [(match_operand:V4HI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,a,b")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,?b,?a")))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tmpyu4\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulv4qiv4hi3_real" + [(unspec [(match_operand:V4HI 0 "const_int_operand" "=JA,JB,JA,JB") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,?b,?a")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,a,b")))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tmpyus4\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "dotv2hi_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (plus:SI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))] UNSPEC_REAL_MULT)] + "TARGET_INSNS_64" + "%|%.\\tdotp2\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Fractional multiply + +(define_insn "mulv2hqv2sq3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:V2SQ + (fract_convert:V2SQ + 
(match_operand:V2HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:V2SQ + (match_operand:V2HQ 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpy2\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpy\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_lh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "a,b,?a,?b")) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a"))))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpylh\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hl_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a")))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpyhl\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hh_real" + [(unspec [(match_operand:SI 0 "const_int_operand" "=JA,JB,JA,JB") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a"))))] UNSPEC_REAL_MULT)] + "" + "%|%.\\tsmpyh\\t%$\\t%1, %2, %k0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) diff --git a/gcc-4.9/gcc/config/c6x/c6x-mult.md.in b/gcc-4.9/gcc/config/c6x/c6x-mult.md.in new file mode 100644 index 000000000..f09c7c085 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-mult.md.in @@ -0,0 +1,421 @@ +;; Multiplication patterns for TI C6X. +;; This file is processed by genmult.sh to produce two variants of each +;; pattern, a normal one and a real_mult variant for modulo scheduling. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
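The header above is the only description of genmult.sh in this hunk; the script itself is not shown here. Comparing the template that follows with the already-expanded c6x-mult.md patterns earlier in the patch suggests a plain textual substitution: the normal variant turns each pattern into an ordinary (set ...) writing a register operand, while the "_real" variant wraps the same body in an UNSPEC_REAL_MULT whose destination is a const_int operand with JA/JB constraints, for use during modulo scheduling. The sketch below only illustrates that inferred mapping (mode placeholders such as _MV2HI and _MSQ are omitted); it is not the actual generator.

   #!/bin/sh
   # Sketch only -- substitutions inferred from the template/expansion pair
   # in this patch, not the real genmult.sh.  Mode placeholders not handled.

   # Normal variant: plain SETs writing register operands.
   sed -e 's/_VARIANT_//g'      -e 's/_SET_/set/g' \
       -e 's/_OBRK_//g'         -e 's/_CBRK_//g' \
       -e 's/_DESTOPERAND_/register_operand/g' \
       -e 's/_A_/a/g'  -e 's/_B_/b/g' \
       -e 's/_MODk_//g' -e 's/_MODK_//g' c6x-mult.md.in

   # "_real" variant: UNSPEC_REAL_MULT whose destination is a const_int
   # (JA/JB constraints), used before registers are assigned.
   sed -e 's/_VARIANT_/_real/g' -e 's/_SET_/unspec/g' \
       -e 's/_OBRK_/[/g'        -e 's/_CBRK_/] UNSPEC_REAL_MULT/g' \
       -e 's/_DESTOPERAND_/const_int_operand/g' \
       -e 's/_A_/JA/g' -e 's/_B_/JB/g' \
       -e 's/_MODk_/k/g' -e 's/_MODK_/K/g' c6x-mult.md.in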
+ +;; ------------------------------------------------------------------------- +;; Miscellaneous insns that execute on the M units +;; ------------------------------------------------------------------------- + +(define_insn "rotlsi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (rotate:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5"))_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\trotl\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "bitrevsi2_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_A_,_B_,_B_") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,?b,b,?a")] + UNSPEC_BITREV)_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tbitr\\t%$\\t%1, %_MODk_0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,y,n,y")]) + +;; Vector average. + +(define_insn "avgv2hi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MV2HI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (unspec:V2HI [(match_operand:V2HI 1 "register_operand" "a,b,?b,?a") + (match_operand:V2HI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG)_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tavg2\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "uavgv4qi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MV4QI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (unspec:V4QI [(match_operand:V4QI 1 "register_operand" "a,b,?b,?a") + (match_operand:V4QI 2 "register_operand" "a,b,a,b")] UNSPEC_AVG)_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tavgu4\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "units" "m") + (set_attr "type" "mpy2") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Multiplication +;; ------------------------------------------------------------------------- + +(define_insn "mulhi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:HI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:HI (match_operand:HI 1 "register_operand" "a,b,?b,?a") + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,aIs5,bIs5"))_CBRK_)] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "sxs") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_const_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A__B_") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:HI 2 "scst5_operand" "Is5,Is5,Is5"))_CBRK_)] + "" + "%|%.\\tmpy\\t%$\\t%2, %1, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*mulhisi3_insn_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpy\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "op_pattern" "ssx") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_lh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (sign_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpylh\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + 
(set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hl_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpyhl\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhisi3_hh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (ashiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpyh\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "%a,b,?a,?b")) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpyu\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_lh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpylhu\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hl_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (zero_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpyhlu\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulhisi3_hh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (const_int 16)) + (lshiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpyhu\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_const_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A__B_") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?ab")) + (match_operand:SI 2 "scst5_operand" "Is5,Is5,Is5"))_CBRK_)] + "" + "%|%.\\tmpysu\\t%$\\t%2, %1, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y")]) + +(define_insn "*usmulhisi3_insn_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (zero_extend:SI + (match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:SI + (match_operand:HI 2 "reg_or_scst5_operand" "aIs5,bIs5,bIs5,aIs5")))_CBRK_)] + "" + "%|%.\\tmpyus\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_lh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (zero_extend:SI + 
(match_operand:HI 1 "register_operand" "a,b,?a,?b")) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpyluhs\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hl_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (sign_extend:SI + (match_operand:HI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tmpyhuls\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulhisi3_hh_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (lshiftrt:SI + (match_operand:SI 1 "register_operand" "a,b,?a,?b") + (const_int 16)) + (ashiftrt:SI + (match_operand:SI 2 "register_operand" "a,b,b,a") + (const_int 16)))_CBRK_)] + "" + "%|%.\\tmpyhus\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsi3_insn_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:SI (match_operand:SI 1 "register_operand" "%a,b,?a,?b") + (match_operand:SI 2 "register_operand" "a,b,b,a"))_CBRK_)] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulsidi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:DI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:DI (any_ext:DI + (match_operand:SI 1 "register_operand" "%a,b,?a,?b")) + (any_ext:DI + (match_operand:SI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "TARGET_MPY32" + "%|%.\\tmpy32\\t%$\\t%1, %2, %_MODK_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulsidi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:DI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "a,b,?a,?b")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "a,b,b,a")))_CBRK_)] + "TARGET_MPY32" + "%|%.\\tmpy32us\\t%$\\t%1, %2, %_MODK_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Widening vector multiply and dot product + +(define_insn "mulv2hiv2si3_VARIANT_" + [(_SET_ _OBRK_(match_operand:V2SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 1 "register_operand" "a,b,a,b")) + (sign_extend:V2SI (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tmpy2\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umulv4qiv4hi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:V4HI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,a,b")) + (zero_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,?b,?a")))_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tmpyu4\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "usmulv4qiv4hi3_VARIANT_" + [(_SET_ _OBRK_(match_operand:V4HI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (mult:V4HI + (zero_extend:V4HI (match_operand:V4QI 1 "register_operand" "a,b,?b,?a")) + (sign_extend:V4HI (match_operand:V4QI 2 "register_operand" "a,b,a,b")))_CBRK_)] + 
"TARGET_INSNS_64" + "%|%.\\tmpyus4\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "dotv2hi_VARIANT_" + [(_SET_ _OBRK_(match_operand:SI 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (plus:SI + (mult:SI + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (parallel [(const_int 0)]))) + (sign_extend:SI + (vec_select:HI + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a") + (parallel [(const_int 0)])))) + (mult:SI + (sign_extend:SI + (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (sign_extend:SI + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))_CBRK_)] + "TARGET_INSNS_64" + "%|%.\\tdotp2\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; Fractional multiply + +(define_insn "mulv2hqv2sq3_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MV2SQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:V2SQ + (fract_convert:V2SQ + (match_operand:V2HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:V2SQ + (match_operand:V2HQ 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tsmpy2\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MSQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "%a,b,?a,?b")) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tsmpy\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_lh_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MSQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:SQ + (fract_convert:SQ + (match_operand:HQ 1 "register_operand" "a,b,?a,?b")) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a"))))_CBRK_)] + "" + "%|%.\\tsmpylh\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hl_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MSQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (match_operand:HQ 2 "register_operand" "a,b,b,a")))_CBRK_)] + "" + "%|%.\\tsmpyhl\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "mulhqsq3_hh_VARIANT_" + [(_SET_ _OBRK_(match_operand:_MSQ 0 "_DESTOPERAND_" "=_A_,_B_,_A_,_B_") + (ss_mult:SQ + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 1 "register_operand" "a,b,b,a"))) + (fract_convert:SQ + (truncate:HQ (match_operand:SQ 2 "register_operand" "a,b,b,a"))))_CBRK_)] + "" + "%|%.\\tsmpyh\\t%$\\t%1, %2, %_MODk_0" + [(set_attr "type" "mpy2") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) diff --git a/gcc-4.9/gcc/config/c6x/c6x-opts.h b/gcc-4.9/gcc/config/c6x/c6x-opts.h new file mode 100644 index 000000000..6bc3fe846 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-opts.h @@ -0,0 +1,35 @@ +/* Definitions for option handling for TI C6X. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef C6X_OPTS_H +#define C6X_OPTS_H + +/* An enumeration of all supported target devices. */ +typedef enum c6x_cpu_type +{ +#define C6X_ISA(NAME,ENUM_VALUE,FLAGS) \ + ENUM_VALUE, +#include "c6x-isas.def" +#undef C6X_ISA + unk_isa +} c6x_cpu_t; + +enum c6x_sdata { C6X_SDATA_NONE, C6X_SDATA_DEFAULT, C6X_SDATA_ALL }; + +#endif diff --git a/gcc-4.9/gcc/config/c6x/c6x-protos.h b/gcc-4.9/gcc/config/c6x/c6x-protos.h new file mode 100644 index 000000000..e360ebff8 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-protos.h @@ -0,0 +1,65 @@ +/* Prototypes for exported functions defined in c6x.c. + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_C6X_PROTOS_H +#define GCC_C6X_PROTOS_H + +/* Functions defined in c6x.c. 
*/ + +#ifdef RTX_CODE +extern void c6x_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, int); +extern bool c6x_block_reg_pad_upward (enum machine_mode, const_tree, bool); + +extern bool c6x_legitimate_address_p_1 (enum machine_mode, rtx, bool, bool); +extern bool c6x_mem_operand (rtx, enum reg_class, bool); +extern bool expand_move (rtx *, enum machine_mode); + +extern bool c6x_long_call_p (rtx); +extern void c6x_expand_call (rtx, rtx, bool); +extern rtx c6x_expand_compare (rtx, enum machine_mode); +extern bool c6x_force_op_for_comparison_p (enum rtx_code, rtx); +extern bool c6x_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx); + +extern rtx c6x_subword (rtx, bool); +extern void split_di (rtx *, int, rtx *, rtx *); +extern bool c6x_valid_mask_p (HOST_WIDE_INT); + +extern char c6x_get_unit_specifier (rtx); + +extern void c6x_final_prescan_insn(rtx insn, rtx *opvec, int noperands); + +extern int c6x_nsaved_regs (void); +extern HOST_WIDE_INT c6x_initial_elimination_offset (int, int); +extern void c6x_expand_prologue (void); +extern void c6x_expand_epilogue (bool); + +extern rtx c6x_return_addr_rtx (int); + +extern void c6x_set_return_address (rtx, rtx); +#endif + +extern void c6x_override_options (void); +extern void c6x_optimization_options (int, int); + +extern void c6x_output_file_unwind (FILE *); + +extern void c6x_function_end (FILE *, const char *); + +#endif /* GCC_C6X_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/c6x/c6x-sched.md b/gcc-4.9/gcc/config/c6x/c6x-sched.md new file mode 100644 index 000000000..d85c1a9b9 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-sched.md @@ -0,0 +1,934 @@ +;; -*- buffer-read-only: t -*- +;; Generated automatically from c6x-sched.md.in by gensched.sh + +;; Definitions for side 1, cross n + +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing 1 with either 1 or 2 for each of the sides of the +;; machine, and a correspondingly with "a" or "b". n and +;; are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d1n" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1") + +(define_insn_reservation "store_d1n" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1") + +(define_insn_reservation "loadn_d1n" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1+t2") + +(define_insn_reservation "storen_d1n" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1+t2") + +(define_insn_reservation "single_d1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "a")))) + "d1") + +(define_insn_reservation "single_l1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+l1w") + +(define_insn_reservation "fp4_l1n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1,nothing*2,l1w") + +(define_insn_reservation "intdp_l1n" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1,nothing*2,l1w*2") + +(define_insn_reservation "adddp_l1n" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "(l1)*2,nothing*3,l1w*2") + +(define_insn_reservation "branch_s1n" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)+br1") + +(define_insn_reservation "call_addkpc_s1n" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a"))))) + "(s1+s1w)+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s1n" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a"))))) + "(s1+s1w)+br1,s2,s2") + +(define_insn_reservation "single_s1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)") + +(define_insn_reservation "cmpdp_s1n" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1,(s1)+s1w") + +(define_insn_reservation "dp2_s1n" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1+s1w,s1w") + +(define_insn_reservation "fp4_s1n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1,nothing*2,s1w") + +(define_insn_reservation "mvilc4_s1n" 4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)") + +(define_insn_reservation "single_dl1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "a")))) + "(d1|(l1+l1w))") + +(define_insn_reservation "single_ds1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") 
+ (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "a")))) + "(d1|(s1+s1w))") + +(define_insn_reservation "single_ls1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "((l1+l1w)|(s1+s1w))") + +(define_insn_reservation "dp2_l1n" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+l1w,l1w") + +(define_insn_reservation "fp4_ls1n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "(fps1+s1,nothing*2,s1w)|(fpl1+l1,nothing*2,l1w)") + +(define_insn_reservation "adddp_ls1n" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "(adddps1+(s1)*2,nothing*3,s1w*2)|(adddpl1+(l1)*2,nothing*3,l1w*2)") + +(define_insn_reservation "single_dls1n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "a")))) + "(d1|(l1+l1w)|(s1+s1w))") + +(define_insn_reservation "mpy2_m1n" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1,m1w") + +(define_insn_reservation "mpy4_m1n" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1,nothing,nothing,m1w") + +(define_insn_reservation "mpydp_m1n" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "(m1)*4,nothing*4,m1w*2") + +(define_insn_reservation "mpyspdp_m1n" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "(m1)*2,nothing*3,m1w*2") + +(define_insn_reservation "mpysp2dp_m1n" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1,nothing*2,m1w*2") + +;; Definitions for side 2, cross n + +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing 2 with either 1 or 2 for each of the sides of the +;; machine, and b correspondingly with "a" or "b". n and +;; are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d2n" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t2") + +(define_insn_reservation "store_d2n" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t2") + +(define_insn_reservation "loadn_d2n" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1+t2") + +(define_insn_reservation "storen_d2n" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1+t2") + +(define_insn_reservation "single_d2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "b")))) + "d2") + +(define_insn_reservation "single_l2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+l2w") + +(define_insn_reservation "fp4_l2n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2,nothing*2,l2w") + +(define_insn_reservation "intdp_l2n" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2,nothing*2,l2w*2") + +(define_insn_reservation "adddp_l2n" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "(l2)*2,nothing*3,l2w*2") + +(define_insn_reservation "branch_s2n" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)+br1") + +(define_insn_reservation "call_addkpc_s2n" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b"))))) + "(s2+s2w)+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s2n" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b"))))) + "(s2+s2w)+br1,s2,s2") + +(define_insn_reservation "single_s2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)") + +(define_insn_reservation "cmpdp_s2n" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2,(s2)+s2w") + +(define_insn_reservation "dp2_s2n" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2+s2w,s2w") + +(define_insn_reservation "fp4_s2n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2,nothing*2,s2w") + +(define_insn_reservation "mvilc4_s2n" 4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)") + +(define_insn_reservation "single_dl2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "b")))) + "(d2|(l2+l2w))") + +(define_insn_reservation "single_ds2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") 
+ (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "b")))) + "(d2|(s2+s2w))") + +(define_insn_reservation "single_ls2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "((l2+l2w)|(s2+s2w))") + +(define_insn_reservation "dp2_l2n" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+l2w,l2w") + +(define_insn_reservation "fp4_ls2n" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "(fps2+s2,nothing*2,s2w)|(fpl2+l2,nothing*2,l2w)") + +(define_insn_reservation "adddp_ls2n" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "(adddps2+(s2)*2,nothing*3,s2w*2)|(adddpl2+(l2)*2,nothing*3,l2w*2)") + +(define_insn_reservation "single_dls2n" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "b")))) + "(d2|(l2+l2w)|(s2+s2w))") + +(define_insn_reservation "mpy2_m2n" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2,m2w") + +(define_insn_reservation "mpy4_m2n" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2,nothing,nothing,m2w") + +(define_insn_reservation "mpydp_m2n" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "(m2)*4,nothing*4,m2w*2") + +(define_insn_reservation "mpyspdp_m2n" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "(m2)*2,nothing*3,m2w*2") + +(define_insn_reservation "mpysp2dp_m2n" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2,nothing*2,m2w*2") + +;; Definitions for side 1, cross y + +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing 1 with either 1 or 2 for each of the sides of the +;; machine, and a correspondingly with "a" or "b". y and +;; +x1 are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d1y" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t2") + +(define_insn_reservation "store_d1y" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t2") + +(define_insn_reservation "loadn_d1y" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1+t2") + +(define_insn_reservation "storen_d1y" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "a")))) + "d1+t1+t2") + +(define_insn_reservation "single_d1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "a")))) + "d1+x1") + +(define_insn_reservation "single_l1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+l1w+x1") + +(define_insn_reservation "fp4_l1y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+x1,nothing*2,l1w") + +(define_insn_reservation "intdp_l1y" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+x1,nothing*2,l1w*2") + +(define_insn_reservation "adddp_l1y" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "(l1+x1)*2,nothing*3,l1w*2") + +(define_insn_reservation "branch_s1y" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)+x1+br1") + +(define_insn_reservation "call_addkpc_s1y" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a"))))) + "(s1+s1w)+x1+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s1y" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a"))))) + "(s1+s1w)+x1+br1,s2,s2") + +(define_insn_reservation "single_s1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)+x1") + +(define_insn_reservation "cmpdp_s1y" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1+x1,(s1+x1)+s1w") + +(define_insn_reservation "dp2_s1y" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1+s1w+x1,s1w") + +(define_insn_reservation "fp4_s1y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "s1+x1,nothing*2,s1w") + +(define_insn_reservation "mvilc4_s1y" 4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "a")))) + "(s1+s1w)+x1") + +(define_insn_reservation "single_dl1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "a")))) + "(d1|(l1+l1w))+x1") + +(define_insn_reservation "single_ds1y" 1 + (and (eq_attr 
"type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "a")))) + "(d1|(s1+s1w))+x1") + +(define_insn_reservation "single_ls1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "((l1+l1w)|(s1+s1w))+x1") + +(define_insn_reservation "dp2_l1y" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "a")))) + "l1+l1w+x1,l1w") + +(define_insn_reservation "fp4_ls1y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "(fps1+s1+x1,nothing*2,s1w)|(fpl1+l1+x1,nothing*2,l1w)") + +(define_insn_reservation "adddp_ls1y" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "a")))) + "(adddps1+(s1+x1)*2,nothing*3,s1w*2)|(adddpl1+(l1+x1)*2,nothing*3,l1w*2)") + +(define_insn_reservation "single_dls1y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "a")))) + "(d1|(l1+l1w)|(s1+s1w))+x1") + +(define_insn_reservation "mpy2_m1y" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1+x1,m1w") + +(define_insn_reservation "mpy4_m1y" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1+x1,nothing,nothing,m1w") + +(define_insn_reservation "mpydp_m1y" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "(m1+x1)*4,nothing*4,m1w*2") + +(define_insn_reservation "mpyspdp_m1y" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "(m1+x1)*2,nothing*3,m1w*2") + +(define_insn_reservation "mpysp2dp_m1y" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "a")))) + "m1+x1,nothing*2,m1w*2") + +;; Definitions for side 2, cross y + +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing 2 with either 1 or 2 for each of the sides of the +;; machine, and b correspondingly with "a" or "b". y and +;; +x2 are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d2y" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1") + +(define_insn_reservation "store_d2y" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1") + +(define_insn_reservation "loadn_d2y" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1+t2") + +(define_insn_reservation "storen_d2y" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "b")))) + "d2+t1+t2") + +(define_insn_reservation "single_d2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "b")))) + "d2+x2") + +(define_insn_reservation "single_l2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+l2w+x2") + +(define_insn_reservation "fp4_l2y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+x2,nothing*2,l2w") + +(define_insn_reservation "intdp_l2y" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+x2,nothing*2,l2w*2") + +(define_insn_reservation "adddp_l2y" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "(l2+x2)*2,nothing*3,l2w*2") + +(define_insn_reservation "branch_s2y" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)+x2+br1") + +(define_insn_reservation "call_addkpc_s2y" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b"))))) + "(s2+s2w)+x2+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s2y" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b"))))) + "(s2+s2w)+x2+br1,s2,s2") + +(define_insn_reservation "single_s2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)+x2") + +(define_insn_reservation "cmpdp_s2y" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2+x2,(s2+x2)+s2w") + +(define_insn_reservation "dp2_s2y" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2+s2w+x2,s2w") + +(define_insn_reservation "fp4_s2y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "s2+x2,nothing*2,s2w") + +(define_insn_reservation "mvilc4_s2y" 4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "b")))) + "(s2+s2w)+x2") + +(define_insn_reservation "single_dl2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "b")))) + "(d2|(l2+l2w))+x2") + +(define_insn_reservation "single_ds2y" 1 + (and (eq_attr 
"type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "b")))) + "(d2|(s2+s2w))+x2") + +(define_insn_reservation "single_ls2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "((l2+l2w)|(s2+s2w))+x2") + +(define_insn_reservation "dp2_l2y" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "b")))) + "l2+l2w+x2,l2w") + +(define_insn_reservation "fp4_ls2y" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "(fps2+s2+x2,nothing*2,s2w)|(fpl2+l2+x2,nothing*2,l2w)") + +(define_insn_reservation "adddp_ls2y" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "b")))) + "(adddps2+(s2+x2)*2,nothing*3,s2w*2)|(adddpl2+(l2+x2)*2,nothing*3,l2w*2)") + +(define_insn_reservation "single_dls2y" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "b")))) + "(d2|(l2+l2w)|(s2+s2w))+x2") + +(define_insn_reservation "mpy2_m2y" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2+x2,m2w") + +(define_insn_reservation "mpy4_m2y" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2+x2,nothing,nothing,m2w") + +(define_insn_reservation "mpydp_m2y" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "(m2+x2)*4,nothing*4,m2w*2") + +(define_insn_reservation "mpyspdp_m2y" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "(m2+x2)*2,nothing*3,m2w*2") + +(define_insn_reservation "mpysp2dp_m2y" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "y") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "b")))) + "m2+x2,nothing*2,m2w*2") diff --git a/gcc-4.9/gcc/config/c6x/c6x-sched.md.in b/gcc-4.9/gcc/config/c6x/c6x-sched.md.in new file mode 100644 index 000000000..2a98dddac --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-sched.md.in @@ -0,0 +1,230 @@ +;; Scheduling description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Input file for gensched.sh We process this file multiple times, +;; replacing _N_ with either 1 or 2 for each of the sides of the +;; machine, and _RF_ correspondingly with "a" or "b". _CROSS_ and +;; _CUNIT_ are replaced with yes/no and the appropriate reservation. 
+ +(define_insn_reservation "load_d_N__CROSS_" 5 + (and (eq_attr "type" "load") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "_RF_")))) + "d_N_+t_NX_") + +(define_insn_reservation "store_d_N__CROSS_" 1 + (and (eq_attr "type" "store") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "_RF_")))) + "d_N_+t_NX_") + +(define_insn_reservation "loadn_d_N__CROSS_" 5 + (and (eq_attr "type" "loadn") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "_RF_")))) + "d_N_+t1+t2") + +(define_insn_reservation "storen_d_N__CROSS_" 1 + (and (eq_attr "type" "storen") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d_addr") + (eq_attr "addr_regfile" "_RF_")))) + "d_N_+t1+t2") + +(define_insn_reservation "single_d_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "d") + (eq_attr "dest_regfile" "_RF_")))) + "d_N__CUNIT_") + +(define_insn_reservation "single_l_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "l_N_+l_N_w_CUNIT_") + +(define_insn_reservation "fp4_l_N__CROSS_" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "l_N__CUNIT_,nothing*2,l_N_w") + +(define_insn_reservation "intdp_l_N__CROSS_" 5 + (and (eq_attr "type" "intdp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "l_N__CUNIT_,nothing*2,l_N_w*2") + +(define_insn_reservation "adddp_l_N__CROSS_" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "(l_N__CUNIT_)*2,nothing*3,l_N_w*2") + +(define_insn_reservation "branch_s_N__CROSS_" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "(s_N_+s_N_w)_CUNIT_+br1") + +(define_insn_reservation "call_addkpc_s_N__CROSS_" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_"))))) + "(s_N_+s_N_w)_CUNIT_+br1,s2+br0+br1") + +(define_insn_reservation "call_mvk_s_N__CROSS_" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_"))))) + "(s_N_+s_N_w)_CUNIT_+br1,s2,s2") + +(define_insn_reservation "single_s_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "(s_N_+s_N_w)_CUNIT_") + +(define_insn_reservation "cmpdp_s_N__CROSS_" 2 + (and (eq_attr "type" "cmpdp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "s_N__CUNIT_,(s_N__CUNIT_)+s_N_w") + +(define_insn_reservation "dp2_s_N__CROSS_" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "s_N_+s_N_w_CUNIT_,s_N_w") + +(define_insn_reservation "fp4_s_N__CROSS_" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "s_N__CUNIT_,nothing*2,s_N_w") + +(define_insn_reservation "mvilc4_s_N__CROSS_" 
4 + (and (eq_attr "type" "mvilc") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "_RF_")))) + "(s_N_+s_N_w)_CUNIT_") + +(define_insn_reservation "single_dl_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "dl") + (eq_attr "dest_regfile" "_RF_")))) + "(d_N_|(l_N_+l_N_w))_CUNIT_") + +(define_insn_reservation "single_ds_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "ds") + (eq_attr "dest_regfile" "_RF_")))) + "(d_N_|(s_N_+s_N_w))_CUNIT_") + +(define_insn_reservation "single_ls_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "_RF_")))) + "((l_N_+l_N_w)|(s_N_+s_N_w))_CUNIT_") + +(define_insn_reservation "dp2_l_N__CROSS_" 2 + (and (eq_attr "type" "dp2") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "l") + (eq_attr "dest_regfile" "_RF_")))) + "l_N_+l_N_w_CUNIT_,l_N_w") + +(define_insn_reservation "fp4_ls_N__CROSS_" 4 + (and (eq_attr "type" "fp4") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "_RF_")))) + "(fps_N_+s_N__CUNIT_,nothing*2,s_N_w)|(fpl_N_+l_N__CUNIT_,nothing*2,l_N_w)") + +(define_insn_reservation "adddp_ls_N__CROSS_" 7 + (and (eq_attr "type" "adddp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "ls") + (eq_attr "dest_regfile" "_RF_")))) + "(adddps_N_+(s_N__CUNIT_)*2,nothing*3,s_N_w*2)|(adddpl_N_+(l_N__CUNIT_)*2,nothing*3,l_N_w*2)") + +(define_insn_reservation "single_dls_N__CROSS_" 1 + (and (eq_attr "type" "single") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "dls") + (eq_attr "dest_regfile" "_RF_")))) + "(d_N_|(l_N_+l_N_w)|(s_N_+s_N_w))_CUNIT_") + +(define_insn_reservation "mpy2_m_N__CROSS_" 2 + (and (eq_attr "type" "mpy2") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "m_N__CUNIT_,m_N_w") + +(define_insn_reservation "mpy4_m_N__CROSS_" 4 + (and (eq_attr "type" "mpy4") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "m_N__CUNIT_,nothing,nothing,m_N_w") + +(define_insn_reservation "mpydp_m_N__CROSS_" 10 + (and (eq_attr "type" "mpydp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "(m_N__CUNIT_)*4,nothing*4,m_N_w*2") + +(define_insn_reservation "mpyspdp_m_N__CROSS_" 7 + (and (eq_attr "type" "mpyspdp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "(m_N__CUNIT_)*2,nothing*3,m_N_w*2") + +(define_insn_reservation "mpysp2dp_m_N__CROSS_" 5 + (and (eq_attr "type" "mpysp2dp") + (and (eq_attr "cross" "_CROSS_") + (and (eq_attr "units" "m") + (eq_attr "dest_regfile" "_RF_")))) + "m_N__CUNIT_,nothing*2,m_N_w*2") diff --git a/gcc-4.9/gcc/config/c6x/c6x-tables.opt b/gcc-4.9/gcc/config/c6x/c6x-tables.opt new file mode 100644 index 000000000..a4eb62fab --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x-tables.opt @@ -0,0 +1,43 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by genopt.sh from c6x-isas.def. +; +; Copyright (C) 2011-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. 
+; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +Enum +Name(c6x_isa) Type(int) +Known C6X ISAs (for use with the -march= option): + +EnumValue +Enum(c6x_isa) String(c62x) Value(0) + +EnumValue +Enum(c6x_isa) String(c64x) Value(1) + +EnumValue +Enum(c6x_isa) String(c64x+) Value(2) + +EnumValue +Enum(c6x_isa) String(c67x) Value(3) + +EnumValue +Enum(c6x_isa) String(c67x+) Value(4) + +EnumValue +Enum(c6x_isa) String(c674x) Value(5) + diff --git a/gcc-4.9/gcc/config/c6x/c6x.c b/gcc-4.9/gcc/config/c6x/c6x.c new file mode 100644 index 000000000..9ba10df73 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x.c @@ -0,0 +1,6846 @@ +/* Target Code for TI C6X + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include "stringpool.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "insn-codes.h" +#include "expr.h" +#include "regs.h" +#include "optabs.h" +#include "recog.h" +#include "ggc.h" +#include "sched-int.h" +#include "timevar.h" +#include "tm_p.h" +#include "tm-preds.h" +#include "tm-constrs.h" +#include "df.h" +#include "function.h" +#include "diagnostic-core.h" +#include "cgraph.h" +#include "langhooks.h" +#include "target.h" +#include "target-def.h" +#include "sel-sched.h" +#include "debug.h" +#include "opts.h" +#include "hw-doloop.h" +#include "regrename.h" +#include "dumpfile.h" +#include "gimple-expr.h" + +/* Table of supported architecture variants. */ +typedef struct +{ + const char *arch; + enum c6x_cpu_type type; + unsigned short features; +} c6x_arch_table; + +/* A list of all ISAs, mapping each one to a representative device. + Used for -march selection. */ +static const c6x_arch_table all_isas[] = +{ +#define C6X_ISA(NAME,DEVICE,FLAGS) \ + { NAME, DEVICE, FLAGS }, +#include "c6x-isas.def" +#undef C6X_ISA + { NULL, C6X_CPU_C62X, 0 } +}; + +/* This is the parsed result of the "-march=" option, if given. */ +enum c6x_cpu_type c6x_arch = C6X_DEFAULT_ARCH; + +/* A mask of insn types that are allowed by the architecture selected by + the -march option. */ +unsigned long c6x_insn_mask = C6X_DEFAULT_INSN_MASK; + +/* The instruction that is being output (as obtained from FINAL_PRESCAN_INSN). + */ +static rtx c6x_current_insn = NULL_RTX; + +/* A decl we build to access __c6xabi_DSBT_base. */ +static GTY(()) tree dsbt_decl; + +/* Determines whether we run our final scheduling pass or not. 
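For illustration, the all_isas[] table above is built with an X-macro: c6x-isas.def is included once with C6X_ISA defined as a struct initializer, so every C6X_ISA(...) line in the .def file becomes one table entry. Below is a minimal, self-contained sketch of the same idiom; the ISA names and flag values are invented for the sketch, the real list lives in c6x-isas.def.

#include <stdio.h>

enum cpu_type { CPU_C62X, CPU_C64X, CPU_C67X };

typedef struct
{
  const char *arch;
  enum cpu_type type;
  unsigned short features;
} arch_table;

/* Stand-in for '#include "c6x-isas.def"': each ISA(...) entry expands to one
   initializer when the macro is defined that way.  */
#define ISA_LIST \
  ISA ("c62x", CPU_C62X, 0x1) \
  ISA ("c64x", CPU_C64X, 0x3) \
  ISA ("c67x", CPU_C67X, 0x5)

static const arch_table all_isas_sketch[] =
{
#define ISA(NAME, DEVICE, FLAGS) { NAME, DEVICE, FLAGS },
  ISA_LIST
#undef ISA
  { NULL, CPU_C62X, 0 }
};

int main (void)
{
  for (int i = 0; all_isas_sketch[i].arch; i++)
    printf ("%-6s type=%d features=%#x\n", all_isas_sketch[i].arch,
            (int) all_isas_sketch[i].type, all_isas_sketch[i].features);
  return 0;
}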
We always + avoid the normal second scheduling pass. */ +static int c6x_flag_schedule_insns2; + +/* Determines whether we run variable tracking in machine dependent + reorganization. */ +static int c6x_flag_var_tracking; + +/* Determines whether we use modulo scheduling. */ +static int c6x_flag_modulo_sched; + +/* Record the state of flag_pic before we set it to 1 for DSBT. */ +int c6x_initial_flag_pic; + +typedef struct +{ + /* We record the clock cycle for every insn during scheduling. */ + int clock; + /* After scheduling, we run assign_reservations to choose unit + reservations for all insns. These are recorded here. */ + int reservation; + /* Records the new condition for insns which must be made + conditional after scheduling. An entry of NULL_RTX means no such + change is necessary. */ + rtx new_cond; + /* True for the first insn that was scheduled in an ebb. */ + bool ebb_start; + /* The scheduler state after the insn, transformed into a mask of UNIT_QID + bits rather than storing the state. Meaningful only for the last + insn in a cycle. */ + unsigned int unit_mask; +} c6x_sched_insn_info; + + +/* Record a c6x_sched_insn_info structure for every insn in the function. */ +static vec insn_info; + +#define INSN_INFO_LENGTH (insn_info).length () +#define INSN_INFO_ENTRY(N) (insn_info[(N)]) + +static bool done_cfi_sections; + +#define RESERVATION_FLAG_D 1 +#define RESERVATION_FLAG_L 2 +#define RESERVATION_FLAG_S 4 +#define RESERVATION_FLAG_M 8 +#define RESERVATION_FLAG_DL (RESERVATION_FLAG_D | RESERVATION_FLAG_L) +#define RESERVATION_FLAG_DS (RESERVATION_FLAG_D | RESERVATION_FLAG_S) +#define RESERVATION_FLAG_LS (RESERVATION_FLAG_L | RESERVATION_FLAG_S) +#define RESERVATION_FLAG_DLS (RESERVATION_FLAG_D | RESERVATION_FLAG_LS) + +/* The DFA names of the units. */ +static const char *const c6x_unit_names[] = +{ + "d1", "l1", "s1", "m1", "fps1", "fpl1", "adddps1", "adddpl1", + "d2", "l2", "s2", "m2", "fps2", "fpl2", "adddps2", "adddpl2" +}; + +/* The DFA unit number for each unit in c6x_unit_names[]. */ +static int c6x_unit_codes[ARRAY_SIZE (c6x_unit_names)]; + +/* Unit query IDs. */ +#define UNIT_QID_D1 0 +#define UNIT_QID_L1 1 +#define UNIT_QID_S1 2 +#define UNIT_QID_M1 3 +#define UNIT_QID_FPS1 4 +#define UNIT_QID_FPL1 5 +#define UNIT_QID_ADDDPS1 6 +#define UNIT_QID_ADDDPL1 7 +#define UNIT_QID_SIDE_OFFSET 8 + +#define RESERVATION_S1 2 +#define RESERVATION_S2 10 + +/* An enum for the unit requirements we count in the UNIT_REQS table. */ +enum unitreqs +{ + UNIT_REQ_D, + UNIT_REQ_L, + UNIT_REQ_S, + UNIT_REQ_M, + UNIT_REQ_DL, + UNIT_REQ_DS, + UNIT_REQ_LS, + UNIT_REQ_DLS, + UNIT_REQ_T, + UNIT_REQ_X, + UNIT_REQ_MAX +}; + +/* A table used to count unit requirements. Used when computing minimum + iteration intervals. */ +typedef int unit_req_table[2][UNIT_REQ_MAX]; +static unit_req_table unit_reqs; + +/* Register map for debugging. */ +unsigned const dbx_register_map[FIRST_PSEUDO_REGISTER] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* A0 - A15. */ + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, /* A16 - A32. */ + 50, 51, 52, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, /* B0 - B15. */ + 29, 30, 31, + 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, /* B16 - B32. */ + 66, 67, 68, + -1, -1, -1 /* FP, ARGP, ILC. */ +}; + +/* Allocate a new, cleared machine_function structure. */ + +static struct machine_function * +c6x_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Implement TARGET_OPTION_OVERRIDE. 
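The unit_mask recorded per cycle above is a plain bitmask over the UNIT_QID_* values, with side-B units offset by UNIT_QID_SIDE_OFFSET. A small standalone sketch of how such a mask can be composed and queried; only the QID layout mirrors the #defines above, the helper names are invented for the sketch.

#include <stdio.h>

#define QID_D 0
#define QID_L 1
#define QID_S 2
#define QID_M 3
#define QID_SIDE_OFFSET 8   /* side-B units use query IDs 8 positions later */

static unsigned int mark_unit_busy (unsigned int mask, int qid, int side_b)
{
  return mask | (1u << (qid + (side_b ? QID_SIDE_OFFSET : 0)));
}

static int unit_busy_p (unsigned int mask, int qid, int side_b)
{
  return (mask >> (qid + (side_b ? QID_SIDE_OFFSET : 0))) & 1;
}

int main (void)
{
  unsigned int mask = 0;
  mask = mark_unit_busy (mask, QID_D, 0);   /* .D1 used this cycle */
  mask = mark_unit_busy (mask, QID_M, 1);   /* .M2 used this cycle */
  printf ("D1 busy: %d, S2 busy: %d, M2 busy: %d\n",
          unit_busy_p (mask, QID_D, 0),
          unit_busy_p (mask, QID_S, 1),
          unit_busy_p (mask, QID_M, 1));
  return 0;
}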
*/ + +static void +c6x_option_override (void) +{ + unsigned i; + + if (global_options_set.x_c6x_arch_option) + { + c6x_arch = all_isas[c6x_arch_option].type; + c6x_insn_mask &= ~C6X_INSNS_ALL_CPU_BITS; + c6x_insn_mask |= all_isas[c6x_arch_option].features; + } + + c6x_flag_schedule_insns2 = flag_schedule_insns_after_reload; + flag_schedule_insns_after_reload = 0; + + c6x_flag_modulo_sched = flag_modulo_sched; + flag_modulo_sched = 0; + + init_machine_status = c6x_init_machine_status; + + for (i = 0; i < ARRAY_SIZE (c6x_unit_names); i++) + c6x_unit_codes[i] = get_cpu_unit_code (c6x_unit_names[i]); + + if (flag_pic && !TARGET_DSBT) + { + error ("-fpic and -fPIC not supported without -mdsbt on this target"); + flag_pic = 0; + } + c6x_initial_flag_pic = flag_pic; + if (TARGET_DSBT && !flag_pic) + flag_pic = 1; +} + + +/* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook. */ + +static void +c6x_conditional_register_usage (void) +{ + int i; + if (c6x_arch == C6X_CPU_C62X || c6x_arch == C6X_CPU_C67X) + for (i = 16; i < 32; i++) + { + fixed_regs[i] = 1; + fixed_regs[32 + i] = 1; + } + if (TARGET_INSNS_64) + { + SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_A_REGS], + REG_A0); + SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_REGS], + REG_A0); + CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_A_REGS], + REG_A0); + CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_REGS], + REG_A0); + } +} + +static GTY(()) rtx eqdf_libfunc; +static GTY(()) rtx nedf_libfunc; +static GTY(()) rtx ledf_libfunc; +static GTY(()) rtx ltdf_libfunc; +static GTY(()) rtx gedf_libfunc; +static GTY(()) rtx gtdf_libfunc; +static GTY(()) rtx eqsf_libfunc; +static GTY(()) rtx nesf_libfunc; +static GTY(()) rtx lesf_libfunc; +static GTY(()) rtx ltsf_libfunc; +static GTY(()) rtx gesf_libfunc; +static GTY(()) rtx gtsf_libfunc; +static GTY(()) rtx strasgi_libfunc; +static GTY(()) rtx strasgi64p_libfunc; + +/* Implement the TARGET_INIT_LIBFUNCS macro. We use this to rename library + functions to match the C6x ABI. */ + +static void +c6x_init_libfuncs (void) +{ + /* Double-precision floating-point arithmetic. */ + set_optab_libfunc (add_optab, DFmode, "__c6xabi_addd"); + set_optab_libfunc (sdiv_optab, DFmode, "__c6xabi_divd"); + set_optab_libfunc (smul_optab, DFmode, "__c6xabi_mpyd"); + set_optab_libfunc (neg_optab, DFmode, "__c6xabi_negd"); + set_optab_libfunc (sub_optab, DFmode, "__c6xabi_subd"); + + /* Single-precision floating-point arithmetic. */ + set_optab_libfunc (add_optab, SFmode, "__c6xabi_addf"); + set_optab_libfunc (sdiv_optab, SFmode, "__c6xabi_divf"); + set_optab_libfunc (smul_optab, SFmode, "__c6xabi_mpyf"); + set_optab_libfunc (neg_optab, SFmode, "__c6xabi_negf"); + set_optab_libfunc (sub_optab, SFmode, "__c6xabi_subf"); + + /* Floating-point comparisons. 
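The renaming done here (and continued below for comparisons, conversions and integer division) means that ordinary C operations with no matching hardware instruction lower to __c6xabi_* helper calls. A hedged illustration of the expected lowering for a few source-level operations; the exact set of calls depends on the selected -march and on whether the device has hardware floating point.

/* Illustration only, not part of the patch.  */
double add_d (double a, double b)
{
  return a + b;               /* expected to call __c6xabi_addd (add_optab, DFmode)
                                 on devices without a hardware double add */
}

unsigned long long div_ull (unsigned long long a, unsigned long long b)
{
  return a / b;               /* expected to call __c6xabi_divull (udiv_optab, DImode) */
}

int unordered_f (float a, float b)
{
  return __builtin_isunordered (a, b);   /* expected to use __c6xabi_unordf */
}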
+   */
+  eqsf_libfunc = init_one_libfunc ("__c6xabi_eqf");
+  nesf_libfunc = init_one_libfunc ("__c6xabi_neqf");
+  lesf_libfunc = init_one_libfunc ("__c6xabi_lef");
+  ltsf_libfunc = init_one_libfunc ("__c6xabi_ltf");
+  gesf_libfunc = init_one_libfunc ("__c6xabi_gef");
+  gtsf_libfunc = init_one_libfunc ("__c6xabi_gtf");
+  eqdf_libfunc = init_one_libfunc ("__c6xabi_eqd");
+  nedf_libfunc = init_one_libfunc ("__c6xabi_neqd");
+  ledf_libfunc = init_one_libfunc ("__c6xabi_led");
+  ltdf_libfunc = init_one_libfunc ("__c6xabi_ltd");
+  gedf_libfunc = init_one_libfunc ("__c6xabi_ged");
+  gtdf_libfunc = init_one_libfunc ("__c6xabi_gtd");
+
+  set_optab_libfunc (eq_optab, SFmode, NULL);
+  set_optab_libfunc (ne_optab, SFmode, "__c6xabi_neqf");
+  set_optab_libfunc (gt_optab, SFmode, NULL);
+  set_optab_libfunc (ge_optab, SFmode, NULL);
+  set_optab_libfunc (lt_optab, SFmode, NULL);
+  set_optab_libfunc (le_optab, SFmode, NULL);
+  set_optab_libfunc (unord_optab, SFmode, "__c6xabi_unordf");
+  set_optab_libfunc (eq_optab, DFmode, NULL);
+  set_optab_libfunc (ne_optab, DFmode, "__c6xabi_neqd");
+  set_optab_libfunc (gt_optab, DFmode, NULL);
+  set_optab_libfunc (ge_optab, DFmode, NULL);
+  set_optab_libfunc (lt_optab, DFmode, NULL);
+  set_optab_libfunc (le_optab, DFmode, NULL);
+  set_optab_libfunc (unord_optab, DFmode, "__c6xabi_unordd");
+
+  /* Floating-point to integer conversions.  */
+  set_conv_libfunc (sfix_optab, SImode, DFmode, "__c6xabi_fixdi");
+  set_conv_libfunc (ufix_optab, SImode, DFmode, "__c6xabi_fixdu");
+  set_conv_libfunc (sfix_optab, DImode, DFmode, "__c6xabi_fixdlli");
+  set_conv_libfunc (ufix_optab, DImode, DFmode, "__c6xabi_fixdull");
+  set_conv_libfunc (sfix_optab, SImode, SFmode, "__c6xabi_fixfi");
+  set_conv_libfunc (ufix_optab, SImode, SFmode, "__c6xabi_fixfu");
+  set_conv_libfunc (sfix_optab, DImode, SFmode, "__c6xabi_fixflli");
+  set_conv_libfunc (ufix_optab, DImode, SFmode, "__c6xabi_fixfull");
+
+  /* Conversions between floating types.  */
+  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__c6xabi_cvtdf");
+  set_conv_libfunc (sext_optab, DFmode, SFmode, "__c6xabi_cvtfd");
+
+  /* Integer to floating-point conversions.  */
+  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__c6xabi_fltid");
+  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__c6xabi_fltud");
+  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__c6xabi_fltllid");
+  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__c6xabi_fltulld");
+  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__c6xabi_fltif");
+  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__c6xabi_fltuf");
+  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__c6xabi_fltllif");
+  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__c6xabi_fltullf");
+
+  /* Long long.
*/ + set_optab_libfunc (smul_optab, DImode, "__c6xabi_mpyll"); + set_optab_libfunc (ashl_optab, DImode, "__c6xabi_llshl"); + set_optab_libfunc (lshr_optab, DImode, "__c6xabi_llshru"); + set_optab_libfunc (ashr_optab, DImode, "__c6xabi_llshr"); + + set_optab_libfunc (sdiv_optab, SImode, "__c6xabi_divi"); + set_optab_libfunc (udiv_optab, SImode, "__c6xabi_divu"); + set_optab_libfunc (smod_optab, SImode, "__c6xabi_remi"); + set_optab_libfunc (umod_optab, SImode, "__c6xabi_remu"); + set_optab_libfunc (sdivmod_optab, SImode, "__c6xabi_divremi"); + set_optab_libfunc (udivmod_optab, SImode, "__c6xabi_divremu"); + set_optab_libfunc (sdiv_optab, DImode, "__c6xabi_divlli"); + set_optab_libfunc (udiv_optab, DImode, "__c6xabi_divull"); + set_optab_libfunc (smod_optab, DImode, "__c6xabi_remlli"); + set_optab_libfunc (umod_optab, DImode, "__c6xabi_remull"); + set_optab_libfunc (udivmod_optab, DImode, "__c6xabi_divremull"); + + /* Block move. */ + strasgi_libfunc = init_one_libfunc ("__c6xabi_strasgi"); + strasgi64p_libfunc = init_one_libfunc ("__c6xabi_strasgi_64plus"); +} + +/* Begin the assembly file. */ + +static void +c6x_file_start (void) +{ + /* Variable tracking should be run after all optimizations which change order + of insns. It also needs a valid CFG. This can't be done in + c6x_override_options, because flag_var_tracking is finalized after + that. */ + c6x_flag_var_tracking = flag_var_tracking; + flag_var_tracking = 0; + + done_cfi_sections = false; + default_file_start (); + + /* Arrays are aligned to 8-byte boundaries. */ + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_array_object_alignment, 0\n"); + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_array_object_align_expected, 0\n"); + + /* Stack alignment is 8 bytes. */ + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_stack_align_needed, 0\n"); + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_stack_align_preserved, 0\n"); + +#if 0 /* FIXME: Reenable when TI's tools are fixed. */ + /* ??? Ideally we'd check flag_short_wchar somehow. */ + asm_fprintf (asm_out_file, "\t.c6xabi_attribute Tag_ABI_wchar_t, %d\n", 2); +#endif + + /* We conform to version 1.0 of the ABI. */ + asm_fprintf (asm_out_file, + "\t.c6xabi_attribute Tag_ABI_conformance, \"1.0\"\n"); + +} + +/* The LTO frontend only enables exceptions when it sees a function that + uses it. This changes the return value of dwarf2out_do_frame, so we + have to check before every function. */ + +void +c6x_output_file_unwind (FILE * f) +{ + if (done_cfi_sections) + return; + + /* Output a .cfi_sections directive. */ + if (dwarf2out_do_frame ()) + { + if (flag_unwind_tables || flag_exceptions) + { + if (write_symbols == DWARF2_DEBUG + || write_symbols == VMS_AND_DWARF2_DEBUG) + asm_fprintf (f, "\t.cfi_sections .debug_frame, .c6xabi.exidx\n"); + else + asm_fprintf (f, "\t.cfi_sections .c6xabi.exidx\n"); + } + else + asm_fprintf (f, "\t.cfi_sections .debug_frame\n"); + done_cfi_sections = true; + } +} + +/* Output unwind directives at the end of a function. */ + +static void +c6x_output_fn_unwind (FILE * f) +{ + /* Return immediately if we are not generating unwinding tables. */ + if (! (flag_unwind_tables || flag_exceptions)) + return; + + /* If this function will never be unwound, then mark it as such. */ + if (!(flag_unwind_tables || crtl->uses_eh_lsda) + && (TREE_NOTHROW (current_function_decl) + || crtl->all_throwers_are_sibcalls)) + fputs("\t.cantunwind\n", f); + + fputs ("\t.endp\n", f); +} + + +/* Stack and Calling. 
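Taken together, c6x_file_start and c6x_output_file_unwind emit an assembly-file prologue along these lines. This is a sketch assembled from the asm_fprintf strings above; the .cfi_sections choice depends on the unwinding and debug flags in effect.

        .c6xabi_attribute Tag_ABI_array_object_alignment, 0
        .c6xabi_attribute Tag_ABI_array_object_align_expected, 0
        .c6xabi_attribute Tag_ABI_stack_align_needed, 0
        .c6xabi_attribute Tag_ABI_stack_align_preserved, 0
        .c6xabi_attribute Tag_ABI_conformance, "1.0"
        .cfi_sections .debug_frame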
*/ + +int argument_registers[10] = +{ + REG_A4, REG_B4, + REG_A6, REG_B6, + REG_A8, REG_B8, + REG_A10, REG_B10, + REG_A12, REG_B12 +}; + +/* Implements the macro INIT_CUMULATIVE_ARGS defined in c6x.h. */ + +void +c6x_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, rtx libname, + int n_named_args ATTRIBUTE_UNUSED) +{ + cum->count = 0; + cum->nregs = 10; + if (!libname && fntype) + { + /* We need to find out the number of named arguments. Unfortunately, + for incoming arguments, N_NAMED_ARGS is set to -1. */ + if (stdarg_p (fntype)) + cum->nregs = type_num_arguments (fntype) - 1; + if (cum->nregs > 10) + cum->nregs = 10; + } +} + +/* Implements the macro FUNCTION_ARG defined in c6x.h. */ + +static rtx +c6x_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + if (cum->count >= cum->nregs) + return NULL_RTX; + if (type) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (type)) + { + if (size > 4) + { + rtx reg1 = gen_rtx_REG (SImode, argument_registers[cum->count] + 1); + rtx reg2 = gen_rtx_REG (SImode, argument_registers[cum->count]); + rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4))); + return gen_rtx_PARALLEL (mode, vec); + } + } + } + return gen_rtx_REG (mode, argument_registers[cum->count]); +} + +static void +c6x_function_arg_advance (cumulative_args_t cum_v, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + cum->count++; +} + + +/* Return true if BLOCK_REG_PADDING (MODE, TYPE, FIRST) should return + upward rather than downward. */ + +bool +c6x_block_reg_pad_upward (enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type, bool first) +{ + HOST_WIDE_INT size; + + if (!TARGET_BIG_ENDIAN) + return true; + if (!first) + return true; + if (!type) + return true; + size = int_size_in_bytes (type); + return size == 3; +} + +/* Implement TARGET_FUNCTION_ARG_BOUNDARY. */ + +static unsigned int +c6x_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode); + + if (boundary > BITS_PER_WORD) + return 2 * BITS_PER_WORD; + + if (mode == BLKmode) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size > 4) + return 2 * BITS_PER_WORD; + if (boundary < BITS_PER_WORD) + { + if (size >= 3) + return BITS_PER_WORD; + if (size >= 2) + return 2 * BITS_PER_UNIT; + } + } + return boundary; +} + +/* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY. */ +static unsigned int +c6x_function_arg_round_boundary (enum machine_mode mode, const_tree type) +{ + return c6x_function_arg_boundary (mode, type); +} + +/* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place + where function FUNC returns or receives a value of data type TYPE. */ + +static rtx +c6x_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + /* Functions return values in register A4. When returning aggregates, we may + have to adjust for endianness. 
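For illustration, here is how the first few scalar arguments of a call map onto the argument_registers[] table above. Aggregates, varargs and the big-endian register-pair case follow the extra rules in c6x_function_arg; this sketch is not part of the patch.

int f (int a,    /* A4 */
       int b,    /* B4 */
       int c,    /* A6 */
       int d,    /* B6 */
       int e)    /* A8; further arguments continue with B8, A10, B10, A12,
                    B12, then go to the stack once the ten registers are
                    used (cum->count >= cum->nregs) */
{
  return a + b + c + d + e;
}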
*/ + if (TARGET_BIG_ENDIAN && type && AGGREGATE_TYPE_P (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); + if (size > 4) + { + + rtx reg1 = gen_rtx_REG (SImode, REG_A4 + 1); + rtx reg2 = gen_rtx_REG (SImode, REG_A4); + rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4))); + return gen_rtx_PARALLEL (TYPE_MODE (type), vec); + } + } + return gen_rtx_REG (TYPE_MODE (type), REG_A4); +} + +/* Implement TARGET_LIBCALL_VALUE. */ + +static rtx +c6x_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, REG_A4); +} + +/* TARGET_STRUCT_VALUE_RTX implementation. */ + +static rtx +c6x_struct_value_rtx (tree type ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, REG_A3); +} + +/* Implement TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +c6x_function_value_regno_p (const unsigned int regno) +{ + return regno == REG_A4; +} + +/* Types larger than 64 bit, and variable sized types, are passed by + reference. The callee must copy them; see c6x_callee_copies. */ + +static bool +c6x_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + int size = -1; + if (type) + size = int_size_in_bytes (type); + else if (mode != VOIDmode) + size = GET_MODE_SIZE (mode); + return size > 2 * UNITS_PER_WORD || size == -1; +} + +/* Decide whether a type should be returned in memory (true) + or in a register (false). This is called by the macro + TARGET_RETURN_IN_MEMORY. */ + +static bool +c6x_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + int size = int_size_in_bytes (type); + return size > 2 * UNITS_PER_WORD || size == -1; +} + +/* Values which must be returned in the most-significant end of the return + register. */ + +static bool +c6x_return_in_msb (const_tree valtype) +{ + HOST_WIDE_INT size = int_size_in_bytes (valtype); + return TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype) && size == 3; +} + +/* Implement TARGET_CALLEE_COPIES. */ + +static bool +c6x_callee_copies (cumulative_args_t cum_v ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED, + bool named ATTRIBUTE_UNUSED) +{ + return true; +} + +/* Return the type to use as __builtin_va_list. */ +static tree +c6x_build_builtin_va_list (void) +{ + return build_pointer_type (char_type_node); +} + +static void +c6x_asm_trampoline_template (FILE *f) +{ + fprintf (f, "\t.long\t0x0000002b\n"); /* mvkl .s2 fnlow,B0 */ + fprintf (f, "\t.long\t0x01000028\n"); /* || mvkl .s1 sclow,A2 */ + fprintf (f, "\t.long\t0x0000006b\n"); /* mvkh .s2 fnhigh,B0 */ + fprintf (f, "\t.long\t0x01000068\n"); /* || mvkh .s1 schigh,A2 */ + fprintf (f, "\t.long\t0x00000362\n"); /* b .s2 B0 */ + fprintf (f, "\t.long\t0x00008000\n"); /* nop 5 */ + fprintf (f, "\t.long\t0x00000000\n"); /* nop */ + fprintf (f, "\t.long\t0x00000000\n"); /* nop */ +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + TRAMP. FNADDR is an RTX for the address of the function's pure + code. CXT is an RTX for the static chain value for the function. 
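A short illustration of the return-value rules above, assuming UNITS_PER_WORD is 4 on this target: values up to 8 bytes come back in registers (A4, or an A5:A4 pair), anything larger is returned in memory with the hidden result pointer in A3 (c6x_struct_value_rtx).

/* Illustration only, not part of the patch.  */
struct small { int a, b; };       /* 8 bytes: returned in registers          */
struct large { int a, b, c; };    /* 12 bytes: c6x_return_in_memory is true  */

struct small ret_small (void) { struct small s = { 1, 2 };    return s; }
struct large ret_large (void) { struct large l = { 1, 2, 3 }; return l; }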
*/ + +static void +c6x_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx t1 = copy_to_reg (fnaddr); + rtx t2 = copy_to_reg (cxt); + rtx mask = gen_reg_rtx (SImode); + int i; + + emit_block_move (tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + emit_move_insn (mask, GEN_INT (0xffff << 7)); + + for (i = 0; i < 4; i++) + { + rtx mem = adjust_address (tramp, SImode, i * 4); + rtx t = (i & 1) ? t2 : t1; + rtx v1 = gen_reg_rtx (SImode); + rtx v2 = gen_reg_rtx (SImode); + emit_move_insn (v1, mem); + if (i < 2) + emit_insn (gen_ashlsi3 (v2, t, GEN_INT (7))); + else + emit_insn (gen_lshrsi3 (v2, t, GEN_INT (9))); + emit_insn (gen_andsi3 (v2, v2, mask)); + emit_insn (gen_iorsi3 (v2, v2, v1)); + emit_move_insn (mem, v2); + } +#ifdef CLEAR_INSN_CACHE + tramp = XEXP (tramp, 0); + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__gnu_clear_cache"), + LCT_NORMAL, VOIDmode, 2, tramp, Pmode, + plus_constant (Pmode, tramp, TRAMPOLINE_SIZE), + Pmode); +#endif +} + +/* Determine whether c6x_output_mi_thunk can succeed. */ + +static bool +c6x_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta ATTRIBUTE_UNUSED, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + const_tree function ATTRIBUTE_UNUSED) +{ + return !TARGET_LONG_CALLS; +} + +/* Output the assembler code for a thunk function. THUNK is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ + +static void +c6x_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, + tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, tree function) +{ + rtx xops[5]; + /* The this parameter is passed as the first argument. */ + rtx this_rtx = gen_rtx_REG (Pmode, REG_A4); + + c6x_current_insn = NULL_RTX; + + xops[4] = XEXP (DECL_RTL (function), 0); + if (!vcall_offset) + { + output_asm_insn ("b .s2 \t%4", xops); + if (!delta) + output_asm_insn ("nop 5", xops); + } + + /* Adjust the this parameter by a fixed constant. */ + if (delta) + { + xops[0] = GEN_INT (delta); + xops[1] = this_rtx; + if (delta >= -16 && delta <= 15) + { + output_asm_insn ("add .s1 %0, %1, %1", xops); + if (!vcall_offset) + output_asm_insn ("nop 4", xops); + } + else if (delta >= 16 && delta < 32) + { + output_asm_insn ("add .d1 %0, %1, %1", xops); + if (!vcall_offset) + output_asm_insn ("nop 4", xops); + } + else if (delta >= -32768 && delta < 32768) + { + output_asm_insn ("mvk .s1 %0, A0", xops); + output_asm_insn ("add .d1 %1, A0, %1", xops); + if (!vcall_offset) + output_asm_insn ("nop 3", xops); + } + else + { + output_asm_insn ("mvkl .s1 %0, A0", xops); + output_asm_insn ("mvkh .s1 %0, A0", xops); + output_asm_insn ("add .d1 %1, A0, %1", xops); + if (!vcall_offset) + output_asm_insn ("nop 3", xops); + } + } + + /* Adjust the this parameter by a value stored in the vtable. */ + if (vcall_offset) + { + rtx a0tmp = gen_rtx_REG (Pmode, REG_A0); + rtx a3tmp = gen_rtx_REG (Pmode, REG_A3); + + xops[1] = a3tmp; + xops[2] = a0tmp; + xops[3] = gen_rtx_MEM (Pmode, a0tmp); + output_asm_insn ("mv .s1 a4, %2", xops); + output_asm_insn ("ldw .d1t1 %3, %2", xops); + + /* Adjust the this parameter. 
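The patching loop above drops each 16-bit half of the function address and static chain into the constant field (bits 7..22) of the mvkl/mvkh template words: the low half is shifted left by 7, the high half right by 9, then masked in. A self-contained sketch of that bit manipulation, using the template words from c6x_asm_trampoline_template; it is an illustration, not GCC code.

#include <stdint.h>
#include <stdio.h>

static uint32_t patch_field (uint32_t insn, uint32_t value, int high_half)
{
  uint32_t mask = 0xffffu << 7;                    /* constant field, bits 7..22 */
  uint32_t field = high_half ? (value >> 9)        /* bits 16..31 into the field */
                             : (value << 7);       /* bits 0..15 into the field  */
  return insn | (field & mask);
}

int main (void)
{
  uint32_t fnaddr = 0x80001234u;                   /* example target address */
  uint32_t mvkl = 0x0000002bu;                     /* template: mvkl .s2 fnlow,B0  */
  uint32_t mvkh = 0x0000006bu;                     /* template: mvkh .s2 fnhigh,B0 */
  printf ("mvkl word: 0x%08x\n", (unsigned) patch_field (mvkl, fnaddr, 0));
  printf ("mvkh word: 0x%08x\n", (unsigned) patch_field (mvkh, fnaddr, 1));
  return 0;
}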
*/ + xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, a0tmp, + vcall_offset)); + if (!memory_operand (xops[0], Pmode)) + { + rtx tmp2 = gen_rtx_REG (Pmode, REG_A1); + xops[0] = GEN_INT (vcall_offset); + xops[1] = tmp2; + output_asm_insn ("mvkl .s1 %0, %1", xops); + output_asm_insn ("mvkh .s1 %0, %1", xops); + output_asm_insn ("nop 2", xops); + output_asm_insn ("add .d1 %2, %1, %2", xops); + xops[0] = gen_rtx_MEM (Pmode, a0tmp); + } + else + output_asm_insn ("nop 4", xops); + xops[2] = this_rtx; + output_asm_insn ("ldw .d1t1 %0, %1", xops); + output_asm_insn ("|| b .s2 \t%4", xops); + output_asm_insn ("nop 4", xops); + output_asm_insn ("add .d1 %2, %1, %2", xops); + } +} + +/* Return true if EXP goes in small data/bss. */ + +static bool +c6x_in_small_data_p (const_tree exp) +{ + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never small data. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_WEAK (exp)) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); + + if (strcmp (section, ".neardata") == 0 + || strncmp (section, ".neardata.", 10) == 0 + || strncmp (section, ".gnu.linkonce.s.", 16) == 0 + || strcmp (section, ".bss") == 0 + || strncmp (section, ".bss.", 5) == 0 + || strncmp (section, ".gnu.linkonce.sb.", 17) == 0 + || strcmp (section, ".rodata") == 0 + || strncmp (section, ".rodata.", 8) == 0 + || strncmp (section, ".gnu.linkonce.s2.", 17) == 0) + return true; + } + else + return PLACE_IN_SDATA_P (exp); + + return false; +} + +/* Return a section for X. The only special thing we do here is to + honor small data. We don't have a tree type, so we can't use the + PLACE_IN_SDATA_P macro we use everywhere else; we choose to place + everything sized 8 bytes or smaller into small data. */ + +static section * +c6x_select_rtx_section (enum machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + if (c6x_sdata_mode == C6X_SDATA_ALL + || (c6x_sdata_mode != C6X_SDATA_NONE && GET_MODE_SIZE (mode) <= 8)) + /* ??? Consider using mergeable sdata sections. 
*/ + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); +} + +static section * +c6x_elf_select_section (tree decl, int reloc, + unsigned HOST_WIDE_INT align) +{ + const char *sname = NULL; + unsigned int flags = SECTION_WRITE; + if (c6x_in_small_data_p (decl)) + { + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_SRODATA: + sname = ".rodata"; + flags = 0; + break; + case SECCAT_SDATA: + sname = ".neardata"; + break; + case SECCAT_SBSS: + sname = ".bss"; + flags |= SECTION_BSS; + default: + break; + } + } + else + { + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_DATA: + sname = ".fardata"; + break; + case SECCAT_DATA_REL: + sname = ".fardata.rel"; + break; + case SECCAT_DATA_REL_LOCAL: + sname = ".fardata.rel.local"; + break; + case SECCAT_DATA_REL_RO: + sname = ".fardata.rel.ro"; + break; + case SECCAT_DATA_REL_RO_LOCAL: + sname = ".fardata.rel.ro.local"; + break; + case SECCAT_BSS: + sname = ".far"; + flags |= SECTION_BSS; + break; + case SECCAT_RODATA: + sname = ".const"; + flags = 0; + break; + case SECCAT_SRODATA: + case SECCAT_SDATA: + case SECCAT_SBSS: + gcc_unreachable (); + default: + break; + } + } + if (sname) + { + /* We might get called with string constants, but get_named_section + doesn't like them as they are not DECLs. Also, we need to set + flags in that case. */ + if (!DECL_P (decl)) + return get_section (sname, flags, NULL); + return get_named_section (decl, sname, reloc); + } + + return default_elf_select_section (decl, reloc, align); +} + +/* Build up a unique section name, expressed as a + STRING_CST node, and assign it to DECL_SECTION_NAME (decl). + RELOC indicates whether the initial value of EXP requires + link-time relocations. */ + +static void ATTRIBUTE_UNUSED +c6x_elf_unique_section (tree decl, int reloc) +{ + const char *prefix = NULL; + /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ + bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; + + if (c6x_in_small_data_p (decl)) + { + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_SDATA: + prefix = one_only ? ".s" : ".neardata"; + break; + case SECCAT_SBSS: + prefix = one_only ? ".sb" : ".bss"; + break; + case SECCAT_SRODATA: + prefix = one_only ? ".s2" : ".rodata"; + break; + case SECCAT_RODATA_MERGE_STR: + case SECCAT_RODATA_MERGE_STR_INIT: + case SECCAT_RODATA_MERGE_CONST: + case SECCAT_RODATA: + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + gcc_unreachable (); + default: + /* Everything else we place into default sections and hope for the + best. */ + break; + } + } + else + { + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + prefix = one_only ? ".fd" : ".fardata"; + break; + case SECCAT_BSS: + prefix = one_only ? ".fb" : ".far"; + break; + case SECCAT_RODATA: + case SECCAT_RODATA_MERGE_STR: + case SECCAT_RODATA_MERGE_STR_INIT: + case SECCAT_RODATA_MERGE_CONST: + prefix = one_only ? 
".fr" : ".const"; + break; + case SECCAT_SRODATA: + case SECCAT_SDATA: + case SECCAT_SBSS: + gcc_unreachable (); + default: + break; + } + } + + if (prefix) + { + const char *name, *linkonce; + char *string; + + name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); + name = targetm.strip_name_encoding (name); + + /* If we're using one_only, then there needs to be a .gnu.linkonce + prefix to the section name. */ + linkonce = one_only ? ".gnu.linkonce" : ""; + + string = ACONCAT ((linkonce, prefix, ".", name, NULL)); + + DECL_SECTION_NAME (decl) = build_string (strlen (string), string); + return; + } + default_unique_section (decl, reloc); +} + +static unsigned int +c6x_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = 0; + + if (strcmp (name, ".far") == 0 + || strncmp (name, ".far.", 5) == 0) + flags |= SECTION_BSS; + + flags |= default_section_type_flags (decl, name, reloc); + + return flags; +} + +/* Checks whether the given CALL_EXPR would use a caller saved + register. This is used to decide whether sibling call optimization + could be performed on the respective function call. */ + +static bool +c6x_call_saved_register_used (tree call_expr) +{ + CUMULATIVE_ARGS cum_v; + cumulative_args_t cum; + HARD_REG_SET call_saved_regset; + tree parameter; + enum machine_mode mode; + tree type; + rtx parm_rtx; + int i; + + INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0); + cum = pack_cumulative_args (&cum_v); + + COMPL_HARD_REG_SET (call_saved_regset, call_used_reg_set); + for (i = 0; i < call_expr_nargs (call_expr); i++) + { + parameter = CALL_EXPR_ARG (call_expr, i); + gcc_assert (parameter); + + /* For an undeclared variable passed as parameter we will get + an ERROR_MARK node here. */ + if (TREE_CODE (parameter) == ERROR_MARK) + return true; + + type = TREE_TYPE (parameter); + gcc_assert (type); + + mode = TYPE_MODE (type); + gcc_assert (mode); + + if (pass_by_reference (&cum_v, mode, type, true)) + { + mode = Pmode; + type = build_pointer_type (type); + } + + parm_rtx = c6x_function_arg (cum, mode, type, 0); + + c6x_function_arg_advance (cum, mode, type, 0); + + if (!parm_rtx) + continue; + + if (REG_P (parm_rtx) + && overlaps_hard_reg_set_p (call_saved_regset, GET_MODE (parm_rtx), + REGNO (parm_rtx))) + return true; + if (GET_CODE (parm_rtx) == PARALLEL) + { + int n = XVECLEN (parm_rtx, 0); + while (n-- > 0) + { + rtx x = XEXP (XVECEXP (parm_rtx, 0, n), 0); + if (REG_P (x) + && overlaps_hard_reg_set_p (call_saved_regset, + GET_MODE (x), REGNO (x))) + return true; + } + } + } + return false; +} + +/* Decide whether we can make a sibling call to a function. DECL is the + declaration of the function being targeted by the call and EXP is the + CALL_EXPR representing the call. */ + +static bool +c6x_function_ok_for_sibcall (tree decl, tree exp) +{ + /* Registers A10, A12, B10 and B12 are available as arguments + register but unfortunately caller saved. This makes functions + needing these registers for arguments not suitable for + sibcalls. */ + if (c6x_call_saved_register_used (exp)) + return false; + + if (!flag_pic) + return true; + + if (TARGET_DSBT) + { + /* When compiling for DSBT, the calling function must be local, + so that when we reload B14 in the sibcall epilogue, it will + not change its value. */ + struct cgraph_local_info *this_func; + + if (!decl) + /* Not enough information. 
*/ + return false; + + this_func = cgraph_local_info (current_function_decl); + return this_func->local; + } + + return true; +} + +/* Return true if DECL is known to be linked into section SECTION. */ + +static bool +c6x_function_in_section_p (tree decl, section *section) +{ + /* We can only be certain about functions defined in the same + compilation unit. */ + if (!TREE_STATIC (decl)) + return false; + + /* Make sure that SYMBOL always binds to the definition in this + compilation unit. */ + if (!targetm.binds_local_p (decl)) + return false; + + /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ + if (!DECL_SECTION_NAME (decl)) + { + /* Make sure that we will not create a unique section for DECL. */ + if (flag_function_sections || DECL_ONE_ONLY (decl)) + return false; + } + + return function_section (decl) == section; +} + +/* Return true if a call to OP, which is a SYMBOL_REF, must be expanded + as a long call. */ +bool +c6x_long_call_p (rtx op) +{ + tree decl; + + if (!TARGET_LONG_CALLS) + return false; + + decl = SYMBOL_REF_DECL (op); + + /* Try to determine whether the symbol is in the same section as the current + function. Be conservative, and only cater for cases in which the + whole of the current function is placed in the same section. */ + if (decl != NULL_TREE + && !flag_reorder_blocks_and_partition + && TREE_CODE (decl) == FUNCTION_DECL + && c6x_function_in_section_p (decl, current_function_section ())) + return false; + + return true; +} + +/* Emit the sequence for a call. */ +void +c6x_expand_call (rtx retval, rtx address, bool sibcall) +{ + rtx callee = XEXP (address, 0); + rtx call_insn; + + if (!c6x_call_operand (callee, Pmode)) + { + callee = force_reg (Pmode, callee); + address = change_address (address, Pmode, callee); + } + call_insn = gen_rtx_CALL (VOIDmode, address, const0_rtx); + if (sibcall) + { + call_insn = emit_call_insn (call_insn); + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), + gen_rtx_REG (Pmode, REG_B3)); + } + else + { + if (retval == NULL_RTX) + call_insn = emit_call_insn (call_insn); + else + call_insn = emit_call_insn (gen_rtx_SET (GET_MODE (retval), retval, + call_insn)); + } + if (flag_pic) + use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx); +} + +/* Legitimize PIC addresses. If the address is already position-independent, + we return ORIG. Newly generated position-independent addresses go into a + reg. This is REG if nonzero, otherwise we allocate register(s) as + necessary. PICREG is the register holding the pointer to the PIC offset + table. 
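An illustration of the sibcall restriction above: once a call needs the call-saved argument registers (A10/B10/A12/B12, i.e. the 7th to 10th register arguments), c6x_call_saved_register_used reports a conflict and the tail call stays a normal call. Whether the shorter call really becomes a sibcall still depends on the remaining checks (PIC/DSBT, long calls); the functions below are invented for the sketch.

int callee_short (int a, int b, int c);
int callee_long (int a, int b, int c, int d, int e, int f, int g);

int tail_ok (int x)
{
  return callee_short (x, x, x);             /* args in A4/B4/A6: sibcall allowed
                                                by this check */
}

int tail_blocked (int x)
{
  return callee_long (x, x, x, x, x, x, x);  /* 7th argument lands in A10 */
}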
*/ + +static rtx +legitimize_pic_address (rtx orig, rtx reg, rtx picreg) +{ + rtx addr = orig; + rtx new_rtx = orig; + + if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF) + { + int unspec = UNSPEC_LOAD_GOT; + rtx tmp; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + if (flag_pic == 2) + { + if (can_create_pseudo_p ()) + tmp = gen_reg_rtx (Pmode); + else + tmp = reg; + emit_insn (gen_movsi_gotoff_high (tmp, addr)); + emit_insn (gen_movsi_gotoff_lo_sum (tmp, tmp, addr)); + emit_insn (gen_load_got_gotoff (reg, picreg, tmp)); + } + else + { + tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec); + new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp)); + + emit_move_insn (reg, new_rtx); + } + if (picreg == pic_offset_table_rtx) + crtl->uses_pic_offset_table = 1; + return reg; + } + + else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS) + { + rtx base; + + if (GET_CODE (addr) == CONST) + { + addr = XEXP (addr, 0); + gcc_assert (GET_CODE (addr) == PLUS); + } + + if (XEXP (addr, 0) == picreg) + return orig; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + base = legitimize_pic_address (XEXP (addr, 0), reg, picreg); + addr = legitimize_pic_address (XEXP (addr, 1), + base == reg ? NULL_RTX : reg, + picreg); + + if (GET_CODE (addr) == CONST_INT) + { + gcc_assert (! reload_in_progress && ! reload_completed); + addr = force_reg (Pmode, addr); + } + + if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1))) + { + base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0)); + addr = XEXP (addr, 1); + } + + return gen_rtx_PLUS (Pmode, base, addr); + } + + return new_rtx; +} + +/* Expand a move operation in mode MODE. The operands are in OPERANDS. + Returns true if no further code must be generated, false if the caller + should generate an insn to move OPERANDS[1] to OPERANDS[0]. */ + +bool +expand_move (rtx *operands, enum machine_mode mode) +{ + rtx dest = operands[0]; + rtx op = operands[1]; + + if ((reload_in_progress | reload_completed) == 0 + && GET_CODE (dest) == MEM && GET_CODE (op) != REG) + operands[1] = force_reg (mode, op); + else if (mode == SImode && symbolic_operand (op, SImode)) + { + if (flag_pic) + { + if (sdata_symbolic_operand (op, SImode)) + { + emit_insn (gen_load_sdata_pic (dest, pic_offset_table_rtx, op)); + crtl->uses_pic_offset_table = 1; + return true; + } + else + { + rtx temp = (reload_completed || reload_in_progress + ? dest : gen_reg_rtx (Pmode)); + + operands[1] = legitimize_pic_address (op, temp, + pic_offset_table_rtx); + } + } + else if (reload_completed + && !sdata_symbolic_operand (op, SImode)) + { + emit_insn (gen_movsi_high (dest, op)); + emit_insn (gen_movsi_lo_sum (dest, dest, op)); + return true; + } + } + return false; +} + +/* This function is called when we're about to expand an integer compare + operation which performs COMPARISON. It examines the second operand, + and if it is an integer constant that cannot be used directly on the + current machine in a comparison insn, it returns true. 
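A standalone restatement of c6x_force_op_for_comparison_p (defined just below) for a constant operand v, assuming the usual C6X meanings of the constraints it tests: Iu4 an unsigned 4-bit, Is5 a signed 5-bit and Iu5 an unsigned 5-bit immediate. have_insns_64 stands in for TARGET_INSNS_64; non-constant operands never need forcing.

#include <stdbool.h>

enum cmp_code { CMP_EQ, CMP_LT, CMP_GT, CMP_LTU, CMP_GTU };

static bool force_op_to_register_p (enum cmp_code code, long long v,
                                    bool have_insns_64)
{
  if (v >= 0 && v <= 15)                              /* Iu4: always usable */
    return false;
  if ((code == CMP_EQ || code == CMP_LT || code == CMP_GT)
      && !(v >= -16 && v <= 15))                      /* needs Is5 */
    return true;
  if ((code == CMP_GTU || code == CMP_LTU)
      && (!have_insns_64 || !(v >= 0 && v <= 31)))    /* needs Iu5 and C64X+ insns */
    return true;
  return false;
}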
*/ +bool +c6x_force_op_for_comparison_p (enum rtx_code code, rtx op) +{ + if (!CONST_INT_P (op) || satisfies_constraint_Iu4 (op)) + return false; + + if ((code == EQ || code == LT || code == GT) + && !satisfies_constraint_Is5 (op)) + return true; + if ((code == GTU || code == LTU) + && (!TARGET_INSNS_64 || !satisfies_constraint_Iu5 (op))) + return true; + + return false; +} + +/* Emit comparison instruction if necessary, returning the expression + that holds the compare result in the proper mode. Return the comparison + that should be used in the jump insn. */ + +rtx +c6x_expand_compare (rtx comparison, enum machine_mode mode) +{ + enum rtx_code code = GET_CODE (comparison); + rtx op0 = XEXP (comparison, 0); + rtx op1 = XEXP (comparison, 1); + rtx cmp; + enum rtx_code jump_code = code; + enum machine_mode op_mode = GET_MODE (op0); + + if (op_mode == DImode && (code == NE || code == EQ) && op1 == const0_rtx) + { + rtx t = gen_reg_rtx (SImode); + emit_insn (gen_iorsi3 (t, gen_lowpart (SImode, op0), + gen_highpart (SImode, op0))); + op_mode = SImode; + cmp = t; + } + else if (op_mode == DImode) + { + rtx lo[2], high[2]; + rtx cmp1, cmp2; + + if (code == NE || code == GEU || code == LEU || code == GE || code == LE) + { + code = reverse_condition (code); + jump_code = EQ; + } + else + jump_code = NE; + + split_di (&op0, 1, lo, high); + split_di (&op1, 1, lo + 1, high + 1); + + if (c6x_force_op_for_comparison_p (code, high[1]) + || c6x_force_op_for_comparison_p (EQ, high[1])) + high[1] = force_reg (SImode, high[1]); + + cmp1 = gen_reg_rtx (SImode); + cmp2 = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (VOIDmode, cmp1, + gen_rtx_fmt_ee (code, SImode, high[0], high[1]))); + if (code == EQ) + { + if (c6x_force_op_for_comparison_p (code, lo[1])) + lo[1] = force_reg (SImode, lo[1]); + emit_insn (gen_rtx_SET (VOIDmode, cmp2, + gen_rtx_fmt_ee (code, SImode, lo[0], lo[1]))); + emit_insn (gen_andsi3 (cmp1, cmp1, cmp2)); + } + else + { + emit_insn (gen_rtx_SET (VOIDmode, cmp2, + gen_rtx_EQ (SImode, high[0], high[1]))); + if (code == GT) + code = GTU; + else if (code == LT) + code = LTU; + if (c6x_force_op_for_comparison_p (code, lo[1])) + lo[1] = force_reg (SImode, lo[1]); + emit_insn (gen_cmpsi_and (cmp2, gen_rtx_fmt_ee (code, SImode, + lo[0], lo[1]), + lo[0], lo[1], cmp2)); + emit_insn (gen_iorsi3 (cmp1, cmp1, cmp2)); + } + cmp = cmp1; + } + else if (TARGET_FP && !flag_finite_math_only + && (op_mode == DFmode || op_mode == SFmode) + && code != EQ && code != NE && code != LT && code != GT + && code != UNLE && code != UNGE) + { + enum rtx_code code1, code2, code3; + rtx (*fn) (rtx, rtx, rtx, rtx, rtx); + + jump_code = NE; + code3 = UNKNOWN; + switch (code) + { + case UNLT: + case UNGT: + jump_code = EQ; + /* fall through */ + case LE: + case GE: + code1 = code == LE || code == UNGT ? LT : GT; + code2 = EQ; + break; + + case UNORDERED: + jump_code = EQ; + /* fall through */ + case ORDERED: + code3 = EQ; + /* fall through */ + case LTGT: + code1 = LT; + code2 = GT; + break; + + case UNEQ: + code1 = LT; + code2 = GT; + jump_code = EQ; + break; + + default: + gcc_unreachable (); + } + + cmp = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (VOIDmode, cmp, + gen_rtx_fmt_ee (code1, SImode, op0, op1))); + fn = op_mode == DFmode ? 
gen_cmpdf_ior : gen_cmpsf_ior; + emit_insn (fn (cmp, gen_rtx_fmt_ee (code2, SImode, op0, op1), + op0, op1, cmp)); + if (code3 != UNKNOWN) + emit_insn (fn (cmp, gen_rtx_fmt_ee (code3, SImode, op0, op1), + op0, op1, cmp)); + } + else if (op_mode == SImode && (code == NE || code == EQ) && op1 == const0_rtx) + cmp = op0; + else + { + bool is_fp_libfunc; + is_fp_libfunc = !TARGET_FP && (op_mode == DFmode || op_mode == SFmode); + + if ((code == NE || code == GEU || code == LEU || code == GE || code == LE) + && !is_fp_libfunc) + { + code = reverse_condition (code); + jump_code = EQ; + } + else if (code == UNGE) + { + code = LT; + jump_code = EQ; + } + else if (code == UNLE) + { + code = GT; + jump_code = EQ; + } + else + jump_code = NE; + + if (is_fp_libfunc) + { + rtx insns; + rtx libfunc; + switch (code) + { + case EQ: + libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc; + break; + case NE: + libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc; + break; + case GT: + libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc; + break; + case GE: + libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc; + break; + case LT: + libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc; + break; + case LE: + libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc; + break; + default: + gcc_unreachable (); + } + start_sequence (); + + cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode, 2, + op0, op_mode, op1, op_mode); + insns = get_insns (); + end_sequence (); + + emit_libcall_block (insns, cmp, cmp, + gen_rtx_fmt_ee (code, SImode, op0, op1)); + } + else + { + cmp = gen_reg_rtx (SImode); + if (c6x_force_op_for_comparison_p (code, op1)) + op1 = force_reg (SImode, op1); + emit_insn (gen_rtx_SET (VOIDmode, cmp, + gen_rtx_fmt_ee (code, SImode, op0, op1))); + } + } + + return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx); +} + +/* Return one word of double-word value OP. HIGH_P is true to select the + high part, false to select the low part. When encountering auto-increment + addressing, we make the assumption that the low part is going to be accessed + first. */ + +rtx +c6x_subword (rtx op, bool high_p) +{ + unsigned int byte; + enum machine_mode mode; + + mode = GET_MODE (op); + if (mode == VOIDmode) + mode = DImode; + + if (TARGET_BIG_ENDIAN ? !high_p : high_p) + byte = UNITS_PER_WORD; + else + byte = 0; + + if (MEM_P (op)) + { + rtx addr = XEXP (op, 0); + if (GET_CODE (addr) == PLUS || REG_P (addr)) + return adjust_address (op, word_mode, byte); + /* FIXME: should really support autoincrement addressing for + multi-word modes. */ + gcc_unreachable (); + } + + return simplify_gen_subreg (word_mode, op, mode, byte); +} + +/* Split one or more DImode RTL references into pairs of SImode + references. The RTL can be REG, offsettable MEM, integer constant, or + CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to + split and "num" is its length. lo_half and hi_half are output arrays + that parallel "operands". */ + +void +split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) +{ + while (num--) + { + rtx op = operands[num]; + + lo_half[num] = c6x_subword (op, false); + hi_half[num] = c6x_subword (op, true); + } +} + +/* Return true if VAL is a mask valid for a clr instruction. 
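A self-contained restatement of the mask test defined just below: a value is accepted for a clr instruction when its zero bits form a single contiguous field, i.e. the word reads 1...1 0...0 1...1 from bit 0 upward. The helper and the test values here are illustrative only.

#include <stdbool.h>
#include <stdio.h>

static bool clr_mask_ok (unsigned int val)
{
  int i = 0;
  while (i < 32 && (val & (1u << i)))    /* skip low set bits */
    i++;
  while (i < 32 && !(val & (1u << i)))   /* skip the cleared field */
    i++;
  while (i < 32 && (val & (1u << i)))    /* remaining bits must all be set */
    i++;
  return i == 32;
}

int main (void)
{
  printf ("%d\n", clr_mask_ok (0xffff00ffu));  /* 1: one contiguous zero field */
  printf ("%d\n", clr_mask_ok (0xff00ff00u));  /* 0: two separate zero runs    */
  return 0;
}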
*/ +bool +c6x_valid_mask_p (HOST_WIDE_INT val) +{ + int i; + for (i = 0; i < 32; i++) + if (!(val & ((unsigned HOST_WIDE_INT)1 << i))) + break; + for (; i < 32; i++) + if (val & ((unsigned HOST_WIDE_INT)1 << i)) + break; + for (; i < 32; i++) + if (!(val & ((unsigned HOST_WIDE_INT)1 << i))) + return false; + return true; +} + +/* Expand a block move for a movmemM pattern. */ + +bool +c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, + rtx expected_align_exp ATTRIBUTE_UNUSED, + rtx expected_size_exp ATTRIBUTE_UNUSED) +{ + unsigned HOST_WIDE_INT align = 1; + unsigned HOST_WIDE_INT src_mem_align, dst_mem_align, min_mem_align; + unsigned HOST_WIDE_INT count = 0, offset = 0; + unsigned int biggest_move = TARGET_STDW ? 8 : 4; + + if (CONST_INT_P (align_exp)) + align = INTVAL (align_exp); + + src_mem_align = MEM_ALIGN (src) / BITS_PER_UNIT; + dst_mem_align = MEM_ALIGN (dst) / BITS_PER_UNIT; + min_mem_align = MIN (src_mem_align, dst_mem_align); + + if (min_mem_align > align) + align = min_mem_align / BITS_PER_UNIT; + if (src_mem_align < align) + src_mem_align = align; + if (dst_mem_align < align) + dst_mem_align = align; + + if (CONST_INT_P (count_exp)) + count = INTVAL (count_exp); + else + return false; + + /* Make sure we don't need to care about overflow later on. */ + if (count > ((unsigned HOST_WIDE_INT) 1 << 30)) + return false; + + if (count >= 28 && (count & 3) == 0 && align >= 4) + { + tree dst_expr = MEM_EXPR (dst); + tree src_expr = MEM_EXPR (src); + rtx fn = TARGET_INSNS_64PLUS ? strasgi64p_libfunc : strasgi_libfunc; + rtx srcreg = force_reg (Pmode, XEXP (src, 0)); + rtx dstreg = force_reg (Pmode, XEXP (dst, 0)); + + if (src_expr) + mark_addressable (src_expr); + if (dst_expr) + mark_addressable (dst_expr); + emit_library_call (fn, LCT_NORMAL, VOIDmode, 3, + dstreg, Pmode, srcreg, Pmode, count_exp, SImode); + return true; + } + + if (biggest_move > align && !TARGET_INSNS_64) + biggest_move = align; + + if (count / biggest_move > 7) + return false; + + while (count > 0) + { + rtx reg, reg_lowpart; + enum machine_mode srcmode, dstmode; + unsigned HOST_WIDE_INT src_size, dst_size, src_left; + int shift; + rtx srcmem, dstmem; + + while (biggest_move > count) + biggest_move /= 2; + + src_size = dst_size = biggest_move; + if (src_size > src_mem_align && src_size == 2) + src_size = 1; + if (dst_size > dst_mem_align && dst_size == 2) + dst_size = 1; + + if (dst_size > src_size) + dst_size = src_size; + + srcmode = mode_for_size (src_size * BITS_PER_UNIT, MODE_INT, 0); + dstmode = mode_for_size (dst_size * BITS_PER_UNIT, MODE_INT, 0); + if (src_size >= 4) + reg_lowpart = reg = gen_reg_rtx (srcmode); + else + { + reg = gen_reg_rtx (SImode); + reg_lowpart = gen_lowpart (srcmode, reg); + } + + srcmem = adjust_address (copy_rtx (src), srcmode, offset); + + if (src_size > src_mem_align) + { + enum insn_code icode = (srcmode == SImode ? CODE_FOR_movmisalignsi + : CODE_FOR_movmisaligndi); + emit_insn (GEN_FCN (icode) (reg_lowpart, srcmem)); + } + else + emit_move_insn (reg_lowpart, srcmem); + + src_left = src_size; + shift = TARGET_BIG_ENDIAN ? (src_size - dst_size) * BITS_PER_UNIT : 0; + while (src_left > 0) + { + rtx dstreg = reg_lowpart; + + if (src_size > dst_size) + { + rtx srcword = reg; + int shift_amount = shift & (BITS_PER_WORD - 1); + if (src_size > 4) + srcword = operand_subword_force (srcword, src_left >= 4 ? 
0 : 4, + SImode); + if (shift_amount > 0) + { + dstreg = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (dstreg, srcword, + GEN_INT (shift_amount))); + } + else + dstreg = srcword; + dstreg = gen_lowpart (dstmode, dstreg); + } + + dstmem = adjust_address (copy_rtx (dst), dstmode, offset); + if (dst_size > dst_mem_align) + { + enum insn_code icode = (dstmode == SImode ? CODE_FOR_movmisalignsi + : CODE_FOR_movmisaligndi); + emit_insn (GEN_FCN (icode) (dstmem, dstreg)); + } + else + emit_move_insn (dstmem, dstreg); + + if (TARGET_BIG_ENDIAN) + shift -= dst_size * BITS_PER_UNIT; + else + shift += dst_size * BITS_PER_UNIT; + offset += dst_size; + src_left -= dst_size; + } + count -= src_size; + } + return true; +} + +/* Subroutine of print_address_operand, print a single address offset OFF for + a memory access of mode MEM_MODE, choosing between normal form and scaled + form depending on the type of the insn. Misaligned memory references must + use the scaled form. */ + +static void +print_address_offset (FILE *file, rtx off, enum machine_mode mem_mode) +{ + rtx pat; + + if (c6x_current_insn != NULL_RTX) + { + pat = PATTERN (c6x_current_insn); + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC + && XINT (SET_SRC (pat), 1) == UNSPEC_MISALIGNED_ACCESS) + { + gcc_assert (CONST_INT_P (off) + && (INTVAL (off) & (GET_MODE_SIZE (mem_mode) - 1)) == 0); + fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]", + INTVAL (off) / GET_MODE_SIZE (mem_mode)); + return; + } + } + fputs ("(", file); + output_address (off); + fputs (")", file); +} + +static bool +c6x_print_operand_punct_valid_p (unsigned char c) +{ + return c == '$' || c == '.' || c == '|'; +} + +static void c6x_print_operand (FILE *, rtx, int); + +/* Subroutine of c6x_print_operand; used to print a memory reference X to FILE. 
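The address printer that follows produces TI-style operand syntax. A few representative forms, sketched from the cases below; the register names are examples only.

  base + positive constant   ->  *+A4(8)     (or scaled *+A4[2] for the
                                              misaligned-access patterns)
  POST_INC auto-increment    ->  *A4++[1]
  PRE_DEC auto-decrement     ->  *--A4[1]
  small-data symbol          ->  *+B14(sym)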
*/ + +static void +c6x_print_address_operand (FILE *file, rtx x, enum machine_mode mem_mode) +{ + rtx off; + switch (GET_CODE (x)) + { + case PRE_MODIFY: + case POST_MODIFY: + if (GET_CODE (x) == POST_MODIFY) + output_address (XEXP (x, 0)); + off = XEXP (XEXP (x, 1), 1); + if (XEXP (x, 0) == stack_pointer_rtx) + { + if (GET_CODE (x) == PRE_MODIFY) + gcc_assert (INTVAL (off) > 0); + else + gcc_assert (INTVAL (off) < 0); + } + if (CONST_INT_P (off) && INTVAL (off) < 0) + { + fprintf (file, "--"); + off = GEN_INT (-INTVAL (off)); + } + else + fprintf (file, "++"); + if (GET_CODE (x) == PRE_MODIFY) + output_address (XEXP (x, 0)); + print_address_offset (file, off, mem_mode); + break; + + case PLUS: + off = XEXP (x, 1); + if (CONST_INT_P (off) && INTVAL (off) < 0) + { + fprintf (file, "-"); + off = GEN_INT (-INTVAL (off)); + } + else + fprintf (file, "+"); + output_address (XEXP (x, 0)); + print_address_offset (file, off, mem_mode); + break; + + case PRE_DEC: + gcc_assert (XEXP (x, 0) != stack_pointer_rtx); + fprintf (file, "--"); + output_address (XEXP (x, 0)); + fprintf (file, "[1]"); + break; + case PRE_INC: + fprintf (file, "++"); + output_address (XEXP (x, 0)); + fprintf (file, "[1]"); + break; + case POST_INC: + gcc_assert (XEXP (x, 0) != stack_pointer_rtx); + output_address (XEXP (x, 0)); + fprintf (file, "++[1]"); + break; + case POST_DEC: + output_address (XEXP (x, 0)); + fprintf (file, "--[1]"); + break; + + case SYMBOL_REF: + case CONST: + case LABEL_REF: + gcc_assert (sdata_symbolic_operand (x, Pmode)); + fprintf (file, "+B14("); + output_addr_const (file, x); + fprintf (file, ")"); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + case UNSPEC_LOAD_GOT: + fputs ("$GOT(", file); + output_addr_const (file, XVECEXP (x, 0, 0)); + fputs (")", file); + break; + case UNSPEC_LOAD_SDATA: + output_addr_const (file, XVECEXP (x, 0, 0)); + break; + default: + gcc_unreachable (); + } + break; + + default: + gcc_assert (GET_CODE (x) != MEM); + c6x_print_operand (file, x, 0); + break; + } +} + +/* Return a single character, which is either 'l', 's', 'd' or 'm', which + specifies the functional unit used by INSN. */ + +char +c6x_get_unit_specifier (rtx insn) +{ + enum attr_units units; + + if (insn_info.exists ()) + { + int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation; + return c6x_unit_names[unit][0]; + } + + units = get_attr_units (insn); + switch (units) + { + case UNITS_D: + case UNITS_DL: + case UNITS_DS: + case UNITS_DLS: + case UNITS_D_ADDR: + return 'd'; + break; + case UNITS_L: + case UNITS_LS: + return 'l'; + break; + case UNITS_S: + return 's'; + break; + case UNITS_M: + return 'm'; + break; + default: + gcc_unreachable (); + } +} + +/* Prints the unit specifier field. */ +static void +c6x_print_unit_specifier_field (FILE *file, rtx insn) +{ + enum attr_units units = get_attr_units (insn); + enum attr_cross cross = get_attr_cross (insn); + enum attr_dest_regfile rf = get_attr_dest_regfile (insn); + int half; + char unitspec; + + if (units == UNITS_D_ADDR) + { + enum attr_addr_regfile arf = get_attr_addr_regfile (insn); + int t_half; + gcc_assert (arf != ADDR_REGFILE_UNKNOWN); + half = arf == ADDR_REGFILE_A ? 1 : 2; + t_half = rf == DEST_REGFILE_A ? 
1 : 2; + fprintf (file, ".d%dt%d", half, t_half); + return; + } + + if (insn_info.exists ()) + { + int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation; + fputs (".", file); + fputs (c6x_unit_names[unit], file); + if (cross == CROSS_Y) + fputs ("x", file); + return; + } + + gcc_assert (rf != DEST_REGFILE_UNKNOWN); + unitspec = c6x_get_unit_specifier (insn); + half = rf == DEST_REGFILE_A ? 1 : 2; + fprintf (file, ".%c%d%s", unitspec, half, cross == CROSS_Y ? "x" : ""); +} + +/* Output assembly language output for the address ADDR to FILE. */ +static void +c6x_print_operand_address (FILE *file, rtx addr) +{ + c6x_print_address_operand (file, addr, VOIDmode); +} + +/* Print an operand, X, to FILE, with an optional modifier in CODE. + + Meaning of CODE: + $ -- print the unit specifier field for the instruction. + . -- print the predicate for the instruction or an emptry string for an + unconditional one. + | -- print "||" if the insn should be issued in parallel with the previous + one. + + C -- print an opcode suffix for a reversed condition + d -- H, W or D as a suffix for ADDA, based on the factor given by the + operand + D -- print either B, H, W or D as a suffix for ADDA, based on the size of + the operand + J -- print a predicate + j -- like J, but use reverse predicate + k -- treat a CONST_INT as a register number and print it as a register + k -- like k, but print out a doubleword register + n -- print an integer operand, negated + p -- print the low part of a DImode register + P -- print the high part of a DImode register + r -- print the absolute value of an integer operand, shifted right by 1 + R -- print the absolute value of an integer operand, shifted right by 2 + f -- the first clear bit in an integer operand assumed to be a mask for + a clr instruction + F -- the last clear bit in such a mask + s -- the first set bit in an integer operand assumed to be a mask for + a set instruction + S -- the last set bit in such a mask + U -- print either 1 or 2, depending on the side of the machine used by + the operand */ + +static void +c6x_print_operand (FILE *file, rtx x, int code) +{ + int i; + HOST_WIDE_INT v; + tree t; + enum machine_mode mode; + + if (code == '|') + { + if (GET_MODE (c6x_current_insn) != TImode) + fputs ("||", file); + return; + } + if (code == '$') + { + c6x_print_unit_specifier_field (file, c6x_current_insn); + return; + } + + if (code == '.') + { + x = current_insn_predicate; + if (x) + { + unsigned int regno = REGNO (XEXP (x, 0)); + fputs ("[", file); + if (GET_CODE (x) == EQ) + fputs ("!", file); + fputs (reg_names [regno], file); + fputs ("]", file); + } + return; + } + + mode = GET_MODE (x); + + switch (code) + { + case 'C': + case 'c': + { + enum rtx_code c = GET_CODE (x); + if (code == 'C') + c = swap_condition (c); + fputs (GET_RTX_NAME (c), file); + } + return; + + case 'J': + case 'j': + { + unsigned int regno = REGNO (XEXP (x, 0)); + if ((GET_CODE (x) == EQ) == (code == 'J')) + fputs ("!", file); + fputs (reg_names [regno], file); + } + return; + + case 'k': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + fprintf (file, "%s", reg_names[v]); + return; + case 'K': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + gcc_assert ((v & 1) == 0); + fprintf (file, "%s:%s", reg_names[v + 1], reg_names[v]); + return; + + case 's': + case 'S': + case 'f': + case 'F': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + for (i = 0; i < 32; i++) + { + HOST_WIDE_INT tst = v & 1; + if (((code == 'f' || code == 'F') && !tst) + || 
((code == 's' || code == 'S') && tst)) + break; + v >>= 1; + } + if (code == 'f' || code == 's') + { + fprintf (file, "%d", i); + return; + } + for (;i < 32; i++) + { + HOST_WIDE_INT tst = v & 1; + if ((code == 'F' && tst) || (code == 'S' && !tst)) + break; + v >>= 1; + } + fprintf (file, "%d", i - 1); + return; + + case 'n': + gcc_assert (GET_CODE (x) == CONST_INT); + output_addr_const (file, GEN_INT (-INTVAL (x))); + return; + + case 'r': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + if (v < 0) + v = -v; + output_addr_const (file, GEN_INT (v >> 1)); + return; + + case 'R': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + if (v < 0) + v = -v; + output_addr_const (file, GEN_INT (v >> 2)); + return; + + case 'd': + gcc_assert (GET_CODE (x) == CONST_INT); + v = INTVAL (x); + fputs (v == 2 ? "h" : v == 4 ? "w" : "d", file); + return; + + case 'p': + case 'P': + gcc_assert (GET_CODE (x) == REG); + v = REGNO (x); + if (code == 'P') + v++; + fputs (reg_names[v], file); + return; + + case 'D': + v = 0; + if (GET_CODE (x) == CONST) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == PLUS); + gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT); + v = INTVAL (XEXP (x, 1)); + x = XEXP (x, 0); + + } + gcc_assert (GET_CODE (x) == SYMBOL_REF); + + t = SYMBOL_REF_DECL (x); + if (DECL_P (t)) + v |= DECL_ALIGN_UNIT (t); + else + v |= TYPE_ALIGN_UNIT (TREE_TYPE (t)); + if (v & 1) + fputs ("b", file); + else if (v & 2) + fputs ("h", file); + else + fputs ("w", file); + return; + + case 'U': + if (MEM_P (x)) + { + x = XEXP (x, 0); + if (GET_CODE (x) == PLUS + || GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC) + x = XEXP (x, 0); + if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF) + { + gcc_assert (sdata_symbolic_operand (x, Pmode)); + fputs ("2", file); + return; + } + } + gcc_assert (REG_P (x)); + if (A_REGNO_P (REGNO (x))) + fputs ("1", file); + if (B_REGNO_P (REGNO (x))) + fputs ("2", file); + return; + + default: + switch (GET_CODE (x)) + { + case REG: + if (GET_MODE_SIZE (mode) == 8) + fprintf (file, "%s:%s", reg_names[REGNO (x) + 1], + reg_names[REGNO (x)]); + else + fprintf (file, "%s", reg_names[REGNO (x)]); + break; + + case MEM: + fputc ('*', file); + gcc_assert (XEXP (x, 0) != stack_pointer_rtx); + c6x_print_address_operand (file, XEXP (x, 0), GET_MODE (x)); + break; + + case SYMBOL_REF: + fputc ('(', file); + output_addr_const (file, x); + fputc (')', file); + break; + + case CONST_INT: + output_addr_const (file, x); + break; + + case CONST_DOUBLE: + output_operand_lossage ("invalid const_double operand"); + break; + + default: + output_addr_const (file, x); + } + } +} + +/* Return TRUE if OP is a valid memory address with a base register of + class C. If SMALL_OFFSET is true, we disallow memory references which would + require a long offset with B14/B15. 
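The 's'/'S' modifiers above (and 'f'/'F', with the roles of set and clear bits swapped) locate the bounds of a contiguous bit run so that SET/CLR can be given start and end bit positions. A minimal standalone sketch of the same linear scan, assuming the mask really is a single contiguous run; the helper name mask_bounds is invented for illustration and is not part of the GCC sources in this patch:

  #include <stdint.h>
  #include <stdio.h>

  /* Find the first and last set-bit positions of a mask that consists of
     one contiguous run of set bits, the way the 's'/'S' modifiers do.  */
  static void
  mask_bounds (uint32_t v, int *first, int *last)
  {
    int i;

    /* Scan for the first set bit (what 's' prints).  */
    for (i = 0; i < 32 && (v & 1) == 0; i++)
      v >>= 1;
    *first = i;

    /* Continue until the run of set bits ends ('S' prints i - 1).  */
    for (; i < 32 && (v & 1) != 0; i++)
      v >>= 1;
    *last = i - 1;
  }

  int
  main (void)
  {
    int lo, hi;
    mask_bounds (0x00ff0000, &lo, &hi);
    printf ("first %d last %d\n", lo, hi);  /* prints "first 16 last 23" */
    return 0;
  }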
*/ + +bool +c6x_mem_operand (rtx op, enum reg_class c, bool small_offset) +{ + enum machine_mode mode = GET_MODE (op); + rtx base = XEXP (op, 0); + switch (GET_CODE (base)) + { + case REG: + break; + case PLUS: + if (small_offset + && (XEXP (base, 0) == stack_pointer_rtx + || XEXP (base, 0) == pic_offset_table_rtx)) + { + if (!c6x_legitimate_address_p_1 (mode, base, true, true)) + return false; + } + + /* fall through */ + case PRE_INC: + case PRE_DEC: + case PRE_MODIFY: + case POST_INC: + case POST_DEC: + case POST_MODIFY: + base = XEXP (base, 0); + break; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + gcc_assert (sdata_symbolic_operand (base, Pmode)); + return !small_offset && c == B_REGS; + + default: + return false; + } + return TEST_HARD_REG_BIT (reg_class_contents[ (int) (c)], REGNO (base)); +} + +/* Returns true if X is a valid address for use in a memory reference + of mode MODE. If STRICT is true, we do not allow pseudo registers + in the address. NO_LARGE_OFFSET is true if we are examining an + address for use in a load or store misaligned instruction, or + recursively examining an operand inside a PRE/POST_MODIFY. */ + +bool +c6x_legitimate_address_p_1 (enum machine_mode mode, rtx x, bool strict, + bool no_large_offset) +{ + int size, size1; + HOST_WIDE_INT off; + enum rtx_code code = GET_CODE (x); + + switch (code) + { + case PRE_MODIFY: + case POST_MODIFY: + /* We can't split these into word-sized pieces yet. */ + if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return false; + if (GET_CODE (XEXP (x, 1)) != PLUS) + return false; + if (!c6x_legitimate_address_p_1 (mode, XEXP (x, 1), strict, true)) + return false; + if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0))) + return false; + + /* fall through */ + case PRE_INC: + case PRE_DEC: + case POST_INC: + case POST_DEC: + /* We can't split these into word-sized pieces yet. */ + if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return false; + x = XEXP (x, 0); + if (!REG_P (x)) + return false; + + /* fall through */ + case REG: + if (strict) + return REGNO_OK_FOR_BASE_STRICT_P (REGNO (x)); + else + return REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x)); + + case PLUS: + if (!REG_P (XEXP (x, 0)) + || !c6x_legitimate_address_p_1 (mode, XEXP (x, 0), strict, false)) + return false; + /* We cannot ensure currently that both registers end up in the + same register file. */ + if (REG_P (XEXP (x, 1))) + return false; + + if (mode == BLKmode) + size = 4; + else if (mode == VOIDmode) + /* ??? This can happen during ivopts. */ + size = 1; + else + size = GET_MODE_SIZE (mode); + + if (flag_pic + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_SDATA + && XEXP (x, 0) == pic_offset_table_rtx + && sdata_symbolic_operand (XVECEXP (XEXP (x, 1), 0, 0), SImode)) + return !no_large_offset && size <= 4; + if (flag_pic == 1 + && mode == Pmode + && GET_CODE (XEXP (x, 1)) == UNSPEC + && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_GOT + && XEXP (x, 0) == pic_offset_table_rtx + && (GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == SYMBOL_REF + || GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == LABEL_REF)) + return !no_large_offset; + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return false; + + off = INTVAL (XEXP (x, 1)); + + /* If the machine does not have doubleword load/stores, we'll use + word size accesses. */ + size1 = size; + if (size == 2 * UNITS_PER_WORD && !TARGET_STDW) + size = UNITS_PER_WORD; + + if (((HOST_WIDE_INT)size1 - 1) & off) + return false; + off /= size; + if (off > -32 && off < (size1 == size ? 
32 : 28)) + return true; + if (no_large_offset || code != PLUS || XEXP (x, 0) != stack_pointer_rtx + || size1 > UNITS_PER_WORD) + return false; + return off >= 0 && off < 32768; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return (!no_large_offset + /* With -fpic, we must wrap it in an unspec to show the B14 + dependency. */ + && !flag_pic + && GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && sdata_symbolic_operand (x, Pmode)); + + default: + return false; + } +} + +static bool +c6x_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + return c6x_legitimate_address_p_1 (mode, x, strict, false); +} + +static bool +c6x_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED) +{ + return true; +} + +/* Implements TARGET_PREFERRED_RENAME_CLASS. */ +static reg_class_t +c6x_preferred_rename_class (reg_class_t cl) +{ + if (cl == A_REGS) + return NONPREDICATE_A_REGS; + if (cl == B_REGS) + return NONPREDICATE_B_REGS; + if (cl == ALL_REGS || cl == GENERAL_REGS) + return NONPREDICATE_REGS; + return NO_REGS; +} + +/* Implements FINAL_PRESCAN_INSN. */ +void +c6x_final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + c6x_current_insn = insn; +} + +/* A structure to describe the stack layout of a function. The layout is + as follows: + + [saved frame pointer (or possibly padding0)] + --> incoming stack pointer, new hard frame pointer + [saved call-used regs] + [optional padding1] + --> soft frame pointer + [frame] + [outgoing arguments] + [optional padding2] + + The structure members are laid out in this order. */ + +struct c6x_frame +{ + int padding0; + /* Number of registers to save. */ + int nregs; + int padding1; + HOST_WIDE_INT frame; + int outgoing_arguments_size; + int padding2; + + HOST_WIDE_INT to_allocate; + /* The offsets relative to the incoming stack pointer (which + becomes HARD_FRAME_POINTER). */ + HOST_WIDE_INT frame_pointer_offset; + HOST_WIDE_INT b3_offset; + + /* True if we should call push_rts/pop_rts to save and restore + registers. */ + bool push_rts; +}; + +/* Return true if we need to save and modify the PIC register in the + prologue. */ + +static bool +must_reload_pic_reg_p (void) +{ + struct cgraph_local_info *i = NULL; + + if (!TARGET_DSBT) + return false; + + i = cgraph_local_info (current_function_decl); + + if ((crtl->uses_pic_offset_table || !crtl->is_leaf) && !i->local) + return true; + return false; +} + +/* Return 1 if we need to save REGNO. */ +static int +c6x_save_reg (unsigned int regno) +{ + return ((df_regs_ever_live_p (regno) + && !call_used_regs[regno] + && !fixed_regs[regno]) + || (regno == RETURN_ADDR_REGNO + && (df_regs_ever_live_p (regno) + || !crtl->is_leaf)) + || (regno == PIC_OFFSET_TABLE_REGNUM && must_reload_pic_reg_p ())); +} + +/* Examine the number of regs NREGS we've determined we must save. + Return true if we should use __c6xabi_push_rts/__c6xabi_pop_rts for + prologue and epilogue. */ + +static bool +use_push_rts_p (int nregs) +{ + if (TARGET_INSNS_64PLUS && optimize_function_for_size_p (cfun) + && !cfun->machine->contains_sibcall + && !cfun->returns_struct + && !TARGET_LONG_CALLS + && nregs >= 6 && !frame_pointer_needed) + return true; + return false; +} + +/* Return number of saved general prupose registers. */ + +int +c6x_nsaved_regs (void) +{ + int nregs = 0; + int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (c6x_save_reg (regno)) + nregs++; + return nregs; +} + +/* The safe debug order mandated by the ABI. 
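The register-plus-constant case above boils down to an alignment check followed by a range check on the scaled offset. A simplified standalone sketch of just that rule, assuming the access size is a power of two and ignoring the !TARGET_STDW doubleword splitting and the B14/B15 long-offset form; reg_plus_const_ok is an invented name, not from the GCC sources:

  #include <stdbool.h>

  typedef long long hwi;   /* stand-in for HOST_WIDE_INT */

  static bool
  reg_plus_const_ok (hwi off, hwi size)
  {
    if (((size - 1) & off) != 0)    /* offset must be naturally aligned */
      return false;
    off /= size;                    /* the encoding scales by the access size */
    return off > -32 && off < 32;   /* scaled offset must fit the 5-bit field,
                                       in either direction */
  }

  /* Example: a 4-byte access at offset 124 scales to index 31 and is
     accepted; offset 128 scales to 32 and is rejected here.  */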
*/ +static unsigned reg_save_order[] = +{ + REG_A10, REG_A11, REG_A12, REG_A13, + REG_A14, REG_B3, + REG_B10, REG_B11, REG_B12, REG_B13, + REG_B14, REG_A15 +}; + +#define N_SAVE_ORDER (sizeof reg_save_order / sizeof *reg_save_order) + +/* Compute the layout of the stack frame and store it in FRAME. */ + +static void +c6x_compute_frame_layout (struct c6x_frame *frame) +{ + HOST_WIDE_INT size = get_frame_size (); + HOST_WIDE_INT offset; + int nregs; + + /* We use the four bytes which are technically inside the caller's frame, + usually to save the frame pointer. */ + offset = -4; + frame->padding0 = 0; + nregs = c6x_nsaved_regs (); + frame->push_rts = false; + frame->b3_offset = 0; + if (use_push_rts_p (nregs)) + { + frame->push_rts = true; + frame->b3_offset = (TARGET_BIG_ENDIAN ? -12 : -13) * 4; + nregs = 14; + } + else if (c6x_save_reg (REG_B3)) + { + int idx; + for (idx = N_SAVE_ORDER - 1; reg_save_order[idx] != REG_B3; idx--) + { + if (c6x_save_reg (reg_save_order[idx])) + frame->b3_offset -= 4; + } + } + frame->nregs = nregs; + + if (size == 0 && nregs == 0) + { + frame->padding0 = 4; + frame->padding1 = frame->padding2 = 0; + frame->frame_pointer_offset = frame->to_allocate = 0; + frame->outgoing_arguments_size = 0; + return; + } + + if (!frame->push_rts) + offset += frame->nregs * 4; + + if (offset == 0 && size == 0 && crtl->outgoing_args_size == 0 + && !crtl->is_leaf) + /* Don't use the bottom of the caller's frame if we have no + allocation of our own and call other functions. */ + frame->padding0 = frame->padding1 = 4; + else if (offset & 4) + frame->padding1 = 4; + else + frame->padding1 = 0; + + offset += frame->padding0 + frame->padding1; + frame->frame_pointer_offset = offset; + offset += size; + + frame->outgoing_arguments_size = crtl->outgoing_args_size; + offset += frame->outgoing_arguments_size; + + if ((offset & 4) == 0) + frame->padding2 = 8; + else + frame->padding2 = 4; + frame->to_allocate = offset + frame->padding2; +} + +/* Return the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +HOST_WIDE_INT +c6x_initial_elimination_offset (int from, int to) +{ + struct c6x_frame frame; + c6x_compute_frame_layout (&frame); + + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return 0; + else if (from == FRAME_POINTER_REGNUM + && to == HARD_FRAME_POINTER_REGNUM) + return -frame.frame_pointer_offset; + else + { + gcc_assert (to == STACK_POINTER_REGNUM); + + if (from == ARG_POINTER_REGNUM) + return frame.to_allocate + (frame.push_rts ? 56 : 0); + + gcc_assert (from == FRAME_POINTER_REGNUM); + return frame.to_allocate - frame.frame_pointer_offset; + } +} + +/* Given FROM and TO register numbers, say whether this elimination is + allowed. Frame pointer elimination is automatically handled. */ + +static bool +c6x_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + if (to == STACK_POINTER_REGNUM) + return !frame_pointer_needed; + return true; +} + +/* Emit insns to increment the stack pointer by OFFSET. If + FRAME_RELATED_P, set the RTX_FRAME_RELATED_P flag on the insns. + Does nothing if the offset is zero. 
*/ + +static void +emit_add_sp_const (HOST_WIDE_INT offset, bool frame_related_p) +{ + rtx to_add = GEN_INT (offset); + rtx orig_to_add = to_add; + rtx insn; + + if (offset == 0) + return; + + if (offset < -32768 || offset > 32767) + { + rtx reg = gen_rtx_REG (SImode, REG_A0); + rtx low = GEN_INT (trunc_int_for_mode (offset, HImode)); + + insn = emit_insn (gen_movsi_high (reg, low)); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movsi_lo_sum (reg, reg, to_add)); + if (frame_related_p) + RTX_FRAME_RELATED_P (insn) = 1; + to_add = reg; + } + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + to_add)); + if (frame_related_p) + { + if (REG_P (to_add)) + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + orig_to_add))); + + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Prologue and epilogue. */ +void +c6x_expand_prologue (void) +{ + struct c6x_frame frame; + rtx insn, mem; + int nsaved = 0; + HOST_WIDE_INT initial_offset, off, added_already; + + c6x_compute_frame_layout (&frame); + + if (flag_stack_usage_info) + current_function_static_stack_size = frame.to_allocate; + + initial_offset = -frame.to_allocate; + if (frame.push_rts) + { + emit_insn (gen_push_rts ()); + nsaved = frame.nregs; + } + + /* If the offsets would be too large for the memory references we will + create to save registers, do the stack allocation in two parts. + Ensure by subtracting 8 that we don't store to the word pointed to + by the stack pointer. */ + if (initial_offset < -32768) + initial_offset = -frame.frame_pointer_offset - 8; + + if (frame.to_allocate > 0) + gcc_assert (initial_offset != 0); + + off = -initial_offset + 4 - frame.padding0; + + mem = gen_frame_mem (Pmode, stack_pointer_rtx); + + added_already = 0; + if (frame_pointer_needed) + { + rtx fp_reg = gen_rtx_REG (SImode, REG_A15); + /* We go through some contortions here to both follow the ABI's + recommendation that FP == incoming SP, and to avoid writing or + reading the word pointed to by the stack pointer. */ + rtx addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (-8))); + insn = emit_move_insn (gen_frame_mem (Pmode, addr), fp_reg); + RTX_FRAME_RELATED_P (insn) = 1; + nsaved++; + insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, stack_pointer_rtx, + GEN_INT (8))); + RTX_FRAME_RELATED_P (insn) = 1; + off -= 4; + added_already = -8; + } + + emit_add_sp_const (initial_offset - added_already, true); + + if (nsaved < frame.nregs) + { + unsigned i; + + for (i = 0; i < N_SAVE_ORDER; i++) + { + int idx = N_SAVE_ORDER - i - 1; + unsigned regno = reg_save_order[idx]; + rtx reg; + enum machine_mode save_mode = SImode; + + if (regno == REG_A15 && frame_pointer_needed) + /* Already saved. 
*/ + continue; + if (!c6x_save_reg (regno)) + continue; + + if (TARGET_STDW && (off & 4) == 0 && off <= 256 + && (regno & 1) == 1 + && i + 1 < N_SAVE_ORDER + && reg_save_order[idx - 1] == regno - 1 + && c6x_save_reg (regno - 1)) + { + save_mode = DImode; + regno--; + i++; + } + reg = gen_rtx_REG (save_mode, regno); + off -= GET_MODE_SIZE (save_mode); + + insn = emit_move_insn (adjust_address (mem, save_mode, off), + reg); + RTX_FRAME_RELATED_P (insn) = 1; + + nsaved += HARD_REGNO_NREGS (regno, save_mode); + } + } + gcc_assert (nsaved == frame.nregs); + emit_add_sp_const (-frame.to_allocate - initial_offset, true); + if (must_reload_pic_reg_p ()) + { + if (dsbt_decl == NULL) + { + tree t; + + t = build_index_type (integer_one_node); + t = build_array_type (integer_type_node, t); + t = build_decl (BUILTINS_LOCATION, VAR_DECL, + get_identifier ("__c6xabi_DSBT_BASE"), t); + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_EXTERNAL (t) = 1; + TREE_STATIC (t) = 1; + TREE_PUBLIC (t) = 1; + TREE_USED (t) = 1; + + dsbt_decl = t; + } + emit_insn (gen_setup_dsbt (pic_offset_table_rtx, + XEXP (DECL_RTL (dsbt_decl), 0))); + } +} + +void +c6x_expand_epilogue (bool sibcall) +{ + unsigned i; + struct c6x_frame frame; + rtx mem; + HOST_WIDE_INT off; + int nsaved = 0; + + c6x_compute_frame_layout (&frame); + + mem = gen_frame_mem (Pmode, stack_pointer_rtx); + + /* Insert a dummy set/use of the stack pointer. This creates a + scheduler barrier between the prologue saves and epilogue restores. */ + emit_insn (gen_epilogue_barrier (stack_pointer_rtx, stack_pointer_rtx)); + + /* If the offsets would be too large for the memory references we will + create to restore registers, do a preliminary stack adjustment here. */ + off = frame.to_allocate - frame.frame_pointer_offset + frame.padding1; + if (frame.push_rts) + { + nsaved = frame.nregs; + } + else + { + if (frame.to_allocate > 32768) + { + /* Don't add the entire offset so that we leave an unused word + above the stack pointer. */ + emit_add_sp_const ((off - 16) & ~7, false); + off &= 7; + off += 16; + } + for (i = 0; i < N_SAVE_ORDER; i++) + { + unsigned regno = reg_save_order[i]; + rtx reg; + enum machine_mode save_mode = SImode; + + if (!c6x_save_reg (regno)) + continue; + if (regno == REG_A15 && frame_pointer_needed) + continue; + + if (TARGET_STDW && (off & 4) == 0 && off < 256 + && (regno & 1) == 0 + && i + 1 < N_SAVE_ORDER + && reg_save_order[i + 1] == regno + 1 + && c6x_save_reg (regno + 1)) + { + save_mode = DImode; + i++; + } + reg = gen_rtx_REG (save_mode, regno); + + emit_move_insn (reg, adjust_address (mem, save_mode, off)); + + off += GET_MODE_SIZE (save_mode); + nsaved += HARD_REGNO_NREGS (regno, save_mode); + } + } + if (!frame_pointer_needed) + emit_add_sp_const (off + frame.padding0 - 4, false); + else + { + rtx fp_reg = gen_rtx_REG (SImode, REG_A15); + rtx addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (8))); + emit_insn (gen_addsi3 (stack_pointer_rtx, hard_frame_pointer_rtx, + GEN_INT (-8))); + emit_move_insn (fp_reg, gen_frame_mem (Pmode, addr)); + nsaved++; + } + gcc_assert (nsaved == frame.nregs); + if (!sibcall) + { + if (frame.push_rts) + emit_jump_insn (gen_pop_rts ()); + else + emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode, + RETURN_ADDR_REGNO))); + } +} + +/* Return the value of the return address for the frame COUNT steps up + from the current frame, after the prologue. + We punt for everything but the current frame by returning const0_rtx. 
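The prologue and epilogue loops above pair an even/odd register couple into one doubleword slot when STDW/LDDW are available, both registers need saving, and the slot is 8-byte aligned. A simplified standalone sketch of the restore side only, ignoring the offset-range limits and the reversed walk used by the prologue; plan_restores is an invented name used purely for illustration:

  #include <stdbool.h>
  #include <stdio.h>

  /* Walk a register save order and group an even register with the odd
     register that follows it into one doubleword slot when both are saved
     and the running offset is 8-byte aligned.  */
  static void
  plan_restores (const int *order, int n, const bool *saved)
  {
    int i, off = 0;

    for (i = 0; i < n; i++)
      {
        int regno = order[i];
        if (!saved[regno])
          continue;
        if ((off & 7) == 0 && (regno & 1) == 0
            && i + 1 < n && order[i + 1] == regno + 1 && saved[regno + 1])
          {
            printf ("LDDW  pair %d:%d at offset %d\n", regno + 1, regno, off);
            off += 8;
            i++;          /* the odd partner shares this slot */
          }
        else
          {
            printf ("LDW   reg %d at offset %d\n", regno, off);
            off += 4;
          }
      }
  }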
*/ + +rtx +c6x_return_addr_rtx (int count) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNO); +} + +/* Return true iff TYPE is one of the shadow types. */ +static bool +shadow_type_p (enum attr_type type) +{ + return (type == TYPE_SHADOW || type == TYPE_LOAD_SHADOW + || type == TYPE_MULT_SHADOW); +} + +/* Return true iff INSN is a shadow pattern. */ +static bool +shadow_p (rtx insn) +{ + if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0) + return false; + return shadow_type_p (get_attr_type (insn)); +} + +/* Return true iff INSN is a shadow or blockage pattern. */ +static bool +shadow_or_blockage_p (rtx insn) +{ + enum attr_type type; + if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0) + return false; + type = get_attr_type (insn); + return shadow_type_p (type) || type == TYPE_BLOCKAGE; +} + +/* Translate UNITS into a bitmask of units we can reserve for this + insn. */ +static int +get_reservation_flags (enum attr_units units) +{ + switch (units) + { + case UNITS_D: + case UNITS_D_ADDR: + return RESERVATION_FLAG_D; + case UNITS_L: + return RESERVATION_FLAG_L; + case UNITS_S: + return RESERVATION_FLAG_S; + case UNITS_M: + return RESERVATION_FLAG_M; + case UNITS_LS: + return RESERVATION_FLAG_LS; + case UNITS_DL: + return RESERVATION_FLAG_DL; + case UNITS_DS: + return RESERVATION_FLAG_DS; + case UNITS_DLS: + return RESERVATION_FLAG_DLS; + default: + return 0; + } +} + +/* Compute the side of the machine used by INSN, which reserves UNITS. + This must match the reservations in the scheduling description. */ +static int +get_insn_side (rtx insn, enum attr_units units) +{ + if (units == UNITS_D_ADDR) + return (get_attr_addr_regfile (insn) == ADDR_REGFILE_A ? 0 : 1); + else + { + enum attr_dest_regfile rf = get_attr_dest_regfile (insn); + if (rf == DEST_REGFILE_ANY) + return get_attr_type (insn) == TYPE_BRANCH ? 0 : 1; + else + return rf == DEST_REGFILE_A ? 0 : 1; + } +} + +/* After scheduling, walk the insns between HEAD and END and assign unit + reservations. */ +static void +assign_reservations (rtx head, rtx end) +{ + rtx insn; + for (insn = head; insn != NEXT_INSN (end); insn = NEXT_INSN (insn)) + { + unsigned int sched_mask, reserved; + rtx within, last; + int pass; + int rsrv[2]; + int rsrv_count[2][4]; + int i; + + if (GET_MODE (insn) != TImode) + continue; + + reserved = 0; + last = NULL_RTX; + /* Find the last insn in the packet. It has a state recorded for it, + which we can use to determine the units we should be using. */ + for (within = insn; + (within != NEXT_INSN (end) + && (within == insn || GET_MODE (within) != TImode)); + within = NEXT_INSN (within)) + { + int icode; + if (!NONDEBUG_INSN_P (within)) + continue; + icode = recog_memoized (within); + if (icode < 0) + continue; + if (shadow_p (within)) + continue; + if (INSN_INFO_ENTRY (INSN_UID (within)).reservation != 0) + reserved |= 1 << INSN_INFO_ENTRY (INSN_UID (within)).reservation; + last = within; + } + if (last == NULL_RTX) + continue; + + sched_mask = INSN_INFO_ENTRY (INSN_UID (last)).unit_mask; + sched_mask &= ~reserved; + + memset (rsrv_count, 0, sizeof rsrv_count); + rsrv[0] = rsrv[1] = ~0; + for (i = 0; i < 8; i++) + { + int side = i / 4; + int unit = i & 3; + unsigned unit_bit = 1 << (unit + side * UNIT_QID_SIDE_OFFSET); + /* Clear the bits which we expect to reserve in the following loop, + leaving the ones set which aren't present in the scheduler's + state and shouldn't be reserved. 
*/ + if (sched_mask & unit_bit) + rsrv[i / 4] &= ~(1 << unit); + } + + /* Walk through the insns that occur in the same cycle. We use multiple + passes to assign units, assigning for insns with the most specific + requirements first. */ + for (pass = 0; pass < 4; pass++) + for (within = insn; + (within != NEXT_INSN (end) + && (within == insn || GET_MODE (within) != TImode)); + within = NEXT_INSN (within)) + { + int uid = INSN_UID (within); + int this_rsrv, side; + int icode; + enum attr_units units; + enum attr_type type; + int j; + + if (!NONDEBUG_INSN_P (within)) + continue; + icode = recog_memoized (within); + if (icode < 0) + continue; + if (INSN_INFO_ENTRY (uid).reservation != 0) + continue; + units = get_attr_units (within); + type = get_attr_type (within); + this_rsrv = get_reservation_flags (units); + if (this_rsrv == 0) + continue; + side = get_insn_side (within, units); + + /* Certain floating point instructions are treated specially. If + an insn can choose between units it can reserve, and its + reservation spans more than one cycle, the reservation contains + special markers in the first cycle to help us reconstruct what + the automaton chose. */ + if ((type == TYPE_ADDDP || type == TYPE_FP4) + && units == UNITS_LS) + { + int test1_code = ((type == TYPE_FP4 ? UNIT_QID_FPL1 : UNIT_QID_ADDDPL1) + + side * UNIT_QID_SIDE_OFFSET); + int test2_code = ((type == TYPE_FP4 ? UNIT_QID_FPS1 : UNIT_QID_ADDDPS1) + + side * UNIT_QID_SIDE_OFFSET); + if ((sched_mask & (1 << test1_code)) != 0) + { + this_rsrv = RESERVATION_FLAG_L; + sched_mask &= ~(1 << test1_code); + } + else if ((sched_mask & (1 << test2_code)) != 0) + { + this_rsrv = RESERVATION_FLAG_S; + sched_mask &= ~(1 << test2_code); + } + } + + if ((this_rsrv & (this_rsrv - 1)) == 0) + { + int t = exact_log2 (this_rsrv) + side * UNIT_QID_SIDE_OFFSET; + rsrv[side] |= this_rsrv; + INSN_INFO_ENTRY (uid).reservation = t; + continue; + } + + if (pass == 1) + { + for (j = 0; j < 4; j++) + if (this_rsrv & (1 << j)) + rsrv_count[side][j]++; + continue; + } + if ((pass == 2 && this_rsrv != RESERVATION_FLAG_DLS) + || (pass == 3 && this_rsrv == RESERVATION_FLAG_DLS)) + { + int best = -1, best_cost = INT_MAX; + for (j = 0; j < 4; j++) + if ((this_rsrv & (1 << j)) + && !(rsrv[side] & (1 << j)) + && rsrv_count[side][j] < best_cost) + { + best_cost = rsrv_count[side][j]; + best = j; + } + gcc_assert (best != -1); + rsrv[side] |= 1 << best; + for (j = 0; j < 4; j++) + if ((this_rsrv & (1 << j)) && j != best) + rsrv_count[side][j]--; + + INSN_INFO_ENTRY (uid).reservation + = best + side * UNIT_QID_SIDE_OFFSET; + } + } + } +} + +/* Return a factor by which to weight unit imbalances for a reservation + R. */ +static int +unit_req_factor (enum unitreqs r) +{ + switch (r) + { + case UNIT_REQ_D: + case UNIT_REQ_L: + case UNIT_REQ_S: + case UNIT_REQ_M: + case UNIT_REQ_X: + case UNIT_REQ_T: + return 1; + case UNIT_REQ_DL: + case UNIT_REQ_LS: + case UNIT_REQ_DS: + return 2; + case UNIT_REQ_DLS: + return 3; + default: + gcc_unreachable (); + } +} + +/* Examine INSN, and store in REQ1/SIDE1 and REQ2/SIDE2 the unit + requirements. Returns zero if INSN can't be handled, otherwise + either one or two to show how many of the two pairs are in use. + REQ1 is always used, it holds what is normally thought of as the + instructions reservation, e.g. UNIT_REQ_DL. REQ2 is used to either + describe a cross path, or for loads/stores, the T unit. 
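The later passes of assign_reservations make a greedy choice for insns that may run on several units: take a unit that is still free this cycle and has the fewest other contenders. A standalone sketch of that inner choice, omitting the multi-pass ordering and the ADDDP/FP4 special cases; choose_unit is an invented name, not part of the patch:

  #include <limits.h>

  /* ALLOWED and RESERVED are 4-bit masks over the D/L/S/M units of one side;
     DEMAND counts how many other insns still want each unit.  Return the
     cheapest free unit, or -1 if none is free (the original code asserts
     that a free unit is always found).  */
  static int
  choose_unit (unsigned allowed, unsigned reserved, const int demand[4])
  {
    int u, best = -1, best_cost = INT_MAX;

    for (u = 0; u < 4; u++)
      if ((allowed & (1u << u))
          && !(reserved & (1u << u))
          && demand[u] < best_cost)
        {
          best_cost = demand[u];
          best = u;
        }
    return best;
  }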
*/ +static int +get_unit_reqs (rtx insn, int *req1, int *side1, int *req2, int *side2) +{ + enum attr_units units; + enum attr_cross cross; + int side, req; + + if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0) + return 0; + units = get_attr_units (insn); + if (units == UNITS_UNKNOWN) + return 0; + side = get_insn_side (insn, units); + cross = get_attr_cross (insn); + + req = (units == UNITS_D ? UNIT_REQ_D + : units == UNITS_D_ADDR ? UNIT_REQ_D + : units == UNITS_DL ? UNIT_REQ_DL + : units == UNITS_DS ? UNIT_REQ_DS + : units == UNITS_L ? UNIT_REQ_L + : units == UNITS_LS ? UNIT_REQ_LS + : units == UNITS_S ? UNIT_REQ_S + : units == UNITS_M ? UNIT_REQ_M + : units == UNITS_DLS ? UNIT_REQ_DLS + : -1); + gcc_assert (req != -1); + *req1 = req; + *side1 = side; + if (units == UNITS_D_ADDR) + { + *req2 = UNIT_REQ_T; + *side2 = side ^ (cross == CROSS_Y ? 1 : 0); + return 2; + } + else if (cross == CROSS_Y) + { + *req2 = UNIT_REQ_X; + *side2 = side; + return 2; + } + return 1; +} + +/* Walk the insns between and including HEAD and TAIL, and mark the + resource requirements in the unit_reqs table. */ +static void +count_unit_reqs (unit_req_table reqs, rtx head, rtx tail) +{ + rtx insn; + + memset (reqs, 0, sizeof (unit_req_table)); + + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) + { + int side1, side2, req1, req2; + + switch (get_unit_reqs (insn, &req1, &side1, &req2, &side2)) + { + case 2: + reqs[side2][req2]++; + /* fall through */ + case 1: + reqs[side1][req1]++; + break; + } + } +} + +/* Update the table REQS by merging more specific unit reservations into + more general ones, i.e. counting (for example) UNIT_REQ_D also in + UNIT_REQ_DL, DS, and DLS. */ +static void +merge_unit_reqs (unit_req_table reqs) +{ + int side; + for (side = 0; side < 2; side++) + { + int d = reqs[side][UNIT_REQ_D]; + int l = reqs[side][UNIT_REQ_L]; + int s = reqs[side][UNIT_REQ_S]; + int dl = reqs[side][UNIT_REQ_DL]; + int ls = reqs[side][UNIT_REQ_LS]; + int ds = reqs[side][UNIT_REQ_DS]; + + reqs[side][UNIT_REQ_DL] += d; + reqs[side][UNIT_REQ_DL] += l; + reqs[side][UNIT_REQ_DS] += d; + reqs[side][UNIT_REQ_DS] += s; + reqs[side][UNIT_REQ_LS] += l; + reqs[side][UNIT_REQ_LS] += s; + reqs[side][UNIT_REQ_DLS] += ds + dl + ls + d + l + s; + } +} + +/* Examine the table REQS and return a measure of unit imbalance by comparing + the two sides of the machine. If, for example, D1 is used twice and D2 + used not at all, the return value should be 1 in the absence of other + imbalances. */ +static int +unit_req_imbalance (unit_req_table reqs) +{ + int val = 0; + int i; + + for (i = 0; i < UNIT_REQ_MAX; i++) + { + int factor = unit_req_factor ((enum unitreqs) i); + int diff = abs (reqs[0][i] - reqs[1][i]); + val += (diff + factor - 1) / factor / 2; + } + return val; +} + +/* Return the resource-constrained minimum iteration interval given the + data in the REQS table. This must have been processed with + merge_unit_reqs already. */ +static int +res_mii (unit_req_table reqs) +{ + int side, req; + int worst = 1; + for (side = 0; side < 2; side++) + for (req = 0; req < UNIT_REQ_MAX; req++) + { + int factor = unit_req_factor ((enum unitreqs) req); + worst = MAX ((reqs[side][UNIT_REQ_D] + factor - 1) / factor, worst); + } + + return worst; +} + +/* Examine INSN, and store in PMASK1 and PMASK2 bitmasks that represent + the operands that are involved in the (up to) two reservations, as + found by get_unit_reqs. Return true if we did this successfully, false + if we couldn't identify what to do with INSN. 
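unit_req_imbalance above weights each side-to-side difference by how many units can satisfy the request, then halves it, so two extra uses of D on one side (with no other imbalance) score exactly 1. The same measure restated as a standalone sketch over a plain 2 x N table; the enum and helper names are invented for illustration:

  enum req { REQ_D, REQ_L, REQ_S, REQ_M, REQ_X, REQ_T,
             REQ_DL, REQ_DS, REQ_LS, REQ_DLS, REQ_MAX };

  /* How many units can satisfy a request of kind R.  */
  static int
  req_factor (enum req r)
  {
    if (r == REQ_DLS)
      return 3;
    if (r == REQ_DL || r == REQ_DS || r == REQ_LS)
      return 2;
    return 1;
  }

  static int
  imbalance (const int reqs[2][REQ_MAX])
  {
    int i, val = 0;

    for (i = 0; i < REQ_MAX; i++)
      {
        int factor = req_factor ((enum req) i);
        int diff = (reqs[0][i] > reqs[1][i]
                    ? reqs[0][i] - reqs[1][i]
                    : reqs[1][i] - reqs[0][i]);
        val += (diff + factor - 1) / factor / 2;
      }
    return val;   /* e.g. reqs[0][REQ_D] == 2, all else equal -> 1 */
  }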
*/ +static bool +get_unit_operand_masks (rtx insn, unsigned int *pmask1, unsigned int *pmask2) +{ + enum attr_op_pattern op_pat; + + if (recog_memoized (insn) < 0) + return 0; + if (GET_CODE (PATTERN (insn)) == COND_EXEC) + return false; + extract_insn (insn); + op_pat = get_attr_op_pattern (insn); + if (op_pat == OP_PATTERN_DT) + { + gcc_assert (recog_data.n_operands == 2); + *pmask1 = 1 << 0; + *pmask2 = 1 << 1; + return true; + } + else if (op_pat == OP_PATTERN_TD) + { + gcc_assert (recog_data.n_operands == 2); + *pmask1 = 1 << 1; + *pmask2 = 1 << 0; + return true; + } + else if (op_pat == OP_PATTERN_SXS) + { + gcc_assert (recog_data.n_operands == 3); + *pmask1 = (1 << 0) | (1 << 2); + *pmask2 = 1 << 1; + return true; + } + else if (op_pat == OP_PATTERN_SX) + { + gcc_assert (recog_data.n_operands == 2); + *pmask1 = 1 << 0; + *pmask2 = 1 << 1; + return true; + } + else if (op_pat == OP_PATTERN_SSX) + { + gcc_assert (recog_data.n_operands == 3); + *pmask1 = (1 << 0) | (1 << 1); + *pmask2 = 1 << 2; + return true; + } + return false; +} + +/* Try to replace a register in INSN, which has corresponding rename info + from regrename_analyze in INFO. OP_MASK and ORIG_SIDE provide information + about the operands that must be renamed and the side they are on. + REQS is the table of unit reservations in the loop between HEAD and TAIL. + We recompute this information locally after our transformation, and keep + it only if we managed to improve the balance. */ +static void +try_rename_operands (rtx head, rtx tail, unit_req_table reqs, rtx insn, + insn_rr_info *info, unsigned int op_mask, int orig_side) +{ + enum reg_class super_class = orig_side == 0 ? B_REGS : A_REGS; + HARD_REG_SET unavailable; + du_head_p this_head; + struct du_chain *chain; + int i; + unsigned tmp_mask; + int best_reg, old_reg; + vec involved_chains = vNULL; + unit_req_table new_reqs; + + for (i = 0, tmp_mask = op_mask; tmp_mask; i++) + { + du_head_p op_chain; + if ((tmp_mask & (1 << i)) == 0) + continue; + if (info->op_info[i].n_chains != 1) + goto out_fail; + op_chain = regrename_chain_from_id (info->op_info[i].heads[0]->id); + involved_chains.safe_push (op_chain); + tmp_mask &= ~(1 << i); + } + + if (involved_chains.length () > 1) + goto out_fail; + + this_head = involved_chains[0]; + if (this_head->cannot_rename) + goto out_fail; + + for (chain = this_head->first; chain; chain = chain->next_use) + { + unsigned int mask1, mask2, mask_changed; + int count, side1, side2, req1, req2; + insn_rr_info *this_rr = &insn_rr[INSN_UID (chain->insn)]; + + count = get_unit_reqs (chain->insn, &req1, &side1, &req2, &side2); + + if (count == 0) + goto out_fail; + + if (!get_unit_operand_masks (chain->insn, &mask1, &mask2)) + goto out_fail; + + extract_insn (chain->insn); + + mask_changed = 0; + for (i = 0; i < recog_data.n_operands; i++) + { + int j; + int n_this_op = this_rr->op_info[i].n_chains; + for (j = 0; j < n_this_op; j++) + { + du_head_p other = this_rr->op_info[i].heads[j]; + if (regrename_chain_from_id (other->id) == this_head) + break; + } + if (j == n_this_op) + continue; + + if (n_this_op != 1) + goto out_fail; + mask_changed |= 1 << i; + } + gcc_assert (mask_changed != 0); + if (mask_changed != mask1 && mask_changed != mask2) + goto out_fail; + } + + /* If we get here, we can do the renaming. 
*/ + COMPL_HARD_REG_SET (unavailable, reg_class_contents[(int) super_class]); + + old_reg = this_head->regno; + best_reg = find_best_rename_reg (this_head, super_class, &unavailable, old_reg); + + regrename_do_replace (this_head, best_reg); + + count_unit_reqs (new_reqs, head, PREV_INSN (tail)); + merge_unit_reqs (new_reqs); + if (dump_file) + { + fprintf (dump_file, "reshuffle for insn %d, op_mask %x, " + "original side %d, new reg %d\n", + INSN_UID (insn), op_mask, orig_side, best_reg); + fprintf (dump_file, " imbalance %d -> %d\n", + unit_req_imbalance (reqs), unit_req_imbalance (new_reqs)); + } + if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs)) + regrename_do_replace (this_head, old_reg); + else + memcpy (reqs, new_reqs, sizeof (unit_req_table)); + + out_fail: + involved_chains.release (); +} + +/* Find insns in LOOP which would, if shifted to the other side + of the machine, reduce an imbalance in the unit reservations. */ +static void +reshuffle_units (basic_block loop) +{ + rtx head = BB_HEAD (loop); + rtx tail = BB_END (loop); + rtx insn; + unit_req_table reqs; + edge e; + edge_iterator ei; + bitmap_head bbs; + + count_unit_reqs (reqs, head, PREV_INSN (tail)); + merge_unit_reqs (reqs); + + regrename_init (true); + + bitmap_initialize (&bbs, &bitmap_default_obstack); + + FOR_EACH_EDGE (e, ei, loop->preds) + bitmap_set_bit (&bbs, e->src->index); + + bitmap_set_bit (&bbs, loop->index); + regrename_analyze (&bbs); + + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) + { + enum attr_units units; + int count, side1, side2, req1, req2; + unsigned int mask1, mask2; + insn_rr_info *info; + + if (!NONDEBUG_INSN_P (insn)) + continue; + + count = get_unit_reqs (insn, &req1, &side1, &req2, &side2); + + if (count == 0) + continue; + + if (!get_unit_operand_masks (insn, &mask1, &mask2)) + continue; + + info = &insn_rr[INSN_UID (insn)]; + if (info->op_info == NULL) + continue; + + if (reqs[side1][req1] > 1 + && reqs[side1][req1] > 2 * reqs[side1 ^ 1][req1]) + { + try_rename_operands (head, tail, reqs, insn, info, mask1, side1); + } + + units = get_attr_units (insn); + if (units == UNITS_D_ADDR) + { + gcc_assert (count == 2); + if (reqs[side2][req2] > 1 + && reqs[side2][req2] > 2 * reqs[side2 ^ 1][req2]) + { + try_rename_operands (head, tail, reqs, insn, info, mask2, side2); + } + } + } + regrename_finish (); +} + +/* Backend scheduling state. */ +typedef struct c6x_sched_context +{ + /* The current scheduler clock, saved in the sched_reorder hook. */ + int curr_sched_clock; + + /* Number of insns issued so far in this cycle. */ + int issued_this_cycle; + + /* We record the time at which each jump occurs in JUMP_CYCLES. The + theoretical maximum for number of jumps in flight is 12: 2 every + cycle, with a latency of 6 cycles each. This is a circular + buffer; JUMP_CYCLE_INDEX is the pointer to the start. Earlier + jumps have a higher index. This array should be accessed through + the jump_cycle function. */ + int jump_cycles[12]; + int jump_cycle_index; + + /* In parallel with jump_cycles, this array records the opposite of + the condition used in each pending jump. This is used to + predicate insns that are scheduled in the jump's delay slots. If + this is NULL_RTX no such predication happens. */ + rtx jump_cond[12]; + + /* Similar to the jump_cycles mechanism, but here we take into + account all insns with delay slots, to avoid scheduling asms into + the delay slots. */ + int delays_finished_at; + + /* The following variable value is the last issued insn. 
*/ + rtx last_scheduled_insn; + /* The last issued insn that isn't a shadow of another. */ + rtx last_scheduled_iter0; + + /* The following variable value is DFA state before issuing the + first insn in the current clock cycle. We do not use this member + of the structure directly; we copy the data in and out of + prev_cycle_state. */ + state_t prev_cycle_state_ctx; + + int reg_n_accesses[FIRST_PSEUDO_REGISTER]; + int reg_n_xaccesses[FIRST_PSEUDO_REGISTER]; + int reg_set_in_cycle[FIRST_PSEUDO_REGISTER]; + + int tmp_reg_n_accesses[FIRST_PSEUDO_REGISTER]; + int tmp_reg_n_xaccesses[FIRST_PSEUDO_REGISTER]; +} *c6x_sched_context_t; + +/* The current scheduling state. */ +static struct c6x_sched_context ss; + +/* The following variable value is DFA state before issuing the first insn + in the current clock cycle. This is used in c6x_variable_issue for + comparison with the state after issuing the last insn in a cycle. */ +static state_t prev_cycle_state; + +/* Set when we discover while processing an insn that it would lead to too + many accesses of the same register. */ +static bool reg_access_stall; + +/* The highest insn uid after delayed insns were split, but before loop bodies + were copied by the modulo scheduling code. */ +static int sploop_max_uid_iter0; + +/* Look up the jump cycle with index N. For an out-of-bounds N, we return 0, + so the caller does not specifically have to test for it. */ +static int +get_jump_cycle (int n) +{ + if (n >= 12) + return 0; + n += ss.jump_cycle_index; + if (n >= 12) + n -= 12; + return ss.jump_cycles[n]; +} + +/* Look up the jump condition with index N. */ +static rtx +get_jump_cond (int n) +{ + if (n >= 12) + return NULL_RTX; + n += ss.jump_cycle_index; + if (n >= 12) + n -= 12; + return ss.jump_cond[n]; +} + +/* Return the index of the first jump that occurs after CLOCK_VAR. If no jump + has delay slots beyond CLOCK_VAR, return -1. */ +static int +first_jump_index (int clock_var) +{ + int retval = -1; + int n = 0; + for (;;) + { + int t = get_jump_cycle (n); + if (t <= clock_var) + break; + retval = n; + n++; + } + return retval; +} + +/* Add a new entry in our scheduling state for a jump that occurs in CYCLE + and has the opposite condition of COND. */ +static void +record_jump (int cycle, rtx cond) +{ + if (ss.jump_cycle_index == 0) + ss.jump_cycle_index = 11; + else + ss.jump_cycle_index--; + ss.jump_cycles[ss.jump_cycle_index] = cycle; + ss.jump_cond[ss.jump_cycle_index] = cond; +} + +/* Set the clock cycle of INSN to CYCLE. Also clears the insn's entry in + new_conditions. */ +static void +insn_set_clock (rtx insn, int cycle) +{ + unsigned uid = INSN_UID (insn); + + if (uid >= INSN_INFO_LENGTH) + insn_info.safe_grow (uid * 5 / 4 + 10); + + INSN_INFO_ENTRY (uid).clock = cycle; + INSN_INFO_ENTRY (uid).new_cond = NULL; + INSN_INFO_ENTRY (uid).reservation = 0; + INSN_INFO_ENTRY (uid).ebb_start = false; +} + +/* Return the clock cycle we set for the insn with uid UID. */ +static int +insn_uid_get_clock (int uid) +{ + return INSN_INFO_ENTRY (uid).clock; +} + +/* Return the clock cycle we set for INSN. */ +static int +insn_get_clock (rtx insn) +{ + return insn_uid_get_clock (INSN_UID (insn)); +} + +/* Examine INSN, and if it is a conditional jump of any kind, return + the opposite of the condition in which it branches. Otherwise, + return NULL_RTX. 
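The jump-tracking state above is a 12-entry circular buffer: the newest jump is recorded at a decremented start index, so index 0 is always the most recent jump and higher indices are older ones, while out-of-range lookups read as cycle 0 so callers need not special-case them. A standalone sketch of just that buffer; the struct and function names are invented and assume a zero-initialized ring and a nonnegative clock:

  struct jump_ring
  {
    int cycles[12];
    int start;
  };

  static void
  record_jump_cycle (struct jump_ring *r, int cycle)
  {
    r->start = (r->start == 0 ? 11 : r->start - 1);
    r->cycles[r->start] = cycle;
  }

  static int
  jump_cycle (const struct jump_ring *r, int n)
  {
    if (n >= 12)
      return 0;
    return r->cycles[(r->start + n) % 12];
  }

  /* Index of the oldest jump whose delay slots extend past CLOCK,
     or -1 if none do.  */
  static int
  first_jump (const struct jump_ring *r, int clock)
  {
    int n, retval = -1;

    for (n = 0; jump_cycle (r, n) > clock; n++)
      retval = n;
    return retval;
  }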
*/ +static rtx +condjump_opposite_condition (rtx insn) +{ + rtx pat = PATTERN (insn); + int icode = INSN_CODE (insn); + rtx x = NULL; + + if (icode == CODE_FOR_br_true || icode == CODE_FOR_br_false) + { + x = XEXP (SET_SRC (pat), 0); + if (icode == CODE_FOR_br_false) + return x; + } + if (GET_CODE (pat) == COND_EXEC) + { + rtx t = COND_EXEC_CODE (pat); + if ((GET_CODE (t) == PARALLEL + && GET_CODE (XVECEXP (t, 0, 0)) == RETURN) + || (GET_CODE (t) == UNSPEC && XINT (t, 1) == UNSPEC_REAL_JUMP) + || (GET_CODE (t) == SET && SET_DEST (t) == pc_rtx)) + x = COND_EXEC_TEST (pat); + } + + if (x != NULL_RTX) + { + enum rtx_code code = GET_CODE (x); + x = gen_rtx_fmt_ee (code == EQ ? NE : EQ, + GET_MODE (x), XEXP (x, 0), + XEXP (x, 1)); + } + return x; +} + +/* Return true iff COND1 and COND2 are exactly opposite conditions + one of them NE and the other EQ. */ +static bool +conditions_opposite_p (rtx cond1, rtx cond2) +{ + return (rtx_equal_p (XEXP (cond1, 0), XEXP (cond2, 0)) + && rtx_equal_p (XEXP (cond1, 1), XEXP (cond2, 1)) + && GET_CODE (cond1) == reverse_condition (GET_CODE (cond2))); +} + +/* Return true if we can add a predicate COND to INSN, or if INSN + already has that predicate. If DOIT is true, also perform the + modification. */ +static bool +predicate_insn (rtx insn, rtx cond, bool doit) +{ + int icode; + if (cond == NULL_RTX) + { + gcc_assert (!doit); + return false; + } + + if (get_attr_predicable (insn) == PREDICABLE_YES + && GET_CODE (PATTERN (insn)) != COND_EXEC) + { + if (doit) + { + rtx newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn)); + PATTERN (insn) = newpat; + INSN_CODE (insn) = -1; + } + return true; + } + if (GET_CODE (PATTERN (insn)) == COND_EXEC + && rtx_equal_p (COND_EXEC_TEST (PATTERN (insn)), cond)) + return true; + icode = INSN_CODE (insn); + if (icode == CODE_FOR_real_jump + || icode == CODE_FOR_jump + || icode == CODE_FOR_indirect_jump) + { + rtx pat = PATTERN (insn); + rtx dest = (icode == CODE_FOR_real_jump ? XVECEXP (pat, 0, 0) + : icode == CODE_FOR_jump ? XEXP (SET_SRC (pat), 0) + : SET_SRC (pat)); + if (doit) + { + rtx newpat; + if (REG_P (dest)) + newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn)); + else + newpat = gen_br_true (cond, XEXP (cond, 0), dest); + PATTERN (insn) = newpat; + INSN_CODE (insn) = -1; + } + return true; + } + if (INSN_CODE (insn) == CODE_FOR_br_true) + { + rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0); + return rtx_equal_p (br_cond, cond); + } + if (INSN_CODE (insn) == CODE_FOR_br_false) + { + rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0); + return conditions_opposite_p (br_cond, cond); + } + return false; +} + +/* Initialize SC. Used by c6x_init_sched_context and c6x_sched_init. */ +static void +init_sched_state (c6x_sched_context_t sc) +{ + sc->last_scheduled_insn = NULL_RTX; + sc->last_scheduled_iter0 = NULL_RTX; + sc->issued_this_cycle = 0; + memset (sc->jump_cycles, 0, sizeof sc->jump_cycles); + memset (sc->jump_cond, 0, sizeof sc->jump_cond); + sc->jump_cycle_index = 0; + sc->delays_finished_at = 0; + sc->curr_sched_clock = 0; + + sc->prev_cycle_state_ctx = xmalloc (dfa_state_size); + + memset (sc->reg_n_accesses, 0, sizeof sc->reg_n_accesses); + memset (sc->reg_n_xaccesses, 0, sizeof sc->reg_n_xaccesses); + memset (sc->reg_set_in_cycle, 0, sizeof sc->reg_set_in_cycle); + + state_reset (sc->prev_cycle_state_ctx); +} + +/* Allocate store for new scheduling context. 
*/ +static void * +c6x_alloc_sched_context (void) +{ + return xmalloc (sizeof (struct c6x_sched_context)); +} + +/* If CLEAN_P is true then initializes _SC with clean data, + and from the global context otherwise. */ +static void +c6x_init_sched_context (void *_sc, bool clean_p) +{ + c6x_sched_context_t sc = (c6x_sched_context_t) _sc; + + if (clean_p) + { + init_sched_state (sc); + } + else + { + *sc = ss; + sc->prev_cycle_state_ctx = xmalloc (dfa_state_size); + memcpy (sc->prev_cycle_state_ctx, prev_cycle_state, dfa_state_size); + } +} + +/* Sets the global scheduling context to the one pointed to by _SC. */ +static void +c6x_set_sched_context (void *_sc) +{ + c6x_sched_context_t sc = (c6x_sched_context_t) _sc; + + gcc_assert (sc != NULL); + ss = *sc; + memcpy (prev_cycle_state, sc->prev_cycle_state_ctx, dfa_state_size); +} + +/* Clear data in _SC. */ +static void +c6x_clear_sched_context (void *_sc) +{ + c6x_sched_context_t sc = (c6x_sched_context_t) _sc; + gcc_assert (_sc != NULL); + + free (sc->prev_cycle_state_ctx); +} + +/* Free _SC. */ +static void +c6x_free_sched_context (void *_sc) +{ + free (_sc); +} + +/* True if we are currently performing a preliminary scheduling + pass before modulo scheduling; we can't allow the scheduler to + modify instruction patterns using packetization assumptions, + since there will be another scheduling pass later if modulo + scheduling fails. */ +static bool in_hwloop; + +/* Provide information about speculation capabilities, and set the + DO_BACKTRACKING flag. */ +static void +c6x_set_sched_flags (spec_info_t spec_info) +{ + unsigned int *flags = &(current_sched_info->flags); + + if (*flags & SCHED_EBB) + { + *flags |= DO_BACKTRACKING | DO_PREDICATION; + } + if (in_hwloop) + *flags |= DONT_BREAK_DEPENDENCIES; + + spec_info->mask = 0; +} + +/* Implement the TARGET_SCHED_ISSUE_RATE hook. */ + +static int +c6x_issue_rate (void) +{ + return 8; +} + +/* Used together with the collapse_ndfa option, this ensures that we reach a + deterministic automaton state before trying to advance a cycle. + With collapse_ndfa, genautomata creates advance cycle arcs only for + such deterministic states. */ + +static rtx +c6x_sched_dfa_pre_cycle_insn (void) +{ + return const0_rtx; +} + +/* We're beginning a new block. Initialize data structures as necessary. */ + +static void +c6x_sched_init (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{ + if (prev_cycle_state == NULL) + { + prev_cycle_state = xmalloc (dfa_state_size); + } + init_sched_state (&ss); + state_reset (prev_cycle_state); +} + +/* We are about to being issuing INSN. Return nonzero if we cannot + issue it on given cycle CLOCK and return zero if we should not sort + the ready queue on the next clock start. + For C6X, we use this function just to copy the previous DFA state + for comparison purposes. 
*/ + +static int +c6x_dfa_new_cycle (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, + rtx insn ATTRIBUTE_UNUSED, int last_clock ATTRIBUTE_UNUSED, + int clock ATTRIBUTE_UNUSED, int *sort_p ATTRIBUTE_UNUSED) +{ + if (clock != last_clock) + memcpy (prev_cycle_state, curr_state, dfa_state_size); + return 0; +} + +static void +c6x_mark_regno_read (int regno, bool cross) +{ + int t = ++ss.tmp_reg_n_accesses[regno]; + + if (t > 4) + reg_access_stall = true; + + if (cross) + { + int set_cycle = ss.reg_set_in_cycle[regno]; + /* This must be done in this way rather than by tweaking things in + adjust_cost, since the stall occurs even for insns with opposite + predicates, and the scheduler may not even see a dependency. */ + if (set_cycle > 0 && set_cycle == ss.curr_sched_clock) + reg_access_stall = true; + /* This doesn't quite do anything yet as we're only modeling one + x unit. */ + ++ss.tmp_reg_n_xaccesses[regno]; + } +} + +/* Note that REG is read in the insn being examined. If CROSS, it + means the access is through a cross path. Update the temporary reg + access arrays, and set REG_ACCESS_STALL if the insn can't be issued + in the current cycle. */ + +static void +c6x_mark_reg_read (rtx reg, bool cross) +{ + unsigned regno = REGNO (reg); + unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)]; + + while (nregs-- > 0) + c6x_mark_regno_read (regno + nregs, cross); +} + +/* Note that register REG is written in cycle CYCLES. */ + +static void +c6x_mark_reg_written (rtx reg, int cycles) +{ + unsigned regno = REGNO (reg); + unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)]; + + while (nregs-- > 0) + ss.reg_set_in_cycle[regno + nregs] = cycles; +} + +/* Update the register state information for an instruction whose + body is X. Return true if the instruction has to be delayed until the + next cycle. 
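The read-tracking code above enforces two limits: at most four reads of any one register per cycle, and no cross-path read of a value written in the same cycle. A standalone sketch of that bookkeeping, with a hypothetical register count standing in for FIRST_PSEUDO_REGISTER and invented names:

  #include <stdbool.h>

  #define N_HARD_REGS 64   /* hypothetical count, for illustration only */

  static int reads_this_cycle[N_HARD_REGS];
  static int set_in_cycle[N_HARD_REGS];
  static bool access_stall;

  /* Note a read of REGNO at cycle CLOCK; CROSS means the read goes through
     a cross path.  Sets access_stall if the insn must wait a cycle.  */
  static void
  mark_regno_read (int regno, bool cross, int clock)
  {
    if (++reads_this_cycle[regno] > 4)
      access_stall = true;
    if (cross && set_in_cycle[regno] > 0 && set_in_cycle[regno] == clock)
      access_stall = true;
  }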
*/ + +static bool +c6x_registers_update (rtx insn) +{ + enum attr_cross cross; + enum attr_dest_regfile destrf; + int i, nops; + rtx x; + + if (!reload_completed || recog_memoized (insn) < 0) + return false; + + reg_access_stall = false; + memcpy (ss.tmp_reg_n_accesses, ss.reg_n_accesses, + sizeof ss.tmp_reg_n_accesses); + memcpy (ss.tmp_reg_n_xaccesses, ss.reg_n_xaccesses, + sizeof ss.tmp_reg_n_xaccesses); + + extract_insn (insn); + + cross = get_attr_cross (insn); + destrf = get_attr_dest_regfile (insn); + + nops = recog_data.n_operands; + x = PATTERN (insn); + if (GET_CODE (x) == COND_EXEC) + { + c6x_mark_reg_read (XEXP (XEXP (x, 0), 0), false); + nops -= 2; + } + + for (i = 0; i < nops; i++) + { + rtx op = recog_data.operand[i]; + if (recog_data.operand_type[i] == OP_OUT) + continue; + if (REG_P (op)) + { + bool this_cross = cross; + if (destrf == DEST_REGFILE_A && A_REGNO_P (REGNO (op))) + this_cross = false; + if (destrf == DEST_REGFILE_B && B_REGNO_P (REGNO (op))) + this_cross = false; + c6x_mark_reg_read (op, this_cross); + } + else if (MEM_P (op)) + { + op = XEXP (op, 0); + switch (GET_CODE (op)) + { + case POST_INC: + case PRE_INC: + case POST_DEC: + case PRE_DEC: + op = XEXP (op, 0); + /* fall through */ + case REG: + c6x_mark_reg_read (op, false); + break; + case POST_MODIFY: + case PRE_MODIFY: + op = XEXP (op, 1); + gcc_assert (GET_CODE (op) == PLUS); + /* fall through */ + case PLUS: + c6x_mark_reg_read (XEXP (op, 0), false); + if (REG_P (XEXP (op, 1))) + c6x_mark_reg_read (XEXP (op, 1), false); + break; + case SYMBOL_REF: + case LABEL_REF: + case CONST: + c6x_mark_regno_read (REG_B14, false); + break; + default: + gcc_unreachable (); + } + } + else if (!CONSTANT_P (op) && strlen (recog_data.constraints[i]) > 0) + gcc_unreachable (); + } + return reg_access_stall; +} + +/* Helper function for the TARGET_SCHED_REORDER and + TARGET_SCHED_REORDER2 hooks. If scheduling an insn would be unsafe + in the current cycle, move it down in the ready list and return the + number of non-unsafe insns. */ + +static int +c6x_sched_reorder_1 (rtx *ready, int *pn_ready, int clock_var) +{ + int n_ready = *pn_ready; + rtx *e_ready = ready + n_ready; + rtx *insnp; + int first_jump; + + /* Keep track of conflicts due to a limit number of register accesses, + and due to stalls incurred by too early accesses of registers using + cross paths. */ + + for (insnp = ready; insnp < e_ready; insnp++) + { + rtx insn = *insnp; + int icode = recog_memoized (insn); + bool is_asm = (icode < 0 + && (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0)); + bool no_parallel = (is_asm || icode == CODE_FOR_sploop + || (icode >= 0 + && get_attr_type (insn) == TYPE_ATOMIC)); + + /* We delay asm insns until all delay slots are exhausted. We can't + accurately tell how many cycles an asm takes, and the main scheduling + code always assumes at least 1 cycle, which may be wrong. */ + if ((no_parallel + && (ss.issued_this_cycle > 0 || clock_var < ss.delays_finished_at)) + || c6x_registers_update (insn) + || (ss.issued_this_cycle > 0 && icode == CODE_FOR_sploop)) + { + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + n_ready--; + ready++; + } + else if (shadow_p (insn)) + { + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + } + } + + /* Ensure that no other jump is scheduled in jump delay slots, since + it would put the machine into the wrong state. 
Also, we must + avoid scheduling insns that have a latency longer than the + remaining jump delay slots, as the code at the jump destination + won't be prepared for it. + + However, we can relax this condition somewhat. The rest of the + scheduler will automatically avoid scheduling an insn on which + the jump shadow depends so late that its side effect happens + after the jump. This means that if we see an insn with a longer + latency here, it can safely be scheduled if we can ensure that it + has a predicate opposite of the previous jump: the side effect + will happen in what we think of as the same basic block. In + c6x_variable_issue, we will record the necessary predicate in + new_conditions, and after scheduling is finished, we will modify + the insn. + + Special care must be taken whenever there is more than one jump + in flight. */ + + first_jump = first_jump_index (clock_var); + if (first_jump != -1) + { + int first_cycle = get_jump_cycle (first_jump); + rtx first_cond = get_jump_cond (first_jump); + int second_cycle = 0; + + if (first_jump > 0) + second_cycle = get_jump_cycle (first_jump - 1); + + for (insnp = ready; insnp < e_ready; insnp++) + { + rtx insn = *insnp; + int icode = recog_memoized (insn); + bool is_asm = (icode < 0 + && (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0)); + int this_cycles, rsrv_cycles; + enum attr_type type; + + gcc_assert (!is_asm); + if (icode < 0) + continue; + this_cycles = get_attr_cycles (insn); + rsrv_cycles = get_attr_reserve_cycles (insn); + type = get_attr_type (insn); + /* Treat branches specially; there is also a hazard if two jumps + end at the same cycle. */ + if (type == TYPE_BRANCH || type == TYPE_CALL) + this_cycles++; + if (clock_var + this_cycles <= first_cycle) + continue; + if ((first_jump > 0 && clock_var + this_cycles > second_cycle) + || clock_var + rsrv_cycles > first_cycle + || !predicate_insn (insn, first_cond, false)) + { + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + n_ready--; + ready++; + } + } + } + + return n_ready; +} + +/* Implement the TARGET_SCHED_REORDER hook. We save the current clock + for later and clear the register access information for the new + cycle. We also move asm statements out of the way if they would be + scheduled in a delay slot. */ + +static int +c6x_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + rtx *ready ATTRIBUTE_UNUSED, + int *pn_ready ATTRIBUTE_UNUSED, int clock_var) +{ + ss.curr_sched_clock = clock_var; + ss.issued_this_cycle = 0; + memset (ss.reg_n_accesses, 0, sizeof ss.reg_n_accesses); + memset (ss.reg_n_xaccesses, 0, sizeof ss.reg_n_xaccesses); + + if (ready == NULL) + return 0; + + return c6x_sched_reorder_1 (ready, pn_ready, clock_var); +} + +/* Implement the TARGET_SCHED_REORDER2 hook. We use this to record the clock + cycle for every insn. */ + +static int +c6x_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + rtx *ready ATTRIBUTE_UNUSED, + int *pn_ready ATTRIBUTE_UNUSED, int clock_var) +{ + /* FIXME: the assembler rejects labels inside an execute packet. + This can occur if prologue insns are scheduled in parallel with + others, so we avoid this here. Also make sure that nothing is + scheduled in parallel with a TYPE_ATOMIC insn or after a jump. 
*/ + if (RTX_FRAME_RELATED_P (ss.last_scheduled_insn) + || JUMP_P (ss.last_scheduled_insn) + || (recog_memoized (ss.last_scheduled_insn) >= 0 + && get_attr_type (ss.last_scheduled_insn) == TYPE_ATOMIC)) + { + int n_ready = *pn_ready; + rtx *e_ready = ready + n_ready; + rtx *insnp; + + for (insnp = ready; insnp < e_ready; insnp++) + { + rtx insn = *insnp; + if (!shadow_p (insn)) + { + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + n_ready--; + ready++; + } + } + return n_ready; + } + + return c6x_sched_reorder_1 (ready, pn_ready, clock_var); +} + +/* Subroutine of maybe_clobber_cond, called through note_stores. */ + +static void +clobber_cond_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data1) +{ + rtx *cond = (rtx *)data1; + if (*cond != NULL_RTX && reg_overlap_mentioned_p (x, *cond)) + *cond = NULL_RTX; +} + +/* Examine INSN, and if it destroys the conditions have recorded for + any of the jumps in flight, clear that condition so that we don't + predicate any more insns. CLOCK_VAR helps us limit the search to + only those jumps which are still in flight. */ + +static void +maybe_clobber_cond (rtx insn, int clock_var) +{ + int n, idx; + idx = ss.jump_cycle_index; + for (n = 0; n < 12; n++, idx++) + { + rtx cond, link; + int cycle; + + if (idx >= 12) + idx -= 12; + cycle = ss.jump_cycles[idx]; + if (cycle <= clock_var) + return; + + cond = ss.jump_cond[idx]; + if (cond == NULL_RTX) + continue; + + if (CALL_P (insn)) + { + ss.jump_cond[idx] = NULL_RTX; + continue; + } + + note_stores (PATTERN (insn), clobber_cond_1, ss.jump_cond + idx); + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_INC) + clobber_cond_1 (XEXP (link, 0), NULL_RTX, ss.jump_cond + idx); + } +} + +/* Implement the TARGET_SCHED_VARIABLE_ISSUE hook. We are about to + issue INSN. Return the number of insns left on the ready queue + that can be issued this cycle. + We use this hook to record clock cycles and reservations for every insn. 
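maybe_clobber_cond above stops predicating delay-slot insns once the predicate register of an in-flight jump is overwritten, or a call intervenes. A much-simplified standalone sketch in which each pending condition is reduced to the number of the predicate register it tests; the names are invented and the REG_INC handling is omitted:

  #define MAX_JUMPS 12

  static int pending_cond_reg[MAX_JUMPS];    /* -1 if no usable condition */
  static int pending_jump_cycle[MAX_JUMPS];

  /* An insn at cycle CLOCK writes WRITTEN_REGNO (or is a call): any jump
     still in flight whose condition depends on that register can no longer
     have insns predicated into its delay slots.  */
  static void
  clobber_pending_conds (int written_regno, int is_call, int clock)
  {
    int i;

    for (i = 0; i < MAX_JUMPS; i++)
      {
        if (pending_jump_cycle[i] <= clock)
          continue;                       /* jump already completed */
        if (is_call || pending_cond_reg[i] == written_regno)
          pending_cond_reg[i] = -1;
      }
  }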
*/ + +static int +c6x_variable_issue (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + rtx insn, int can_issue_more ATTRIBUTE_UNUSED) +{ + ss.last_scheduled_insn = insn; + if (INSN_UID (insn) < sploop_max_uid_iter0 && !JUMP_P (insn)) + ss.last_scheduled_iter0 = insn; + if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER) + ss.issued_this_cycle++; + if (insn_info.exists ()) + { + state_t st_after = alloca (dfa_state_size); + int curr_clock = ss.curr_sched_clock; + int uid = INSN_UID (insn); + int icode = recog_memoized (insn); + rtx first_cond; + int first, first_cycle; + unsigned int mask; + int i; + + insn_set_clock (insn, curr_clock); + INSN_INFO_ENTRY (uid).ebb_start + = curr_clock == 0 && ss.issued_this_cycle == 1; + + first = first_jump_index (ss.curr_sched_clock); + if (first == -1) + { + first_cycle = 0; + first_cond = NULL_RTX; + } + else + { + first_cycle = get_jump_cycle (first); + first_cond = get_jump_cond (first); + } + if (icode >= 0 + && first_cycle > curr_clock + && first_cond != NULL_RTX + && (curr_clock + get_attr_cycles (insn) > first_cycle + || get_attr_type (insn) == TYPE_BRANCH + || get_attr_type (insn) == TYPE_CALL)) + INSN_INFO_ENTRY (uid).new_cond = first_cond; + + memcpy (st_after, curr_state, dfa_state_size); + state_transition (st_after, const0_rtx); + + mask = 0; + for (i = 0; i < 2 * UNIT_QID_SIDE_OFFSET; i++) + if (cpu_unit_reservation_p (st_after, c6x_unit_codes[i]) + && !cpu_unit_reservation_p (prev_cycle_state, c6x_unit_codes[i])) + mask |= 1 << i; + INSN_INFO_ENTRY (uid).unit_mask = mask; + + maybe_clobber_cond (insn, curr_clock); + + if (icode >= 0) + { + int i, cycles; + + c6x_registers_update (insn); + memcpy (ss.reg_n_accesses, ss.tmp_reg_n_accesses, + sizeof ss.reg_n_accesses); + memcpy (ss.reg_n_xaccesses, ss.tmp_reg_n_accesses, + sizeof ss.reg_n_xaccesses); + + cycles = get_attr_cycles (insn); + if (ss.delays_finished_at < ss.curr_sched_clock + cycles) + ss.delays_finished_at = ss.curr_sched_clock + cycles; + if (get_attr_type (insn) == TYPE_BRANCH + || get_attr_type (insn) == TYPE_CALL) + { + rtx opposite = condjump_opposite_condition (insn); + record_jump (ss.curr_sched_clock + cycles, opposite); + } + + /* Mark the cycles in which the destination registers are written. + This is used for calculating stalls when using cross units. */ + extract_insn (insn); + /* Cross-path stalls don't apply to results of load insns. */ + if (get_attr_type (insn) == TYPE_LOAD + || get_attr_type (insn) == TYPE_LOADN + || get_attr_type (insn) == TYPE_LOAD_SHADOW) + cycles--; + for (i = 0; i < recog_data.n_operands; i++) + { + rtx op = recog_data.operand[i]; + if (MEM_P (op)) + { + rtx addr = XEXP (op, 0); + if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) + c6x_mark_reg_written (XEXP (addr, 0), + insn_uid_get_clock (uid) + 1); + } + if (recog_data.operand_type[i] != OP_IN + && REG_P (op)) + { + c6x_mark_reg_written (op, + insn_uid_get_clock (uid) + cycles); + } + } + } + } + return can_issue_more; +} + +/* Implement the TARGET_SCHED_ADJUST_COST hook. We need special handling for + anti- and output dependencies. 
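+   Output dependencies on multi-cycle insns return most of the producer's
+   latency (less the jump-shadow bonus), while a true dependency that only
+   uses a load's autoincremented address costs a single cycle.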
*/ + +static int +c6x_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + enum attr_type insn_type = TYPE_UNKNOWN, dep_insn_type = TYPE_UNKNOWN; + int dep_insn_code_number, insn_code_number; + int shadow_bonus = 0; + enum reg_note kind; + dep_insn_code_number = recog_memoized (dep_insn); + insn_code_number = recog_memoized (insn); + + if (dep_insn_code_number >= 0) + dep_insn_type = get_attr_type (dep_insn); + + if (insn_code_number >= 0) + insn_type = get_attr_type (insn); + + kind = REG_NOTE_KIND (link); + if (kind == 0) + { + /* If we have a dependency on a load, and it's not for the result of + the load, it must be for an autoincrement. Reduce the cost in that + case. */ + if (dep_insn_type == TYPE_LOAD) + { + rtx set = PATTERN (dep_insn); + if (GET_CODE (set) == COND_EXEC) + set = COND_EXEC_CODE (set); + if (GET_CODE (set) == UNSPEC) + cost = 1; + else + { + gcc_assert (GET_CODE (set) == SET); + if (!reg_overlap_mentioned_p (SET_DEST (set), PATTERN (insn))) + cost = 1; + } + } + } + + /* A jump shadow needs to have its latency decreased by one. Conceptually, + it occurs in between two cycles, but we schedule it at the end of the + first cycle. */ + if (shadow_type_p (insn_type)) + shadow_bonus = 1; + + /* Anti and output dependencies usually have zero cost, but we want + to insert a stall after a jump, and after certain floating point + insns that take more than one cycle to read their inputs. In the + future, we should try to find a better algorithm for scheduling + jumps. */ + if (kind != 0) + { + /* We can get anti-dependencies against shadow insns. Treat these + like output dependencies, so that the insn is entirely finished + before the branch takes place. */ + if (kind == REG_DEP_ANTI && insn_type == TYPE_SHADOW) + kind = REG_DEP_OUTPUT; + switch (dep_insn_type) + { + case TYPE_CALLP: + return 1; + case TYPE_BRANCH: + case TYPE_CALL: + if (get_attr_has_shadow (dep_insn) == HAS_SHADOW_Y) + /* This is a real_jump/real_call insn. These don't have + outputs, and ensuring the validity of scheduling things + in the delay slot is the job of + c6x_sched_reorder_1. */ + return 0; + /* Unsplit calls can happen - e.g. for divide insns. */ + return 6; + case TYPE_LOAD: + case TYPE_LOADN: + case TYPE_INTDP: + if (kind == REG_DEP_OUTPUT) + return 5 - shadow_bonus; + return 0; + case TYPE_MPY4: + case TYPE_FP4: + if (kind == REG_DEP_OUTPUT) + return 4 - shadow_bonus; + return 0; + case TYPE_MPY2: + if (kind == REG_DEP_OUTPUT) + return 2 - shadow_bonus; + return 0; + case TYPE_CMPDP: + if (kind == REG_DEP_OUTPUT) + return 2 - shadow_bonus; + return 2; + case TYPE_ADDDP: + case TYPE_MPYSPDP: + if (kind == REG_DEP_OUTPUT) + return 7 - shadow_bonus; + return 2; + case TYPE_MPYSP2DP: + if (kind == REG_DEP_OUTPUT) + return 5 - shadow_bonus; + return 2; + case TYPE_MPYI: + if (kind == REG_DEP_OUTPUT) + return 9 - shadow_bonus; + return 4; + case TYPE_MPYID: + case TYPE_MPYDP: + if (kind == REG_DEP_OUTPUT) + return 10 - shadow_bonus; + return 4; + + default: + if (insn_type == TYPE_SPKERNEL) + return 0; + if (kind == REG_DEP_OUTPUT) + return 1 - shadow_bonus; + + return 0; + } + } + + return cost - shadow_bonus; +} + +/* Create a SEQUENCE rtx to replace the instructions in SLOT, of which there + are N_FILLED. REAL_FIRST identifies the slot if the insn that appears + first in the original stream. 
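+   The new SEQUENCE is spliced into the insn chain at that insn's position.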
*/ + +static void +gen_one_bundle (rtx *slot, int n_filled, int real_first) +{ + rtx bundle; + rtx t; + int i; + + bundle = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (n_filled, slot)); + bundle = make_insn_raw (bundle); + BLOCK_FOR_INSN (bundle) = BLOCK_FOR_INSN (slot[0]); + INSN_LOCATION (bundle) = INSN_LOCATION (slot[0]); + PREV_INSN (bundle) = PREV_INSN (slot[real_first]); + + t = NULL_RTX; + + for (i = 0; i < n_filled; i++) + { + rtx insn = slot[i]; + remove_insn (insn); + PREV_INSN (insn) = t ? t : PREV_INSN (bundle); + if (t != NULL_RTX) + NEXT_INSN (t) = insn; + t = insn; + if (i > 0) + INSN_LOCATION (slot[i]) = INSN_LOCATION (bundle); + } + + NEXT_INSN (bundle) = NEXT_INSN (PREV_INSN (bundle)); + NEXT_INSN (t) = NEXT_INSN (bundle); + NEXT_INSN (PREV_INSN (bundle)) = bundle; + PREV_INSN (NEXT_INSN (bundle)) = bundle; +} + +/* Move all parallel instructions into SEQUENCEs, so that no subsequent passes + try to insert labels in the middle. */ + +static void +c6x_gen_bundles (void) +{ + basic_block bb; + rtx insn, next, last_call; + + FOR_EACH_BB_FN (bb, cfun) + { + rtx insn, next; + /* The machine is eight insns wide. We can have up to six shadow + insns, plus an extra slot for merging the jump shadow. */ + rtx slot[15]; + int n_filled = 0; + int first_slot = 0; + + for (insn = BB_HEAD (bb);; insn = next) + { + int at_end; + rtx delete_this = NULL_RTX; + + if (NONDEBUG_INSN_P (insn)) + { + /* Put calls at the start of the sequence. */ + if (CALL_P (insn)) + { + first_slot++; + if (n_filled) + { + memmove (&slot[1], &slot[0], + n_filled * sizeof (slot[0])); + } + if (!shadow_p (insn)) + { + PUT_MODE (insn, TImode); + if (n_filled) + PUT_MODE (slot[1], VOIDmode); + } + n_filled++; + slot[0] = insn; + } + else + { + slot[n_filled++] = insn; + } + } + + next = NEXT_INSN (insn); + while (next && insn != BB_END (bb) + && !(NONDEBUG_INSN_P (next) + && GET_CODE (PATTERN (next)) != USE + && GET_CODE (PATTERN (next)) != CLOBBER)) + { + insn = next; + next = NEXT_INSN (insn); + } + + at_end = insn == BB_END (bb); + if (delete_this == NULL_RTX + && (at_end || (GET_MODE (next) == TImode + && !(shadow_p (next) && CALL_P (next))))) + { + if (n_filled >= 2) + gen_one_bundle (slot, n_filled, first_slot); + + n_filled = 0; + first_slot = 0; + } + if (at_end) + break; + } + } + /* Bundling, and emitting nops, can separate + NOTE_INSN_CALL_ARG_LOCATION from the corresponding calls. Fix + that up here. */ + last_call = NULL_RTX; + for (insn = get_insns (); insn; insn = next) + { + next = NEXT_INSN (insn); + if (CALL_P (insn) + || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE + && CALL_P (XVECEXP (PATTERN (insn), 0, 0)))) + last_call = insn; + if (!NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_CALL_ARG_LOCATION) + continue; + if (NEXT_INSN (last_call) == insn) + continue; + NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn); + PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn); + PREV_INSN (insn) = last_call; + NEXT_INSN (insn) = NEXT_INSN (last_call); + PREV_INSN (NEXT_INSN (insn)) = insn; + NEXT_INSN (PREV_INSN (insn)) = insn; + last_call = insn; + } +} + +/* Emit a NOP instruction for CYCLES cycles after insn AFTER. Return it. */ + +static rtx +emit_nop_after (int cycles, rtx after) +{ + rtx insn; + + /* mpydp has 9 delay slots, and we may schedule a stall for a cross-path + operation. We don't need the extra NOP since in this case, the hardware + will automatically insert the required stall. 
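+   Hence a request for 10 cycles is silently reduced to a 9 cycle NOP.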
*/ + if (cycles == 10) + cycles--; + + gcc_assert (cycles < 10); + + insn = emit_insn_after (gen_nop_count (GEN_INT (cycles)), after); + PUT_MODE (insn, TImode); + + return insn; +} + +/* Determine whether INSN is a call that needs to have a return label + placed. */ + +static bool +returning_call_p (rtx insn) +{ + if (CALL_P (insn)) + return (!SIBLING_CALL_P (insn) + && get_attr_type (insn) != TYPE_CALLP + && get_attr_type (insn) != TYPE_SHADOW); + if (recog_memoized (insn) < 0) + return false; + if (get_attr_type (insn) == TYPE_CALL) + return true; + return false; +} + +/* Determine whether INSN's pattern can be converted to use callp. */ +static bool +can_use_callp (rtx insn) +{ + int icode = recog_memoized (insn); + if (!TARGET_INSNS_64PLUS + || icode < 0 + || GET_CODE (PATTERN (insn)) == COND_EXEC) + return false; + + return ((icode == CODE_FOR_real_call + || icode == CODE_FOR_call_internal + || icode == CODE_FOR_call_value_internal) + && get_attr_dest_regfile (insn) == DEST_REGFILE_ANY); +} + +/* Convert the pattern of INSN, which must be a CALL_INSN, into a callp. */ +static void +convert_to_callp (rtx insn) +{ + rtx lab; + extract_insn (insn); + if (GET_CODE (PATTERN (insn)) == SET) + { + rtx dest = recog_data.operand[0]; + lab = recog_data.operand[1]; + PATTERN (insn) = gen_callp_value (dest, lab); + INSN_CODE (insn) = CODE_FOR_callp_value; + } + else + { + lab = recog_data.operand[0]; + PATTERN (insn) = gen_callp (lab); + INSN_CODE (insn) = CODE_FOR_callp; + } +} + +/* Scan forwards from INSN until we find the next insn that has mode TImode + (indicating it starts a new cycle), and occurs in cycle CLOCK. + Return it if we find such an insn, NULL_RTX otherwise. */ +static rtx +find_next_cycle_insn (rtx insn, int clock) +{ + rtx t = insn; + if (GET_MODE (t) == TImode) + t = next_real_insn (t); + while (t && GET_MODE (t) != TImode) + t = next_real_insn (t); + + if (t && insn_get_clock (t) == clock) + return t; + return NULL_RTX; +} + +/* If COND_INSN has a COND_EXEC condition, wrap the same condition + around PAT. Return PAT either unchanged or modified in this + way. */ +static rtx +duplicate_cond (rtx pat, rtx cond_insn) +{ + rtx cond_pat = PATTERN (cond_insn); + if (GET_CODE (cond_pat) == COND_EXEC) + pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (cond_pat)), + pat); + return pat; +} + +/* Walk forward from INSN to find the last insn that issues in the same clock + cycle. */ +static rtx +find_last_same_clock (rtx insn) +{ + rtx retval = insn; + rtx t = next_real_insn (insn); + + while (t && GET_MODE (t) != TImode) + { + if (!DEBUG_INSN_P (t) && recog_memoized (t) >= 0) + retval = t; + t = next_real_insn (t); + } + return retval; +} + +/* For every call insn in the function, emit code to load the return + address. For each call we create a return label and store it in + CALL_LABELS. If are not scheduling, we emit the labels here, + otherwise the caller will do it later. + This function is called after final insn scheduling, but before creating + the SEQUENCEs that represent execute packets. 
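+   The return-address load is either folded into a CALLP, bundled directly
+   with the call when we are not scheduling, or placed into the .S2 slots
+   reserved during the call's delay cycles when we are.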
*/ + +static void +reorg_split_calls (rtx *call_labels) +{ + unsigned int reservation_mask = 0; + rtx insn = get_insns (); + gcc_assert (NOTE_P (insn)); + insn = next_real_insn (insn); + while (insn) + { + int uid; + rtx next = next_real_insn (insn); + + if (DEBUG_INSN_P (insn)) + goto done; + + if (GET_MODE (insn) == TImode) + reservation_mask = 0; + uid = INSN_UID (insn); + if (c6x_flag_schedule_insns2 && recog_memoized (insn) >= 0) + reservation_mask |= 1 << INSN_INFO_ENTRY (uid).reservation; + + if (returning_call_p (insn)) + { + rtx label = gen_label_rtx (); + rtx labelref = gen_rtx_LABEL_REF (Pmode, label); + rtx reg = gen_rtx_REG (SImode, RETURN_ADDR_REGNO); + + LABEL_NUSES (label) = 2; + if (!c6x_flag_schedule_insns2) + { + if (can_use_callp (insn)) + convert_to_callp (insn); + else + { + rtx t; + rtx slot[4]; + emit_label_after (label, insn); + + /* Bundle the call and its delay slots into a single + SEQUENCE. While these do not issue in parallel + we need to group them into a single EH region. */ + slot[0] = insn; + PUT_MODE (insn, TImode); + if (TARGET_INSNS_64) + { + t = gen_addkpc (reg, labelref, GEN_INT (4)); + slot[1] = emit_insn_after (duplicate_cond (t, insn), + insn); + PUT_MODE (slot[1], TImode); + gen_one_bundle (slot, 2, 0); + } + else + { + slot[3] = emit_insn_after (gen_nop_count (GEN_INT (3)), + insn); + PUT_MODE (slot[3], TImode); + t = gen_movsi_lo_sum (reg, reg, labelref); + slot[2] = emit_insn_after (duplicate_cond (t, insn), + insn); + PUT_MODE (slot[2], TImode); + t = gen_movsi_high (reg, labelref); + slot[1] = emit_insn_after (duplicate_cond (t, insn), + insn); + PUT_MODE (slot[1], TImode); + gen_one_bundle (slot, 4, 0); + } + } + } + else + { + /* If we scheduled, we reserved the .S2 unit for one or two + cycles after the call. Emit the insns in these slots, + unless it's possible to create a CALLP insn. + Note that this works because the dependencies ensure that + no insn setting/using B3 is scheduled in the delay slots of + a call. */ + int this_clock = insn_get_clock (insn); + rtx last_same_clock; + rtx after1; + + call_labels[INSN_UID (insn)] = label; + + last_same_clock = find_last_same_clock (insn); + + if (can_use_callp (insn)) + { + /* Find the first insn of the next execute packet. If it + is the shadow insn corresponding to this call, we may + use a CALLP insn. 
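+                    The shadow must be scheduled exactly five cycles after
+                    the call, which is the delay a CALLP covers implicitly
+                    (cf. prev_implicit_nops in reorg_emit_nops).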
*/ + rtx shadow = next_nonnote_nondebug_insn (last_same_clock); + + if (CALL_P (shadow) + && insn_get_clock (shadow) == this_clock + 5) + { + convert_to_callp (shadow); + insn_set_clock (shadow, this_clock); + INSN_INFO_ENTRY (INSN_UID (shadow)).reservation + = RESERVATION_S2; + INSN_INFO_ENTRY (INSN_UID (shadow)).unit_mask + = INSN_INFO_ENTRY (INSN_UID (last_same_clock)).unit_mask; + if (GET_MODE (insn) == TImode) + { + rtx new_cycle_first = NEXT_INSN (insn); + while (!NONDEBUG_INSN_P (new_cycle_first) + || GET_CODE (PATTERN (new_cycle_first)) == USE + || GET_CODE (PATTERN (new_cycle_first)) == CLOBBER) + new_cycle_first = NEXT_INSN (new_cycle_first); + PUT_MODE (new_cycle_first, TImode); + if (new_cycle_first != shadow) + PUT_MODE (shadow, VOIDmode); + INSN_INFO_ENTRY (INSN_UID (new_cycle_first)).ebb_start + = INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start; + } + else + PUT_MODE (shadow, VOIDmode); + delete_insn (insn); + goto done; + } + } + after1 = find_next_cycle_insn (last_same_clock, this_clock + 1); + if (after1 == NULL_RTX) + after1 = last_same_clock; + else + after1 = find_last_same_clock (after1); + if (TARGET_INSNS_64) + { + rtx x1 = gen_addkpc (reg, labelref, const0_rtx); + x1 = emit_insn_after (duplicate_cond (x1, insn), after1); + insn_set_clock (x1, this_clock + 1); + INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2; + if (after1 == last_same_clock) + PUT_MODE (x1, TImode); + else + INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask + = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask; + } + else + { + rtx x1, x2; + rtx after2 = find_next_cycle_insn (after1, this_clock + 2); + if (after2 == NULL_RTX) + after2 = after1; + x2 = gen_movsi_lo_sum (reg, reg, labelref); + x2 = emit_insn_after (duplicate_cond (x2, insn), after2); + x1 = gen_movsi_high (reg, labelref); + x1 = emit_insn_after (duplicate_cond (x1, insn), after1); + insn_set_clock (x1, this_clock + 1); + insn_set_clock (x2, this_clock + 2); + INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2; + INSN_INFO_ENTRY (INSN_UID (x2)).reservation = RESERVATION_S2; + if (after1 == last_same_clock) + PUT_MODE (x1, TImode); + else + INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask + = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask; + if (after1 == after2) + PUT_MODE (x2, TImode); + else + INSN_INFO_ENTRY (INSN_UID (x2)).unit_mask + = INSN_INFO_ENTRY (INSN_UID (after2)).unit_mask; + } + } + } + done: + insn = next; + } +} + +/* Called as part of c6x_reorg. This function emits multi-cycle NOP + insns as required for correctness. CALL_LABELS is the array that + holds the return labels for call insns; we emit these here if + scheduling was run earlier. */ + +static void +reorg_emit_nops (rtx *call_labels) +{ + bool first; + rtx prev, last_call; + int prev_clock, earliest_bb_end; + int prev_implicit_nops; + rtx insn = get_insns (); + + /* We look at one insn (or bundle inside a sequence) in each iteration, storing + its issue time in PREV_CLOCK for the next iteration. If there is a gap in + clocks, we must insert a NOP. + EARLIEST_BB_END tracks in which cycle all insns that have been issued in the + current basic block will finish. We must not allow the next basic block to + begin before this cycle. + PREV_IMPLICIT_NOPS tells us whether we've seen an insn that implicitly contains + a multi-cycle nop. The code is scheduled such that subsequent insns will + show the cycle gap, but we needn't insert a real NOP instruction. 
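+     (CALLP is such an insn; it implicitly provides five cycles of delay.)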
*/ + insn = next_real_insn (insn); + last_call = prev = NULL_RTX; + prev_clock = -1; + earliest_bb_end = 0; + prev_implicit_nops = 0; + first = true; + while (insn) + { + int this_clock = -1; + rtx next; + int max_cycles = 0; + + next = next_real_insn (insn); + + if (DEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || shadow_or_blockage_p (insn) + || JUMP_TABLE_DATA_P (insn)) + goto next_insn; + + if (!c6x_flag_schedule_insns2) + /* No scheduling; ensure that no parallel issue happens. */ + PUT_MODE (insn, TImode); + else + { + int cycles; + + this_clock = insn_get_clock (insn); + if (this_clock != prev_clock) + { + PUT_MODE (insn, TImode); + + if (!first) + { + cycles = this_clock - prev_clock; + + cycles -= prev_implicit_nops; + if (cycles > 1) + { + rtx nop = emit_nop_after (cycles - 1, prev); + insn_set_clock (nop, prev_clock + prev_implicit_nops + 1); + } + } + prev_clock = this_clock; + + if (last_call + && insn_get_clock (last_call) + 6 <= this_clock) + { + emit_label_before (call_labels[INSN_UID (last_call)], insn); + last_call = NULL_RTX; + } + prev_implicit_nops = 0; + } + } + + /* Examine how many cycles the current insn takes, and adjust + LAST_CALL, EARLIEST_BB_END and PREV_IMPLICIT_NOPS. */ + if (recog_memoized (insn) >= 0 + /* If not scheduling, we've emitted NOPs after calls already. */ + && (c6x_flag_schedule_insns2 || !returning_call_p (insn))) + { + max_cycles = get_attr_cycles (insn); + if (get_attr_type (insn) == TYPE_CALLP) + prev_implicit_nops = 5; + } + else + max_cycles = 1; + if (returning_call_p (insn)) + last_call = insn; + + if (c6x_flag_schedule_insns2) + { + gcc_assert (this_clock >= 0); + if (earliest_bb_end < this_clock + max_cycles) + earliest_bb_end = this_clock + max_cycles; + } + else if (max_cycles > 1) + emit_nop_after (max_cycles - 1, insn); + + prev = insn; + first = false; + + next_insn: + if (c6x_flag_schedule_insns2 + && (next == NULL_RTX + || (GET_MODE (next) == TImode + && INSN_INFO_ENTRY (INSN_UID (next)).ebb_start)) + && earliest_bb_end > 0) + { + int cycles = earliest_bb_end - prev_clock; + if (cycles > 1) + { + prev = emit_nop_after (cycles - 1, prev); + insn_set_clock (prev, prev_clock + prev_implicit_nops + 1); + } + earliest_bb_end = 0; + prev_clock = -1; + first = true; + + if (last_call) + emit_label_after (call_labels[INSN_UID (last_call)], prev); + last_call = NULL_RTX; + } + insn = next; + } +} + +/* If possible, split INSN, which we know is either a jump or a call, into a real + insn and its shadow. 
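+   The real part is emitted just before INSN, INSN itself is rewritten into
+   the shadow pattern, and the pair is recorded with a distance of five
+   cycles.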
*/ +static void +split_delayed_branch (rtx insn) +{ + int code = recog_memoized (insn); + rtx i1, newpat; + rtx pat = PATTERN (insn); + + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + + if (CALL_P (insn)) + { + rtx src = pat, dest = NULL_RTX; + rtx callee; + if (GET_CODE (pat) == SET) + { + dest = SET_DEST (pat); + src = SET_SRC (pat); + } + callee = XEXP (XEXP (src, 0), 0); + if (SIBLING_CALL_P (insn)) + { + if (REG_P (callee)) + newpat = gen_indirect_sibcall_shadow (); + else + newpat = gen_sibcall_shadow (callee); + pat = gen_real_jump (callee); + } + else if (dest != NULL_RTX) + { + if (REG_P (callee)) + newpat = gen_indirect_call_value_shadow (dest); + else + newpat = gen_call_value_shadow (dest, callee); + pat = gen_real_call (callee); + } + else + { + if (REG_P (callee)) + newpat = gen_indirect_call_shadow (); + else + newpat = gen_call_shadow (callee); + pat = gen_real_call (callee); + } + pat = duplicate_cond (pat, insn); + newpat = duplicate_cond (newpat, insn); + } + else + { + rtx src, op; + if (GET_CODE (pat) == PARALLEL + && GET_CODE (XVECEXP (pat, 0, 0)) == RETURN) + { + newpat = gen_return_shadow (); + pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0)); + newpat = duplicate_cond (newpat, insn); + } + else + switch (code) + { + case CODE_FOR_br_true: + case CODE_FOR_br_false: + src = SET_SRC (pat); + op = XEXP (src, code == CODE_FOR_br_true ? 1 : 2); + newpat = gen_condjump_shadow (op); + pat = gen_real_jump (op); + if (code == CODE_FOR_br_true) + pat = gen_rtx_COND_EXEC (VOIDmode, XEXP (src, 0), pat); + else + pat = gen_rtx_COND_EXEC (VOIDmode, + reversed_comparison (XEXP (src, 0), + VOIDmode), + pat); + break; + + case CODE_FOR_jump: + op = SET_SRC (pat); + newpat = gen_jump_shadow (op); + break; + + case CODE_FOR_indirect_jump: + newpat = gen_indirect_jump_shadow (); + break; + + case CODE_FOR_return_internal: + newpat = gen_return_shadow (); + pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0)); + break; + + default: + return; + } + } + i1 = emit_insn_before (pat, insn); + PATTERN (insn) = newpat; + INSN_CODE (insn) = -1; + record_delay_slot_pair (i1, insn, 5, 0); +} + +/* If INSN is a multi-cycle insn that should be handled properly in + modulo-scheduling, split it into a real insn and a shadow. + Return true if we made a change. + + It is valid for us to fail to split an insn; the caller has to deal + with the possibility. Currently we handle loads and most mpy2 and + mpy4 insns. */ +static bool +split_delayed_nonbranch (rtx insn) +{ + int code = recog_memoized (insn); + enum attr_type type; + rtx i1, newpat, src, dest; + rtx pat = PATTERN (insn); + rtvec rtv; + int delay; + + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + + if (code < 0 || GET_CODE (pat) != SET) + return false; + src = SET_SRC (pat); + dest = SET_DEST (pat); + if (!REG_P (dest)) + return false; + + type = get_attr_type (insn); + if (code >= 0 + && (type == TYPE_LOAD + || type == TYPE_LOADN)) + { + if (!MEM_P (src) + && (GET_CODE (src) != ZERO_EXTEND + || !MEM_P (XEXP (src, 0)))) + return false; + + if (GET_MODE_SIZE (GET_MODE (dest)) > 4 + && (GET_MODE_SIZE (GET_MODE (dest)) != 8 || !TARGET_LDDW)) + return false; + + rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))), + SET_SRC (pat)); + newpat = gen_load_shadow (SET_DEST (pat)); + pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_LOAD); + delay = 4; + } + else if (code >= 0 + && (type == TYPE_MPY2 + || type == TYPE_MPY4)) + { + /* We don't handle floating point multiplies yet. 
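+	 Refusing the split here means hwloop_optimize will be unable to
+	 pipeline a loop that contains such a multiply.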
*/ + if (GET_MODE (dest) == SFmode) + return false; + + rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))), + SET_SRC (pat)); + newpat = gen_mult_shadow (SET_DEST (pat)); + pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_MULT); + delay = type == TYPE_MPY2 ? 1 : 3; + } + else + return false; + + pat = duplicate_cond (pat, insn); + newpat = duplicate_cond (newpat, insn); + i1 = emit_insn_before (pat, insn); + PATTERN (insn) = newpat; + INSN_CODE (insn) = -1; + recog_memoized (insn); + recog_memoized (i1); + record_delay_slot_pair (i1, insn, delay, 0); + return true; +} + +/* Examine if INSN is the result of splitting a load into a real load and a + shadow, and if so, undo the transformation. */ +static void +undo_split_delayed_nonbranch (rtx insn) +{ + int icode = recog_memoized (insn); + enum attr_type type; + rtx prev_pat, insn_pat, prev; + + if (icode < 0) + return; + type = get_attr_type (insn); + if (type != TYPE_LOAD_SHADOW && type != TYPE_MULT_SHADOW) + return; + prev = PREV_INSN (insn); + prev_pat = PATTERN (prev); + insn_pat = PATTERN (insn); + if (GET_CODE (prev_pat) == COND_EXEC) + { + prev_pat = COND_EXEC_CODE (prev_pat); + insn_pat = COND_EXEC_CODE (insn_pat); + } + + gcc_assert (GET_CODE (prev_pat) == UNSPEC + && ((XINT (prev_pat, 1) == UNSPEC_REAL_LOAD + && type == TYPE_LOAD_SHADOW) + || (XINT (prev_pat, 1) == UNSPEC_REAL_MULT + && type == TYPE_MULT_SHADOW))); + insn_pat = gen_rtx_SET (VOIDmode, SET_DEST (insn_pat), + XVECEXP (prev_pat, 0, 1)); + insn_pat = duplicate_cond (insn_pat, prev); + PATTERN (insn) = insn_pat; + INSN_CODE (insn) = -1; + delete_insn (prev); +} + +/* Split every insn (i.e. jumps and calls) which can have delay slots into + two parts: the first one is scheduled normally and emits the instruction, + while the second one is a shadow insn which shows the side effect taking + place. The second one is placed in the right cycle by the scheduler, but + not emitted as an assembly instruction. */ + +static void +split_delayed_insns (void) +{ + rtx insn; + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (JUMP_P (insn) || CALL_P (insn)) + split_delayed_branch (insn); + } +} + +/* For every insn that has an entry in the new_conditions vector, give it + the appropriate predicate. */ +static void +conditionalize_after_sched (void) +{ + basic_block bb; + rtx insn; + FOR_EACH_BB_FN (bb, cfun) + FOR_BB_INSNS (bb, insn) + { + unsigned uid = INSN_UID (insn); + rtx cond; + if (!NONDEBUG_INSN_P (insn) || uid >= INSN_INFO_LENGTH) + continue; + cond = INSN_INFO_ENTRY (uid).new_cond; + if (cond == NULL_RTX) + continue; + if (dump_file) + fprintf (dump_file, "Conditionalizing insn %d\n", uid); + predicate_insn (insn, cond, true); + } +} + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a loop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. */ + +static rtx +hwloop_pattern_reg (rtx insn) +{ + rtx pat, reg; + + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end) + return NULL_RTX; + + pat = PATTERN (insn); + reg = SET_DEST (XVECEXP (pat, 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + +/* Return the number of cycles taken by BB, as computed by scheduling, + including the latencies of all insns with delay slots. IGNORE is + an insn we should ignore in the calculation, usually the final + branch. 
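+   The value returned is the maximum, over all insns in BB, of issue clock
+   plus latency.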
*/ +static int +bb_earliest_end_cycle (basic_block bb, rtx ignore) +{ + int earliest = 0; + rtx insn; + + FOR_BB_INSNS (bb, insn) + { + int cycles, this_clock; + + if (LABEL_P (insn) || NOTE_P (insn) || DEBUG_INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER + || insn == ignore) + continue; + + this_clock = insn_get_clock (insn); + cycles = get_attr_cycles (insn); + + if (earliest < this_clock + cycles) + earliest = this_clock + cycles; + } + return earliest; +} + +/* Examine the insns in BB and remove all which have a uid greater or + equal to MAX_UID. */ +static void +filter_insns_above (basic_block bb, int max_uid) +{ + rtx insn, next; + bool prev_ti = false; + int prev_cycle = -1; + + FOR_BB_INSNS_SAFE (bb, insn, next) + { + int this_cycle; + if (!NONDEBUG_INSN_P (insn)) + continue; + if (insn == BB_END (bb)) + return; + this_cycle = insn_get_clock (insn); + if (prev_ti && this_cycle == prev_cycle) + { + gcc_assert (GET_MODE (insn) != TImode); + PUT_MODE (insn, TImode); + } + prev_ti = false; + if (INSN_UID (insn) >= max_uid) + { + if (GET_MODE (insn) == TImode) + { + prev_ti = true; + prev_cycle = this_cycle; + } + delete_insn (insn); + } + } +} + +/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */ + +static void +c6x_asm_emit_except_personality (rtx personality) +{ + fputs ("\t.personality\t", asm_out_file); + output_addr_const (asm_out_file, personality); + fputc ('\n', asm_out_file); +} + +/* Use a special assembly directive rather than a regular setion for + unwind table data. */ + +static void +c6x_asm_init_sections (void) +{ + exception_section = get_unnamed_section (0, output_section_asm_op, + "\t.handlerdata"); +} + +/* A callback for the hw-doloop pass. Called to optimize LOOP in a + machine-specific fashion; returns true if successful and false if + the hwloop_fail function should be called. */ + +static bool +hwloop_optimize (hwloop_info loop) +{ + basic_block entry_bb, bb; + rtx seq, insn, prev, entry_after, end_packet; + rtx head_insn, tail_insn, new_insns, last_insn; + int loop_earliest; + int n_execute_packets; + edge entry_edge; + unsigned ix; + int max_uid_before, delayed_splits; + int i, sp_ii, min_ii, max_ii, max_parallel, n_insns, n_real_insns, stages; + rtx *orig_vec; + rtx *copies; + rtx **insn_copies; + + if (!c6x_flag_modulo_sched || !c6x_flag_schedule_insns2 + || !TARGET_INSNS_64PLUS) + return false; + + if (loop->iter_reg_used || loop->depth > 1) + return false; + if (loop->has_call || loop->has_asm) + return false; + + if (loop->head != loop->tail) + return false; + + gcc_assert (loop->incoming_dest == loop->head); + + entry_edge = NULL; + FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge) + if (entry_edge->flags & EDGE_FALLTHRU) + break; + if (entry_edge == NULL) + return false; + + reshuffle_units (loop->head); + + in_hwloop = true; + schedule_ebbs_init (); + schedule_ebb (BB_HEAD (loop->tail), loop->loop_end, true); + schedule_ebbs_finish (); + in_hwloop = false; + + bb = loop->head; + loop_earliest = bb_earliest_end_cycle (bb, loop->loop_end) + 1; + + max_uid_before = get_max_uid (); + + /* Split all multi-cycle operations, such as loads. For normal + scheduling, we only do this for branches, as the generated code + would otherwise not be interrupt-safe. When using sploop, it is + safe and beneficial to split them. If any multi-cycle operations + remain after splitting (because we don't handle them yet), we + cannot pipeline the loop. 
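+     If that happens we undo the splits made so far and give up on this
+     loop.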
*/ + delayed_splits = 0; + FOR_BB_INSNS (bb, insn) + { + if (NONDEBUG_INSN_P (insn)) + { + recog_memoized (insn); + if (split_delayed_nonbranch (insn)) + delayed_splits++; + else if (INSN_CODE (insn) >= 0 + && get_attr_cycles (insn) > 1) + goto undo_splits; + } + } + + /* Count the number of insns as well as the number real insns, and save + the original sequence of insns in case we must restore it later. */ + n_insns = n_real_insns = 0; + FOR_BB_INSNS (bb, insn) + { + n_insns++; + if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end) + n_real_insns++; + } + orig_vec = XNEWVEC (rtx, n_insns); + n_insns = 0; + FOR_BB_INSNS (bb, insn) + orig_vec[n_insns++] = insn; + + /* Count the unit reservations, and compute a minimum II from that + table. */ + count_unit_reqs (unit_reqs, loop->start_label, + PREV_INSN (loop->loop_end)); + merge_unit_reqs (unit_reqs); + + min_ii = res_mii (unit_reqs); + max_ii = loop_earliest < 15 ? loop_earliest : 14; + + /* Make copies of the loop body, up to a maximum number of stages we want + to handle. */ + max_parallel = loop_earliest / min_ii + 1; + + copies = XCNEWVEC (rtx, (max_parallel + 1) * n_real_insns); + insn_copies = XNEWVEC (rtx *, max_parallel + 1); + for (i = 0; i < max_parallel + 1; i++) + insn_copies[i] = copies + i * n_real_insns; + + head_insn = next_nonnote_nondebug_insn (loop->start_label); + tail_insn = prev_real_insn (BB_END (bb)); + + i = 0; + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end) + insn_copies[0][i++] = insn; + + sploop_max_uid_iter0 = get_max_uid (); + + /* Generate the copies of the loop body, and save them in the + INSN_COPIES array. */ + start_sequence (); + for (i = 0; i < max_parallel; i++) + { + int j; + rtx this_iter; + + this_iter = duplicate_insn_chain (head_insn, tail_insn); + j = 0; + while (this_iter) + { + rtx prev_stage_insn = insn_copies[i][j]; + gcc_assert (INSN_CODE (this_iter) == INSN_CODE (prev_stage_insn)); + + if (INSN_CODE (this_iter) >= 0 + && (get_attr_type (this_iter) == TYPE_LOAD_SHADOW + || get_attr_type (this_iter) == TYPE_MULT_SHADOW)) + { + rtx prev = PREV_INSN (this_iter); + record_delay_slot_pair (prev, this_iter, + get_attr_cycles (prev) - 1, 0); + } + else + record_delay_slot_pair (prev_stage_insn, this_iter, i, 1); + + insn_copies[i + 1][j] = this_iter; + j++; + this_iter = next_nonnote_nondebug_insn (this_iter); + } + } + new_insns = get_insns (); + last_insn = insn_copies[max_parallel][n_real_insns - 1]; + end_sequence (); + emit_insn_before (new_insns, BB_END (bb)); + + /* Try to schedule the loop using varying initiation intervals, + starting with the smallest possible and incrementing it + on failure. */ + for (sp_ii = min_ii; sp_ii <= max_ii; sp_ii++) + { + basic_block tmp_bb; + if (dump_file) + fprintf (dump_file, "Trying to schedule for II %d\n", sp_ii); + + df_clear_flags (DF_LR_RUN_DCE); + + schedule_ebbs_init (); + set_modulo_params (sp_ii, max_parallel, n_real_insns, + sploop_max_uid_iter0); + tmp_bb = schedule_ebb (BB_HEAD (bb), last_insn, true); + schedule_ebbs_finish (); + + if (tmp_bb) + { + if (dump_file) + fprintf (dump_file, "Found schedule with II %d\n", sp_ii); + break; + } + } + + discard_delay_pairs_above (max_uid_before); + + if (sp_ii > max_ii) + goto restore_loop; + + stages = insn_get_clock (ss.last_scheduled_iter0) / sp_ii + 1; + + if (stages == 1 && sp_ii > 5) + goto restore_loop; + + /* At this point, we know we've been successful, unless we find later that + there are too many execute packets for the loop buffer to hold. 
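+     (The loop buffer can hold at most 14 execute packets; see the check on
+     n_execute_packets below.)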
*/ + + /* Assign reservations to the instructions in the loop. We must find + the stage that contains the full loop kernel, and transfer the + reservations of the instructions contained in it to the corresponding + instructions from iteration 0, which are the only ones we'll keep. */ + assign_reservations (BB_HEAD (bb), ss.last_scheduled_insn); + PREV_INSN (BB_END (bb)) = ss.last_scheduled_iter0; + NEXT_INSN (ss.last_scheduled_iter0) = BB_END (bb); + filter_insns_above (bb, sploop_max_uid_iter0); + + for (i = 0; i < n_real_insns; i++) + { + rtx insn = insn_copies[0][i]; + int uid = INSN_UID (insn); + int stage = insn_uid_get_clock (uid) / sp_ii; + + if (stage + 1 < stages) + { + int copy_uid; + stage = stages - stage - 1; + copy_uid = INSN_UID (insn_copies[stage][i]); + INSN_INFO_ENTRY (uid).reservation + = INSN_INFO_ENTRY (copy_uid).reservation; + } + } + if (stages == 1) + stages++; + + /* Compute the number of execute packets the pipelined form of the loop will + require. */ + prev = NULL_RTX; + n_execute_packets = 0; + for (insn = loop->start_label; insn != loop->loop_end; insn = NEXT_INSN (insn)) + { + if (NONDEBUG_INSN_P (insn) && GET_MODE (insn) == TImode + && !shadow_p (insn)) + { + n_execute_packets++; + if (prev && insn_get_clock (prev) + 1 != insn_get_clock (insn)) + /* We need an extra NOP instruction. */ + n_execute_packets++; + + prev = insn; + } + } + + end_packet = ss.last_scheduled_iter0; + while (!NONDEBUG_INSN_P (end_packet) || GET_MODE (end_packet) != TImode) + end_packet = PREV_INSN (end_packet); + + /* The earliest cycle in which we can emit the SPKERNEL instruction. */ + loop_earliest = (stages - 1) * sp_ii; + if (loop_earliest > insn_get_clock (end_packet)) + { + n_execute_packets++; + end_packet = loop->loop_end; + } + else + loop_earliest = insn_get_clock (end_packet); + + if (n_execute_packets > 14) + goto restore_loop; + + /* Generate the spkernel instruction, and place it at the appropriate + spot. */ + PUT_MODE (end_packet, VOIDmode); + + insn = gen_spkernel (GEN_INT (stages - 1), + const0_rtx, JUMP_LABEL (loop->loop_end)); + insn = emit_jump_insn_before (insn, end_packet); + JUMP_LABEL (insn) = JUMP_LABEL (loop->loop_end); + insn_set_clock (insn, loop_earliest); + PUT_MODE (insn, TImode); + INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start = false; + delete_insn (loop->loop_end); + + /* Place the mvc and sploop instructions before the loop. */ + entry_bb = entry_edge->src; + + start_sequence (); + + insn = emit_insn (gen_mvilc (loop->iter_reg)); + insn = emit_insn (gen_sploop (GEN_INT (sp_ii))); + + seq = get_insns (); + + if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1) + { + basic_block new_bb; + edge e; + edge_iterator ei; + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, insn, entry_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU)) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + make_edge (new_bb, loop->head, 0); + } + else + { + entry_after = BB_END (entry_bb); + while (DEBUG_INSN_P (entry_after) + || (NOTE_P (entry_after) + && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK)) + entry_after = PREV_INSN (entry_after); + emit_insn_after (seq, entry_after); + } + + end_sequence (); + + /* Make sure we don't try to schedule this loop again. 
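+     We do this by setting BB_DISABLE_SCHEDULE on every block of the loop.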
*/ + for (ix = 0; loop->blocks.iterate (ix, &bb); ix++) + bb->flags |= BB_DISABLE_SCHEDULE; + + return true; + + restore_loop: + if (dump_file) + fprintf (dump_file, "Unable to pipeline loop.\n"); + + for (i = 1; i < n_insns; i++) + { + NEXT_INSN (orig_vec[i - 1]) = orig_vec[i]; + PREV_INSN (orig_vec[i]) = orig_vec[i - 1]; + } + PREV_INSN (orig_vec[0]) = PREV_INSN (BB_HEAD (bb)); + NEXT_INSN (PREV_INSN (BB_HEAD (bb))) = orig_vec[0]; + NEXT_INSN (orig_vec[n_insns - 1]) = NEXT_INSN (BB_END (bb)); + PREV_INSN (NEXT_INSN (BB_END (bb))) = orig_vec[n_insns - 1]; + BB_HEAD (bb) = orig_vec[0]; + BB_END (bb) = orig_vec[n_insns - 1]; + undo_splits: + free_delay_pairs (); + FOR_BB_INSNS (bb, insn) + if (NONDEBUG_INSN_P (insn)) + undo_split_delayed_nonbranch (insn); + return false; +} + +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the doloop_end pattern + into a subtract and a test. */ +static void +hwloop_fail (hwloop_info loop) +{ + rtx insn, test, testreg; + + if (dump_file) + fprintf (dump_file, "splitting doloop insn %d\n", + INSN_UID (loop->loop_end)); + insn = gen_addsi3 (loop->iter_reg, loop->iter_reg, constm1_rtx); + /* See if we can emit the add at the head of the loop rather than at the + end. */ + if (loop->head == NULL + || loop->iter_reg_used_outside + || loop->iter_reg_used + || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REGNO (loop->iter_reg)) + || loop->incoming_dest != loop->head + || EDGE_COUNT (loop->head->preds) != 2) + emit_insn_before (insn, loop->loop_end); + else + { + rtx t = loop->start_label; + while (!NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_BASIC_BLOCK) + t = NEXT_INSN (t); + emit_insn_after (insn, t); + } + + testreg = SET_DEST (XVECEXP (PATTERN (loop->loop_end), 0, 2)); + if (GET_CODE (testreg) == SCRATCH) + testreg = loop->iter_reg; + else + emit_insn_before (gen_movsi (testreg, loop->iter_reg), loop->loop_end); + + test = gen_rtx_NE (VOIDmode, testreg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, testreg, const0_rtx, + loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); +} + +static struct hw_doloop_hooks c6x_doloop_hooks = +{ + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; + +/* Run the hw-doloop pass to modulo-schedule hardware loops, or split the + doloop_end patterns where such optimizations are impossible. */ +static void +c6x_hwloops (void) +{ + if (optimize) + reorg_loops (true, &c6x_doloop_hooks); +} + +/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. We split call insns here + into a sequence that loads the return register and performs the call, + and emit the return label. + If scheduling after reload is requested, it happens here. */ + +static void +c6x_reorg (void) +{ + basic_block bb; + rtx *call_labels; + bool do_selsched = (c6x_flag_schedule_insns2 && flag_selective_scheduling2 + && !maybe_skip_selective_scheduling ()); + + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + df_clear_flags (DF_LR_RUN_DCE); + df_note_add_problem (); + + /* If optimizing, we'll have split before scheduling. */ + if (optimize == 0) + split_all_insns (); + + df_analyze (); + + if (c6x_flag_schedule_insns2) + { + int sz = get_max_uid () * 3 / 2 + 1; + + insn_info.create (sz); + } + + /* Make sure the real-jump insns we create are not deleted. 
When modulo- + scheduling, situations where a reg is only stored in a loop can also + cause dead code when doing the initial unrolling. */ + sched_no_dce = true; + + c6x_hwloops (); + + if (c6x_flag_schedule_insns2) + { + split_delayed_insns (); + timevar_push (TV_SCHED2); + if (do_selsched) + run_selective_scheduling (); + else + schedule_ebbs (); + conditionalize_after_sched (); + timevar_pop (TV_SCHED2); + + free_delay_pairs (); + } + sched_no_dce = false; + + call_labels = XCNEWVEC (rtx, get_max_uid () + 1); + + reorg_split_calls (call_labels); + + if (c6x_flag_schedule_insns2) + { + FOR_EACH_BB_FN (bb, cfun) + if ((bb->flags & BB_DISABLE_SCHEDULE) == 0) + assign_reservations (BB_HEAD (bb), BB_END (bb)); + } + + if (c6x_flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + timevar_pop (TV_VAR_TRACKING); + } + + reorg_emit_nops (call_labels); + + /* Post-process the schedule to move parallel insns into SEQUENCEs. */ + if (c6x_flag_schedule_insns2) + { + free_delay_pairs (); + c6x_gen_bundles (); + } + + df_finish_pass (false); +} + +/* Called when a function has been assembled. It should perform all the + tasks of ASM_DECLARE_FUNCTION_SIZE in elfos.h, plus target-specific + tasks. + We free the reservation (and other scheduling) information here now that + all insns have been output. */ +void +c6x_function_end (FILE *file, const char *fname) +{ + c6x_output_fn_unwind (file); + + insn_info.release (); + + if (!flag_inhibit_size_directive) + ASM_OUTPUT_MEASURED_SIZE (file, fname); +} + +/* Determine whether X is a shift with code CODE and an integer amount + AMOUNT. */ +static bool +shift_p (rtx x, enum rtx_code code, int amount) +{ + return (GET_CODE (x) == code && GET_CODE (XEXP (x, 1)) == CONST_INT + && INTVAL (XEXP (x, 1)) == amount); +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +c6x_rtx_costs (rtx x, int code, int outer_code, int opno, int *total, + bool speed) +{ + int cost2 = COSTS_N_INSNS (1); + rtx op0, op1; + + switch (code) + { + case CONST_INT: + if (outer_code == SET || outer_code == PLUS) + *total = satisfies_constraint_IsB (x) ? 0 : cost2; + else if (outer_code == AND || outer_code == IOR || outer_code == XOR + || outer_code == MINUS) + *total = satisfies_constraint_Is5 (x) ? 0 : cost2; + else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE + || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE) + *total = satisfies_constraint_Iu4 (x) ? 0 : cost2; + else if (outer_code == ASHIFT || outer_code == ASHIFTRT + || outer_code == LSHIFTRT) + *total = satisfies_constraint_Iu5 (x) ? 0 : cost2; + else + *total = cost2; + return true; + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + case CONST_DOUBLE: + *total = COSTS_N_INSNS (2); + return true; + + case TRUNCATE: + /* Recognize a mult_highpart operation. 
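+	 That is, a TRUNCATE of an LSHIFTRT of a widening MULT, where the
+	 shift amount equals the width of the result mode.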
*/ + if ((GET_MODE (x) == HImode || GET_MODE (x) == SImode) + && GET_CODE (XEXP (x, 0)) == LSHIFTRT + && GET_MODE (XEXP (x, 0)) == GET_MODE_2XWIDER_MODE (GET_MODE (x)) + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT + && INTVAL (XEXP (XEXP (x, 0), 1)) == GET_MODE_BITSIZE (GET_MODE (x))) + { + rtx mul = XEXP (XEXP (x, 0), 0); + rtx op0 = XEXP (mul, 0); + rtx op1 = XEXP (mul, 1); + enum rtx_code code0 = GET_CODE (op0); + enum rtx_code code1 = GET_CODE (op1); + + if ((code0 == code1 + && (code0 == SIGN_EXTEND || code0 == ZERO_EXTEND)) + || (GET_MODE (x) == HImode + && code0 == ZERO_EXTEND && code1 == SIGN_EXTEND)) + { + if (GET_MODE (x) == HImode) + *total = COSTS_N_INSNS (2); + else + *total = COSTS_N_INSNS (12); + *total += rtx_cost (XEXP (op0, 0), code0, 0, speed); + *total += rtx_cost (XEXP (op1, 0), code1, 0, speed); + return true; + } + } + return false; + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + if (GET_MODE (x) == DImode) + *total = COSTS_N_INSNS (CONSTANT_P (XEXP (x, 1)) ? 4 : 15); + else + *total = COSTS_N_INSNS (1); + return false; + + case PLUS: + case MINUS: + *total = COSTS_N_INSNS (1); + op0 = code == PLUS ? XEXP (x, 0) : XEXP (x, 1); + op1 = code == PLUS ? XEXP (x, 1) : XEXP (x, 0); + if (GET_MODE_SIZE (GET_MODE (x)) <= UNITS_PER_WORD + && INTEGRAL_MODE_P (GET_MODE (x)) + && GET_CODE (op0) == MULT + && GET_CODE (XEXP (op0, 1)) == CONST_INT + && (INTVAL (XEXP (op0, 1)) == 2 + || INTVAL (XEXP (op0, 1)) == 4 + || (code == PLUS && INTVAL (XEXP (op0, 1)) == 8))) + { + *total += rtx_cost (XEXP (op0, 0), ASHIFT, 0, speed); + *total += rtx_cost (op1, (enum rtx_code) code, 1, speed); + return true; + } + return false; + + case MULT: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_MODE (x) == DFmode) + { + if (TARGET_FP) + *total = COSTS_N_INSNS (speed ? 10 : 1); + else + *total = COSTS_N_INSNS (speed ? 200 : 4); + } + else if (GET_MODE (x) == SFmode) + { + if (TARGET_FP) + *total = COSTS_N_INSNS (speed ? 4 : 1); + else + *total = COSTS_N_INSNS (speed ? 100 : 4); + } + else if (GET_MODE (x) == DImode) + { + if (TARGET_MPY32 + && GET_CODE (op0) == GET_CODE (op1) + && (GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND)) + { + *total = COSTS_N_INSNS (speed ? 2 : 1); + op0 = XEXP (op0, 0); + op1 = XEXP (op1, 0); + } + else + /* Maybe improve this laster. */ + *total = COSTS_N_INSNS (20); + } + else if (GET_MODE (x) == SImode) + { + if (((GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND + || shift_p (op0, LSHIFTRT, 16)) + && (GET_CODE (op1) == SIGN_EXTEND + || GET_CODE (op1) == ZERO_EXTEND + || scst5_operand (op1, SImode) + || shift_p (op1, ASHIFTRT, 16) + || shift_p (op1, LSHIFTRT, 16))) + || (shift_p (op0, ASHIFTRT, 16) + && (GET_CODE (op1) == SIGN_EXTEND + || shift_p (op1, ASHIFTRT, 16)))) + { + *total = COSTS_N_INSNS (speed ? 2 : 1); + op0 = XEXP (op0, 0); + if (scst5_operand (op1, SImode)) + op1 = NULL_RTX; + else + op1 = XEXP (op1, 0); + } + else if (!speed) + *total = COSTS_N_INSNS (1); + else if (TARGET_MPY32) + *total = COSTS_N_INSNS (4); + else + *total = COSTS_N_INSNS (6); + } + else if (GET_MODE (x) == HImode) + *total = COSTS_N_INSNS (speed ? 
2 : 1); + + if (GET_CODE (op0) != REG + && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG)) + *total += rtx_cost (op0, MULT, 0, speed); + if (op1 && GET_CODE (op1) != REG + && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG)) + *total += rtx_cost (op1, MULT, 1, speed); + return true; + + case UDIV: + case DIV: + /* This is a bit random; assuming on average there'll be 16 leading + zeros. FIXME: estimate better for constant dividends. */ + *total = COSTS_N_INSNS (6 + 3 * 16); + return false; + + case IF_THEN_ELSE: + /* Recognize the cmp_and/ior patterns. */ + op0 = XEXP (x, 0); + if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE) + && REG_P (XEXP (op0, 0)) + && XEXP (op0, 1) == const0_rtx + && rtx_equal_p (XEXP (x, 1), XEXP (op0, 0))) + { + *total = rtx_cost (XEXP (x, 1), (enum rtx_code) outer_code, + opno, speed); + return false; + } + return false; + + default: + return false; + } +} + +/* Implements target hook vector_mode_supported_p. */ + +static bool +c6x_vector_mode_supported_p (enum machine_mode mode) +{ + switch (mode) + { + case V2HImode: + case V4QImode: + case V2SImode: + case V4HImode: + case V8QImode: + return true; + default: + return false; + } +} + +/* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */ +static enum machine_mode +c6x_preferred_simd_mode (enum machine_mode mode) +{ + switch (mode) + { + case HImode: + return V2HImode; + case QImode: + return V4QImode; + + default: + return word_mode; + } +} + +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */ + +static bool +c6x_scalar_mode_supported_p (enum machine_mode mode) +{ + if (ALL_FIXED_POINT_MODE_P (mode) + && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD) + return true; + + return default_scalar_mode_supported_p (mode); +} + +/* Output a reference from a function exception table to the type_info + object X. Output these via a special assembly directive. */ + +static bool +c6x_output_ttype (rtx x) +{ + /* Use special relocations for symbol references. */ + if (GET_CODE (x) != CONST_INT) + fputs ("\t.ehtype\t", asm_out_file); + else + fputs ("\t.word\t", asm_out_file); + output_addr_const (asm_out_file, x); + fputc ('\n', asm_out_file); + + return TRUE; +} + +/* Modify the return address of the current function. */ + +void +c6x_set_return_address (rtx source, rtx scratch) +{ + struct c6x_frame frame; + rtx addr; + HOST_WIDE_INT offset; + + c6x_compute_frame_layout (&frame); + if (! c6x_save_reg (RETURN_ADDR_REGNO)) + emit_move_insn (gen_rtx_REG (Pmode, RETURN_ADDR_REGNO), source); + else + { + + if (frame_pointer_needed) + { + addr = hard_frame_pointer_rtx; + offset = frame.b3_offset; + } + else + { + addr = stack_pointer_rtx; + offset = frame.to_allocate - frame.b3_offset; + } + + /* TODO: Use base+offset loads where possible. */ + if (offset) + { + HOST_WIDE_INT low = trunc_int_for_mode (offset, HImode); + + emit_insn (gen_movsi_high (scratch, GEN_INT (low))); + if (low != offset) + emit_insn (gen_movsi_lo_sum (scratch, scratch, GEN_INT(offset))); + emit_insn (gen_addsi3 (scratch, addr, scratch)); + addr = scratch; + } + + emit_move_insn (gen_frame_mem (Pmode, addr), source); + } +} + +/* We save pairs of registers using a DImode store. Describe the component + registers for DWARF generation code. 
*/ + +static rtx +c6x_dwarf_register_span (rtx rtl) +{ + unsigned regno; + unsigned real_regno; + int nregs; + int i; + rtx p; + + regno = REGNO (rtl); + nregs = HARD_REGNO_NREGS (regno, GET_MODE (rtl)); + if (nregs == 1) + return NULL_RTX; + + p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc(nregs)); + for (i = 0; i < nregs; i++) + { + if (TARGET_BIG_ENDIAN) + real_regno = regno + nregs - (i + 1); + else + real_regno = regno + i; + + XVECEXP (p, 0, i) = gen_rtx_REG (SImode, real_regno); + } + + return p; +} + +/* Codes for all the C6X builtins. */ +enum c6x_builtins +{ + C6X_BUILTIN_SADD, + C6X_BUILTIN_SSUB, + C6X_BUILTIN_ADD2, + C6X_BUILTIN_SUB2, + C6X_BUILTIN_ADD4, + C6X_BUILTIN_SUB4, + C6X_BUILTIN_SADD2, + C6X_BUILTIN_SSUB2, + C6X_BUILTIN_SADDU4, + + C6X_BUILTIN_SMPY, + C6X_BUILTIN_SMPYH, + C6X_BUILTIN_SMPYHL, + C6X_BUILTIN_SMPYLH, + C6X_BUILTIN_MPY2, + C6X_BUILTIN_SMPY2, + + C6X_BUILTIN_CLRR, + C6X_BUILTIN_EXTR, + C6X_BUILTIN_EXTRU, + + C6X_BUILTIN_SSHL, + C6X_BUILTIN_SUBC, + C6X_BUILTIN_ABS, + C6X_BUILTIN_ABS2, + C6X_BUILTIN_AVG2, + C6X_BUILTIN_AVGU4, + + C6X_BUILTIN_MAX +}; + + +static GTY(()) tree c6x_builtin_decls[C6X_BUILTIN_MAX]; + +/* Return the C6X builtin for CODE. */ +static tree +c6x_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= C6X_BUILTIN_MAX) + return error_mark_node; + + return c6x_builtin_decls[code]; +} + +#define def_builtin(NAME, TYPE, CODE) \ +do { \ + tree bdecl; \ + bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ + NULL, NULL_TREE); \ + c6x_builtin_decls[CODE] = bdecl; \ +} while (0) + +/* Set up all builtin functions for this target. */ +static void +c6x_init_builtins (void) +{ + tree V4QI_type_node = build_vector_type (unsigned_intQI_type_node, 4); + tree V2HI_type_node = build_vector_type (intHI_type_node, 2); + tree V2SI_type_node = build_vector_type (intSI_type_node, 2); + tree int_ftype_int + = build_function_type_list (integer_type_node, integer_type_node, + NULL_TREE); + tree int_ftype_int_int + = build_function_type_list (integer_type_node, integer_type_node, + integer_type_node, NULL_TREE); + tree v2hi_ftype_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v4qi_ftype_v4qi_v4qi + = build_function_type_list (V4QI_type_node, V4QI_type_node, + V4QI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi + = build_function_type_list (V2HI_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + tree v2si_ftype_v2hi_v2hi + = build_function_type_list (V2SI_type_node, V2HI_type_node, + V2HI_type_node, NULL_TREE); + + def_builtin ("__builtin_c6x_sadd", int_ftype_int_int, + C6X_BUILTIN_SADD); + def_builtin ("__builtin_c6x_ssub", int_ftype_int_int, + C6X_BUILTIN_SSUB); + def_builtin ("__builtin_c6x_add2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_ADD2); + def_builtin ("__builtin_c6x_sub2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_SUB2); + def_builtin ("__builtin_c6x_add4", v4qi_ftype_v4qi_v4qi, + C6X_BUILTIN_ADD4); + def_builtin ("__builtin_c6x_sub4", v4qi_ftype_v4qi_v4qi, + C6X_BUILTIN_SUB4); + def_builtin ("__builtin_c6x_mpy2", v2si_ftype_v2hi_v2hi, + C6X_BUILTIN_MPY2); + def_builtin ("__builtin_c6x_sadd2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_SADD2); + def_builtin ("__builtin_c6x_ssub2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_SSUB2); + def_builtin ("__builtin_c6x_saddu4", v4qi_ftype_v4qi_v4qi, + C6X_BUILTIN_SADDU4); + def_builtin ("__builtin_c6x_smpy2", v2si_ftype_v2hi_v2hi, + C6X_BUILTIN_SMPY2); + + def_builtin ("__builtin_c6x_smpy", int_ftype_int_int, + C6X_BUILTIN_SMPY); + def_builtin 
("__builtin_c6x_smpyh", int_ftype_int_int, + C6X_BUILTIN_SMPYH); + def_builtin ("__builtin_c6x_smpyhl", int_ftype_int_int, + C6X_BUILTIN_SMPYHL); + def_builtin ("__builtin_c6x_smpylh", int_ftype_int_int, + C6X_BUILTIN_SMPYLH); + + def_builtin ("__builtin_c6x_sshl", int_ftype_int_int, + C6X_BUILTIN_SSHL); + def_builtin ("__builtin_c6x_subc", int_ftype_int_int, + C6X_BUILTIN_SUBC); + + def_builtin ("__builtin_c6x_avg2", v2hi_ftype_v2hi_v2hi, + C6X_BUILTIN_AVG2); + def_builtin ("__builtin_c6x_avgu4", v4qi_ftype_v4qi_v4qi, + C6X_BUILTIN_AVGU4); + + def_builtin ("__builtin_c6x_clrr", int_ftype_int_int, + C6X_BUILTIN_CLRR); + def_builtin ("__builtin_c6x_extr", int_ftype_int_int, + C6X_BUILTIN_EXTR); + def_builtin ("__builtin_c6x_extru", int_ftype_int_int, + C6X_BUILTIN_EXTRU); + + def_builtin ("__builtin_c6x_abs", int_ftype_int, C6X_BUILTIN_ABS); + def_builtin ("__builtin_c6x_abs2", v2hi_ftype_v2hi, C6X_BUILTIN_ABS2); +} + + +struct builtin_description +{ + const enum insn_code icode; + const char *const name; + const enum c6x_builtins code; +}; + +static const struct builtin_description bdesc_2arg[] = +{ + { CODE_FOR_saddsi3, "__builtin_c6x_sadd", C6X_BUILTIN_SADD }, + { CODE_FOR_ssubsi3, "__builtin_c6x_ssub", C6X_BUILTIN_SSUB }, + { CODE_FOR_addv2hi3, "__builtin_c6x_add2", C6X_BUILTIN_ADD2 }, + { CODE_FOR_subv2hi3, "__builtin_c6x_sub2", C6X_BUILTIN_SUB2 }, + { CODE_FOR_addv4qi3, "__builtin_c6x_add4", C6X_BUILTIN_ADD4 }, + { CODE_FOR_subv4qi3, "__builtin_c6x_sub4", C6X_BUILTIN_SUB4 }, + { CODE_FOR_ss_addv2hi3, "__builtin_c6x_sadd2", C6X_BUILTIN_SADD2 }, + { CODE_FOR_ss_subv2hi3, "__builtin_c6x_ssub2", C6X_BUILTIN_SSUB2 }, + { CODE_FOR_us_addv4qi3, "__builtin_c6x_saddu4", C6X_BUILTIN_SADDU4 }, + + { CODE_FOR_subcsi3, "__builtin_c6x_subc", C6X_BUILTIN_SUBC }, + { CODE_FOR_ss_ashlsi3, "__builtin_c6x_sshl", C6X_BUILTIN_SSHL }, + + { CODE_FOR_avgv2hi3, "__builtin_c6x_avg2", C6X_BUILTIN_AVG2 }, + { CODE_FOR_uavgv4qi3, "__builtin_c6x_avgu4", C6X_BUILTIN_AVGU4 }, + + { CODE_FOR_mulhqsq3, "__builtin_c6x_smpy", C6X_BUILTIN_SMPY }, + { CODE_FOR_mulhqsq3_hh, "__builtin_c6x_smpyh", C6X_BUILTIN_SMPYH }, + { CODE_FOR_mulhqsq3_lh, "__builtin_c6x_smpylh", C6X_BUILTIN_SMPYLH }, + { CODE_FOR_mulhqsq3_hl, "__builtin_c6x_smpyhl", C6X_BUILTIN_SMPYHL }, + + { CODE_FOR_mulv2hqv2sq3, "__builtin_c6x_smpy2", C6X_BUILTIN_SMPY2 }, + + { CODE_FOR_clrr, "__builtin_c6x_clrr", C6X_BUILTIN_CLRR }, + { CODE_FOR_extr, "__builtin_c6x_extr", C6X_BUILTIN_EXTR }, + { CODE_FOR_extru, "__builtin_c6x_extru", C6X_BUILTIN_EXTRU } +}; + +static const struct builtin_description bdesc_1arg[] = +{ + { CODE_FOR_ssabssi2, "__builtin_c6x_abs", C6X_BUILTIN_ABS }, + { CODE_FOR_ssabsv2hi2, "__builtin_c6x_abs2", C6X_BUILTIN_ABS2 } +}; + +/* Errors in the source file can cause expand_expr to return const0_rtx + where we expect a vector. To avoid crashing, use one of the vector + clear instructions. */ +static rtx +safe_vector_operand (rtx x, enum machine_mode mode) +{ + if (x != const0_rtx) + return x; + x = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (x, CONST0_RTX (SImode))); + return gen_lowpart (mode, x); +} + +/* Subroutine of c6x_expand_builtin to take care of binop insns. MACFLAG is -1 + if this is a normal binary op, or one of the MACFLAG_xxx constants. */ + +static rtx +c6x_expand_binop_builtin (enum insn_code icode, tree exp, rtx target, + bool match_op) +{ + int offs = match_op ? 
1 : 0; + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode op1mode = GET_MODE (op1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1 + offs].mode; + enum machine_mode mode1 = insn_data[icode].operand[2 + offs].mode; + rtx ret = target; + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + if (VECTOR_MODE_P (mode1)) + op1 = safe_vector_operand (op1, mode1); + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + { + if (tmode == SQmode || tmode == V2SQmode) + { + ret = gen_reg_rtx (tmode == SQmode ? SImode : V2SImode); + target = gen_lowpart (tmode, ret); + } + else + target = gen_reg_rtx (tmode); + } + + if ((op0mode == V2HImode || op0mode == SImode || op0mode == VOIDmode) + && (mode0 == V2HQmode || mode0 == HQmode || mode0 == SQmode)) + { + op0mode = mode0; + op0 = gen_lowpart (mode0, op0); + } + if ((op1mode == V2HImode || op1mode == SImode || op1mode == VOIDmode) + && (mode1 == V2HQmode || mode1 == HQmode || mode1 == SQmode)) + { + op1mode = mode1; + op1 = gen_lowpart (mode1, op1); + } + /* In case the insn wants input operands in modes different from + the result, abort. */ + gcc_assert ((op0mode == mode0 || op0mode == VOIDmode) + && (op1mode == mode1 || op1mode == VOIDmode)); + + if (! (*insn_data[icode].operand[1 + offs].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2 + offs].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + if (match_op) + pat = GEN_FCN (icode) (target, target, op0, op1); + else + pat = GEN_FCN (icode) (target, op0, op1); + + if (! pat) + return 0; + + emit_insn (pat); + + return ret; +} + +/* Subroutine of c6x_expand_builtin to take care of unop insns. */ + +static rtx +c6x_expand_unop_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + enum machine_mode op0mode = GET_MODE (op0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (VECTOR_MODE_P (mode0)) + op0 = safe_vector_operand (op0, mode0); + + if (op0mode == SImode && mode0 == HImode) + { + op0mode = HImode; + op0 = gen_lowpart (HImode, op0); + } + gcc_assert (op0mode == mode0 || op0mode == VOIDmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (target, op0); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. 
*/ + +static rtx +c6x_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, + rtx subtarget ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ + size_t i; + const struct builtin_description *d; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + return c6x_expand_binop_builtin (d->icode, exp, target, + fcode == C6X_BUILTIN_CLRR); + + for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++) + if (d->code == fcode) + return c6x_expand_unop_builtin (d->icode, exp, target); + + gcc_unreachable (); +} + +/* Target unwind frame info is generated from dwarf CFI directives, so + always output dwarf2 unwind info. */ + +static enum unwind_info_type +c6x_debug_unwind_info (void) +{ + if (flag_unwind_tables || flag_exceptions) + return UI_DWARF2; + + return default_debug_unwind_info (); +} + +/* Target Structure. */ + +/* Initialize the GCC target structure. */ +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG c6x_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE c6x_function_arg_advance +#undef TARGET_FUNCTION_ARG_BOUNDARY +#define TARGET_FUNCTION_ARG_BOUNDARY c6x_function_arg_boundary +#undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY +#define TARGET_FUNCTION_ARG_ROUND_BOUNDARY \ + c6x_function_arg_round_boundary +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P c6x_function_value_regno_p +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE c6x_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE c6x_libcall_value +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY c6x_return_in_memory +#undef TARGET_RETURN_IN_MSB +#define TARGET_RETURN_IN_MSB c6x_return_in_msb +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE c6x_pass_by_reference +#undef TARGET_CALLEE_COPIES +#define TARGET_CALLEE_COPIES c6x_callee_copies +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX c6x_struct_value_rtx +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL c6x_function_ok_for_sibcall + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK c6x_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK c6x_can_output_mi_thunk + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST c6x_build_builtin_va_list + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE c6x_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT c6x_initialize_trampoline + +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P c6x_legitimate_constant_p +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P c6x_legitimate_address_p + +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P c6x_in_small_data_p +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION c6x_select_rtx_section +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION c6x_elf_select_section +#undef TARGET_ASM_UNIQUE_SECTION +#define TARGET_ASM_UNIQUE_SECTION c6x_elf_unique_section +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS c6x_section_type_flags +#undef TARGET_HAVE_SRODATA_SECTION +#define TARGET_HAVE_SRODATA_SECTION true +#undef TARGET_ASM_MERGEABLE_RODATA_PREFIX +#define 
TARGET_ASM_MERGEABLE_RODATA_PREFIX ".const" + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE c6x_option_override +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE c6x_conditional_register_usage + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS c6x_init_libfuncs +#undef TARGET_LIBFUNC_GNU_PREFIX +#define TARGET_LIBFUNC_GNU_PREFIX true + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P c6x_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P c6x_vector_mode_supported_p +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE c6x_preferred_simd_mode + +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS c6x_rtx_costs + +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT c6x_sched_init +#undef TARGET_SCHED_SET_SCHED_FLAGS +#define TARGET_SCHED_SET_SCHED_FLAGS c6x_set_sched_flags +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST c6x_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE c6x_issue_rate +#undef TARGET_SCHED_VARIABLE_ISSUE +#define TARGET_SCHED_VARIABLE_ISSUE c6x_variable_issue +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER c6x_sched_reorder +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 c6x_sched_reorder2 +#undef TARGET_SCHED_DFA_NEW_CYCLE +#define TARGET_SCHED_DFA_NEW_CYCLE c6x_dfa_new_cycle +#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN +#define TARGET_SCHED_DFA_PRE_CYCLE_INSN c6x_sched_dfa_pre_cycle_insn +#undef TARGET_SCHED_EXPOSED_PIPELINE +#define TARGET_SCHED_EXPOSED_PIPELINE true + +#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT +#define TARGET_SCHED_ALLOC_SCHED_CONTEXT c6x_alloc_sched_context +#undef TARGET_SCHED_INIT_SCHED_CONTEXT +#define TARGET_SCHED_INIT_SCHED_CONTEXT c6x_init_sched_context +#undef TARGET_SCHED_SET_SCHED_CONTEXT +#define TARGET_SCHED_SET_SCHED_CONTEXT c6x_set_sched_context +#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT +#define TARGET_SCHED_CLEAR_SCHED_CONTEXT c6x_clear_sched_context +#undef TARGET_SCHED_FREE_SCHED_CONTEXT +#define TARGET_SCHED_FREE_SCHED_CONTEXT c6x_free_sched_context + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE c6x_can_eliminate + +#undef TARGET_PREFERRED_RENAME_CLASS +#define TARGET_PREFERRED_RENAME_CLASS c6x_preferred_rename_class + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG c6x_reorg + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START c6x_file_start + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND c6x_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS c6x_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P c6x_print_operand_punct_valid_p + +/* C6x unwinding tables use a different format for the typeinfo tables. */ +#undef TARGET_ASM_TTYPE +#define TARGET_ASM_TTYPE c6x_output_ttype + +/* The C6x ABI follows the ARM EABI exception handling rules. 
*/ +#undef TARGET_ARM_EABI_UNWINDER +#define TARGET_ARM_EABI_UNWINDER true + +#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY +#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY c6x_asm_emit_except_personality + +#undef TARGET_ASM_INIT_SECTIONS +#define TARGET_ASM_INIT_SECTIONS c6x_asm_init_sections + +#undef TARGET_DEBUG_UNWIND_INFO +#define TARGET_DEBUG_UNWIND_INFO c6x_debug_unwind_info + +#undef TARGET_DWARF_REGISTER_SPAN +#define TARGET_DWARF_REGISTER_SPAN c6x_dwarf_register_span + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS c6x_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN c6x_expand_builtin +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL c6x_builtin_decl + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-c6x.h" diff --git a/gcc-4.9/gcc/config/c6x/c6x.h b/gcc-4.9/gcc/config/c6x/c6x.h new file mode 100644 index 000000000..e0a60a971 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x.h @@ -0,0 +1,618 @@ +/* Target Definitions for TI C6X. + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_C6X_H +#define GCC_C6X_H + +/* Feature bit definitions that enable specific insns. */ +#define C6X_INSNS_C62X 1 +#define C6X_INSNS_C64X 2 +#define C6X_INSNS_C64XP 4 +#define C6X_INSNS_C67X 8 +#define C6X_INSNS_C67XP 16 +#define C6X_INSNS_C674X 32 +#define C6X_INSNS_ATOMIC 64 +#define C6X_INSNS_ALL_CPU_BITS 127 + +#define C6X_DEFAULT_INSN_MASK \ + (C6X_INSNS_C62X | C6X_INSNS_C64X | C6X_INSNS_C64XP) + +/* A mask of allowed insn types, as defined above. */ +extern unsigned long c6x_insn_mask; + +/* Value of -march= */ +extern c6x_cpu_t c6x_arch; +#define C6X_DEFAULT_ARCH C6X_CPU_C64XP + +/* True if the target has C64x instructions. */ +#define TARGET_INSNS_64 ((c6x_insn_mask & C6X_INSNS_C64X) != 0) +/* True if the target has C64x+ instructions. */ +#define TARGET_INSNS_64PLUS ((c6x_insn_mask & C6X_INSNS_C64XP) != 0) +/* True if the target has C67x instructions. */ +#define TARGET_INSNS_67 ((c6x_insn_mask & C6X_INSNS_C67X) != 0) +/* True if the target has C67x+ instructions. */ +#define TARGET_INSNS_67PLUS ((c6x_insn_mask & C6X_INSNS_C67XP) != 0) + +/* True if the target supports doubleword loads. */ +#define TARGET_LDDW (TARGET_INSNS_64 || TARGET_INSNS_67) +/* True if the target supports doubleword loads. */ +#define TARGET_STDW TARGET_INSNS_64 +/* True if the target supports the MPY32 family of instructions. */ +#define TARGET_MPY32 TARGET_INSNS_64PLUS +/* True if the target has floating point hardware. */ +#define TARGET_FP TARGET_INSNS_67 +/* True if the target has C67x+ floating point extensions. */ +#define TARGET_FP_EXT TARGET_INSNS_67PLUS + +#define TARGET_DEFAULT 0 + +/* Run-time Target. 
*/ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_assert ("machine=tic6x"); \ + builtin_assert ("cpu=tic6x"); \ + builtin_define ("__TMS320C6X__"); \ + builtin_define ("_TMS320C6X"); \ + \ + if (TARGET_DSBT) \ + builtin_define ("__DSBT__"); \ + \ + if (TARGET_BIG_ENDIAN) \ + builtin_define ("_BIG_ENDIAN"); \ + else \ + builtin_define ("_LITTLE_ENDIAN"); \ + \ + switch (c6x_arch) \ + { \ + case C6X_CPU_C62X: \ + builtin_define ("_TMS320C6200"); \ + break; \ + \ + case C6X_CPU_C64XP: \ + builtin_define ("_TMS320C6400_PLUS"); \ + /* ... fall through ... */ \ + case C6X_CPU_C64X: \ + builtin_define ("_TMS320C6400"); \ + break; \ + \ + case C6X_CPU_C67XP: \ + builtin_define ("_TMS320C6700_PLUS"); \ + /* ... fall through ... */ \ + case C6X_CPU_C67X: \ + builtin_define ("_TMS320C6700"); \ + break; \ + \ + case C6X_CPU_C674X: \ + builtin_define ("_TMS320C6740"); \ + builtin_define ("_TMS320C6700_PLUS"); \ + builtin_define ("_TMS320C6700"); \ + builtin_define ("_TMS320C6400_PLUS"); \ + builtin_define ("_TMS320C6400"); \ + break; \ + } \ + } while (0) + +#define OPTION_DEFAULT_SPECS \ + {"arch", "%{!march=*:-march=%(VALUE)}" } + +/* Storage Layout. */ + +#define BITS_BIG_ENDIAN 0 +#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) +#define WORDS_BIG_ENDIAN (TARGET_BIG_ENDIAN != 0) + +#define REG_WORDS_BIG_ENDIAN 0 + +#define UNITS_PER_WORD 4 +#define PARM_BOUNDARY 8 +#define STACK_BOUNDARY 64 +#define FUNCTION_BOUNDARY 32 +#define BIGGEST_ALIGNMENT 64 +#define STRICT_ALIGNMENT 1 + +/* The ABI requires static arrays must be at least 8 byte aligned. + Really only externally visible arrays must be aligned this way, as + only those are directly visible from another compilation unit. But + we don't have that information available here. */ +#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \ + (((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE) \ + ? BITS_PER_UNIT * 8 : (ALIGN)) + +/* Type Layout. */ + +#define DEFAULT_SIGNED_CHAR 1 + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Registers. */ + +#define FIRST_PSEUDO_REGISTER 67 +#define FIXED_REGISTERS \ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1} +#define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1} + +/* This lists call-used non-predicate registers first, followed by call-used + registers, followed by predicate registers. We want to avoid allocating + the predicate registers for other uses as much as possible. 
*/ +#define REG_ALLOC_ORDER \ + { \ + REG_A0, REG_A3, REG_A4, REG_A5, REG_A6, REG_A7, REG_A8, REG_A9, \ + REG_A16, REG_A17, REG_A18, REG_A19, REG_A20, REG_A21, REG_A22, REG_A23, \ + REG_A24, REG_A25, REG_A26, REG_A27, REG_A28, REG_A29, REG_A30, REG_A31, \ + REG_B4, REG_B5, REG_B6, REG_B7, REG_B8, REG_B9, REG_B16, \ + REG_B17, REG_B18, REG_B19, REG_B20, REG_B21, REG_B22, REG_B23, REG_B24, \ + REG_B25, REG_B26, REG_B27, REG_B28, REG_B29, REG_B30, REG_B31, \ + REG_A10, REG_A11, REG_A12, REG_A13, REG_A14, REG_A15, \ + REG_B3, REG_B10, REG_B11, REG_B12, REG_B13, REG_B14, REG_B15, \ + REG_A1, REG_A2, REG_B0, REG_B1, REG_B2, REG_ILC \ + } + +#define HARD_REGNO_NREGS(regno, mode) \ + ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) \ + / UNITS_PER_WORD) + +#define HARD_REGNO_MODE_OK(reg, mode) (GET_MODE_SIZE (mode) <= UNITS_PER_WORD \ + ? 1 : ((reg) & 1) == 0) + +#define MODES_TIEABLE_P(mode1, mode2) \ + ((mode1) == (mode2) || \ + (GET_MODE_SIZE (mode1) <= UNITS_PER_WORD && \ + GET_MODE_SIZE (mode2) <= UNITS_PER_WORD)) + + +/* Register Classes. */ + +enum reg_class + { + NO_REGS, + PREDICATE_A_REGS, + PREDICATE_B_REGS, + PREDICATE_REGS, + PICREG, + SPREG, + CALL_USED_B_REGS, + NONPREDICATE_A_REGS, + NONPREDICATE_B_REGS, + NONPREDICATE_REGS, + A_REGS, + B_REGS, + GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES + }; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES { \ + "NO_REGS", \ + "PREDICATE_A_REGS", \ + "PREDICATE_B_REGS", \ + "PREDICATE_REGS", \ + "PICREG", \ + "SPREG", \ + "CALL_USED_B_REGS", \ + "NONPREDICATE_A_REGS", \ + "NONPREDICATE_B_REGS", \ + "NONPREDICATE_REGS", \ + "A_REGS", \ + "B_REGS", \ + "GENERAL_REGS", \ + "ALL_REGS" } + +#define REG_CLASS_CONTENTS \ +{ \ + /* NO_REGS. */ \ + { 0x00000000, 0x00000000, 0 }, \ + /* PREDICATE_A_REGS. */ \ + { 0x00000006, 0x00000000, 0 }, \ + /* PREDICATE_B_REGS. */ \ + { 0x00000000, 0x00000007, 0 }, \ + /* PREDICATE_REGS. */ \ + { 0x00000006, 0x00000007, 0 }, \ + /* PICREG. */ \ + { 0x00000000, 0x00004000, 0 }, \ + /* SPREG. */ \ + { 0x00000000, 0x00008000, 0 }, \ + /* CALL_USED_B_REGS. */ \ + { 0x00000000, 0xFFFF03FF, 0 }, \ + /* NONPREDICATE_A_REGS. */ \ + { 0xFFFFFFF9, 0x00000000, 0 }, \ + /* NONPREDICATE_B_REGS. */ \ + { 0x00000000, 0xFFFFFFF8, 0 }, \ + /* NONPREDICATE_REGS. */ \ + { 0xFFFFFFF9, 0xFFFFFFF8, 0 }, \ + /* A_REGS. */ \ + { 0xFFFFFFFF, 0x00000000, 3 }, \ + /* B_REGS. */ \ + { 0x00000000, 0xFFFFFFFF, 3 }, \ + /* GENERAL_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 3 }, \ + /* ALL_REGS. */ \ + { 0xFFFFFFFF, 0xFFFFFFFF, 7 }, \ +} + +#define A_REGNO_P(N) ((N) <= REG_A31) +#define B_REGNO_P(N) ((N) >= REG_B0 && (N) <= REG_B31) + +#define A_REG_P(X) (REG_P (X) && A_REGNO_P (REGNO (X))) +#define CROSS_OPERANDS(X0,X1) \ + (A_REG_P (X0) == A_REG_P (X1) ? CROSS_N : CROSS_Y) + +#define REGNO_REG_CLASS(reg) \ + ((reg) >= REG_A1 && (reg) <= REG_A2 ? PREDICATE_A_REGS \ + : (reg) == REG_A0 && TARGET_INSNS_64 ? PREDICATE_A_REGS \ + : (reg) >= REG_B0 && (reg) <= REG_B2 ? PREDICATE_B_REGS \ + : A_REGNO_P (reg) ? NONPREDICATE_A_REGS \ + : call_used_regs[reg] ? 
CALL_USED_B_REGS : B_REGS) + +#define BASE_REG_CLASS ALL_REGS +#define INDEX_REG_CLASS ALL_REGS + +#define REGNO_OK_FOR_BASE_STRICT_P(X) \ + ((X) < FIRST_PSEUDO_REGISTER \ + || (reg_renumber[X] >= 0 && reg_renumber[X] < FIRST_PSEUDO_REGISTER)) +#define REGNO_OK_FOR_BASE_NONSTRICT_P(X) 1 + +#define REGNO_OK_FOR_INDEX_STRICT_P(X) \ + ((X) < FIRST_PSEUDO_REGISTER \ + || (reg_renumber[X] >= 0 && reg_renumber[X] < FIRST_PSEUDO_REGISTER)) +#define REGNO_OK_FOR_INDEX_NONSTRICT_P(X) 1 + +#ifdef REG_OK_STRICT +#define REGNO_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_STRICT_P (X) +#define REGNO_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_STRICT_P (X) +#else +#define REGNO_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_NONSTRICT_P (X) +#define REGNO_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_NONSTRICT_P (X) +#endif + +#define CLASS_MAX_NREGS(class, mode) \ + ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +#define REGNO_OK_FOR_INDIRECT_JUMP_P(REGNO, MODE) B_REGNO_P (REGNO) + +/* Stack and Calling. */ + +/* SP points to 4 bytes below the first word of the frame. */ +#define STACK_POINTER_OFFSET 4 +/* Likewise for AP (which is the incoming stack pointer). */ +#define FIRST_PARM_OFFSET(fundecl) 4 +#define STARTING_FRAME_OFFSET 0 +#define FRAME_GROWS_DOWNWARD 1 +#define STACK_GROWS_DOWNWARD + +#define STACK_POINTER_REGNUM REG_B15 +#define HARD_FRAME_POINTER_REGNUM REG_A15 +/* These two always get eliminated in favour of the stack pointer + or the hard frame pointer. */ +#define FRAME_POINTER_REGNUM REG_FRAME +#define ARG_POINTER_REGNUM REG_ARGP + +#define PIC_OFFSET_TABLE_REGNUM REG_B14 + +/* We keep the stack pointer constant rather than using push/pop + instructions. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Before the prologue, the return address is in the B3 register. */ +#define RETURN_ADDR_REGNO REG_B3 +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, RETURN_ADDR_REGNO) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (RETURN_ADDR_REGNO) + +#define RETURN_ADDR_RTX(COUNT, FRAME) c6x_return_addr_rtx (COUNT) + +#define INCOMING_FRAME_SP_OFFSET 0 +#define ARG_POINTER_CFA_OFFSET(fundecl) 0 + +#define STATIC_CHAIN_REGNUM REG_A2 + +struct c6x_args { + /* Number of arguments to pass in registers. */ + int nregs; + /* Number of arguments passed in registers so far. */ + int count; +}; + +#define CUMULATIVE_ARGS struct c6x_args + +#define INIT_CUMULATIVE_ARGS(cum, fntype, libname, fndecl, n_named_args) \ + c6x_init_cumulative_args (&cum, fntype, libname, n_named_args) + +#define BLOCK_REG_PADDING(MODE, TYPE, FIRST) \ + (c6x_block_reg_pad_upward (MODE, TYPE, FIRST) ? upward : downward) + +#define FUNCTION_ARG_REGNO_P(r) \ + (((r) >= REG_A4 && (r) <= REG_A13) || ((r) >= REG_B4 && (r) <= REG_B13)) + +#define DEFAULT_PCC_STRUCT_RETURN 0 + +#define FUNCTION_PROFILER(file, labelno) \ + fatal_error ("profiling is not yet implemented for this architecture") + + +/* Trampolines. */ +#define TRAMPOLINE_SIZE 32 +#define TRAMPOLINE_ALIGNMENT 256 + +#define ELIMINABLE_REGS \ +{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}} \ + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = c6x_initial_elimination_offset ((FROM), (TO))) + +/* Addressing Modes. 
*/ + +#define CONSTANT_ADDRESS_P(x) (CONSTANT_P(x) && GET_CODE(x) != CONST_DOUBLE) +#define MAX_REGS_PER_ADDRESS 2 + +#define HAVE_PRE_DECREMENT 1 +#define HAVE_POST_DECREMENT 1 +#define HAVE_PRE_INCREMENT 1 +#define HAVE_POST_INCREMENT 1 + +/* Register forms are available, but due to scaling we currently don't + support them. */ +#define HAVE_PRE_MODIFY_DISP 1 +#define HAVE_POST_MODIFY_DISP 1 + +#define LEGITIMATE_PIC_OPERAND_P(X) \ + (!symbolic_operand (X, SImode)) + +struct GTY(()) machine_function +{ + /* True if we expanded a sibling call. */ + int contains_sibcall; +}; + +/* Costs. */ +#define NO_FUNCTION_CSE 1 + +#define SLOW_BYTE_ACCESS 0 + +#define BRANCH_COST(speed_p, predictable_p) 6 + + +/* Model costs for the vectorizer. */ + +/* Cost of conditional branch. */ +#ifndef TARG_COND_BRANCH_COST +#define TARG_COND_BRANCH_COST 6 +#endif + +/* Cost of any scalar operation, excluding load and store. */ +#ifndef TARG_SCALAR_STMT_COST +#define TARG_SCALAR_STMT_COST 1 +#endif + +/* Cost of scalar load. */ +#undef TARG_SCALAR_LOAD_COST +#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */ + +/* Cost of scalar store. */ +#undef TARG_SCALAR_STORE_COST +#define TARG_SCALAR_STORE_COST 10 + +/* Cost of any vector operation, excluding load, store, + or vector to scalar operation. */ +#undef TARG_VEC_STMT_COST +#define TARG_VEC_STMT_COST 1 + +/* Cost of vector to scalar operation. */ +#undef TARG_VEC_TO_SCALAR_COST +#define TARG_VEC_TO_SCALAR_COST 1 + +/* Cost of scalar to vector operation. */ +#undef TARG_SCALAR_TO_VEC_COST +#define TARG_SCALAR_TO_VEC_COST 1 + +/* Cost of aligned vector load. */ +#undef TARG_VEC_LOAD_COST +#define TARG_VEC_LOAD_COST 1 + +/* Cost of misaligned vector load. */ +#undef TARG_VEC_UNALIGNED_LOAD_COST +#define TARG_VEC_UNALIGNED_LOAD_COST 2 + +/* Cost of vector store. */ +#undef TARG_VEC_STORE_COST +#define TARG_VEC_STORE_COST 1 + +/* Cost of vector permutation. */ +#ifndef TARG_VEC_PERMUTE_COST +#define TARG_VEC_PERMUTE_COST 1 +#endif + +/* ttype entries (the only interesting data references used) are + sb-relative got-indirect (aka .ehtype). */ +#define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \ + (((code) == 0 && (data) == 1) ? (DW_EH_PE_datarel | DW_EH_PE_indirect) \ + : DW_EH_PE_absptr) + +/* This should be the same as the definition in elfos.h, plus the call + to output special unwinding directives. */ +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + c6x_output_file_unwind (FILE); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } \ + while (0) + +/* This should be the same as the definition in elfos.h, plus the call + to output special unwinding directives. */ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL) \ + c6x_function_end (STREAM, NAME) + +/* Arbitrarily choose A4/A5. */ +#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? (N) + 4 : INVALID_REGNUM) + +/* The register that holds the return address in exception handlers. */ +#define C6X_EH_STACKADJ_REGNUM 3 +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, C6X_EH_STACKADJ_REGNUM) + + +/* Assembler Format. 
*/ + +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + +#undef ASM_APP_ON +#define ASM_APP_ON "\t; #APP \n" +#undef ASM_APP_OFF +#define ASM_APP_OFF "\t; #NO_APP \n" + +#define ASM_OUTPUT_COMMON(stream, name, size, rounded) +#define ASM_OUTPUT_LOCAL(stream, name, size, rounded) + +#define GLOBAL_ASM_OP "\t.global\t" + +#define REGISTER_NAMES \ + { \ + "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", \ + "A8", "A9", "A10", "A11", "A12", "A13", "A14", "A15", \ + "A16", "A17", "A18", "A19", "A20", "A21", "A22", "A23", \ + "A24", "A25", "A26", "A27", "A28", "A29", "A30", "A31", \ + "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", \ + "B8", "B9", "B10", "B11", "B12", "B13", "B14", "B15", \ + "B16", "B17", "B18", "B19", "B20", "B21", "B22", "B23", \ + "B24", "B25", "B26", "B27", "B28", "B29", "B30", "B31", \ + "FP", "ARGP", "ILC" } + +#define DBX_REGISTER_NUMBER(N) (dbx_register_map[(N)]) + +extern unsigned const dbx_register_map[FIRST_PSEUDO_REGISTER]; + +#define FINAL_PRESCAN_INSN c6x_final_prescan_insn + +#define TEXT_SECTION_ASM_OP ".text;" +#define DATA_SECTION_ASM_OP ".data;" + +#define ASM_OUTPUT_ALIGN(stream, power) \ + do \ + { \ + if (power) \ + fprintf ((stream), "\t.align\t%d\n", power); \ + } \ + while (0) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { char __buf[256]; \ + fprintf (FILE, "\t.long\t"); \ + ASM_GENERATE_INTERNAL_LABEL (__buf, "L", VALUE); \ + assemble_name (FILE, __buf); \ + fputc ('\n', FILE); \ + } while (0) + +/* Determine whether to place EXP (an expression or a decl) should be + placed into one of the small data sections. */ +#define PLACE_IN_SDATA_P(EXP) \ + (c6x_sdata_mode == C6X_SDATA_NONE ? false \ + : c6x_sdata_mode == C6X_SDATA_ALL ? true \ + : !AGGREGATE_TYPE_P (TREE_TYPE (EXP))) + +#define SCOMMON_ASM_OP "\t.scomm\t" + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + if (DECL != NULL && PLACE_IN_SDATA_P (DECL)) \ + fprintf ((FILE), "%s", SCOMMON_ASM_OP); \ + else \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ",%u,%u\n", (int)(SIZE), (ALIGN) / BITS_PER_UNIT);\ + } \ + while (0) + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. */ + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ +do { \ + if (PLACE_IN_SDATA_P (DECL)) \ + switch_to_section (sbss_section); \ + else \ + switch_to_section (bss_section); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_ALIGN ((FILE), exact_log2((ALIGN) / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL(FILE, NAME); \ + ASM_OUTPUT_SKIP((FILE), (SIZE) ? (SIZE) : 1); \ +} while (0) + +#define CASE_VECTOR_PC_RELATIVE flag_pic +#define JUMP_TABLES_IN_TEXT_SECTION flag_pic + +#define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2) + +/* This is how to output an element of a case-vector that is relative. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + do { char buf[100]; \ + fputs ("\t.long ", FILE); \ + ASM_GENERATE_INTERNAL_LABEL (buf, "L", VALUE); \ + assemble_name (FILE, buf); \ + putc ('-', FILE); \ + ASM_GENERATE_INTERNAL_LABEL (buf, "L", REL); \ + assemble_name (FILE, buf); \ + putc ('\n', FILE); \ + } while (0) + +/* Misc. 
*/ + +#define CASE_VECTOR_MODE SImode +#define MOVE_MAX 4 +#define MOVE_RATIO(SPEED) 4 +#define TRULY_NOOP_TRUNCATION(outprec, inprec) 1 +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define Pmode SImode +#define FUNCTION_MODE QImode + +#define CPU_UNITS_QUERY 1 + +extern int c6x_initial_flag_pic; + +#endif /* GCC_C6X_H */ diff --git a/gcc-4.9/gcc/config/c6x/c6x.md b/gcc-4.9/gcc/config/c6x/c6x.md new file mode 100644 index 000000000..53032b1f0 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x.md @@ -0,0 +1,3136 @@ +;; Machine description for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Andrew Jenner +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; Register names + +(define_constants + [(REG_A0 0) + (REG_A1 1) + (REG_A2 2) + (REG_A3 3) + (REG_A4 4) + (REG_A5 5) + (REG_A6 6) + (REG_A7 7) + (REG_A8 8) + (REG_A9 9) + (REG_A10 10) + (REG_A11 11) + (REG_A12 12) + (REG_A13 13) + (REG_A14 14) + (REG_A15 15) + (REG_A16 16) + (REG_A17 17) + (REG_A18 18) + (REG_A19 19) + (REG_A20 20) + (REG_A21 21) + (REG_A22 22) + (REG_A23 23) + (REG_A24 24) + (REG_A25 25) + (REG_A26 26) + (REG_A27 27) + (REG_A28 28) + (REG_A29 29) + (REG_A30 30) + (REG_A31 31) + (REG_B0 32) + (REG_B1 33) + (REG_B2 34) + (REG_B3 35) + (REG_B4 36) + (REG_B5 37) + (REG_B6 38) + (REG_B7 39) + (REG_B8 40) + (REG_B9 41) + (REG_B10 42) + (REG_B11 43) + (REG_B12 44) + (REG_B13 45) + (REG_B14 46) + (REG_SP 47) + (REG_B15 47) + (REG_B16 48) + (REG_B17 49) + (REG_B18 50) + (REG_B19 51) + (REG_B20 52) + (REG_B21 53) + (REG_B22 54) + (REG_B23 55) + (REG_B24 56) + (REG_B25 57) + (REG_B26 58) + (REG_B27 59) + (REG_B28 60) + (REG_B29 61) + (REG_B30 62) + (REG_B31 63) + (REG_FRAME 64) + (REG_ARGP 65) + (REG_ILC 66)]) + +(define_c_enum "unspec" [ + UNSPEC_NOP + UNSPEC_RCP + UNSPEC_MISALIGNED_ACCESS + UNSPEC_ADDKPC + UNSPEC_SETUP_DSBT + UNSPEC_LOAD_GOT + UNSPEC_LOAD_SDATA + UNSPEC_BITREV + UNSPEC_GOTOFF + UNSPEC_MVILC + UNSPEC_REAL_JUMP + UNSPEC_REAL_LOAD + UNSPEC_REAL_MULT + UNSPEC_JUMP_SHADOW + UNSPEC_LOAD_SHADOW + UNSPEC_MULT_SHADOW + UNSPEC_EPILOGUE_BARRIER + UNSPEC_ATOMIC + UNSPEC_CLR + UNSPEC_EXT + UNSPEC_EXTU + UNSPEC_SUBC + UNSPEC_AVG +]) + +(define_c_enum "unspecv" [ + UNSPECV_BLOCKAGE + UNSPECV_SPLOOP + UNSPECV_SPKERNEL + UNSPECV_EH_RETURN + UNSPECV_CAS +]) + +;; ------------------------------------------------------------------------- +;; Instruction attributes +;; ------------------------------------------------------------------------- + +(define_attr "cpu" + "c62x,c64x,c64xp,c67x,c67xp,c674x" + (const (symbol_ref "(enum attr_cpu)c6x_arch"))) + +;; Define a type for each insn which is used in the scheduling description. +;; These correspond to the types defined in chapter 4 of the C674x manual. 
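For orientation, the user-visible surface of this back end is the set of predefined macros installed by TARGET_CPU_CPP_BUILTINS in c6x.h and the __builtin_c6x_* intrinsics registered by c6x_init_builtins in c6x.c earlier in this patch. The following is a minimal usage sketch and is not part of the imported sources: only the macro name, the builtin names and their signatures come from the patch; the vector typedef and the wrapper functions are illustrative.

/* Sketch only; assumes a compiler built with this C6X back end.  */
#ifndef __TMS320C6X__
#error "this example targets the TI C6X port"
#endif

/* V2HImode: a vector of two 16-bit integers (see c6x_init_builtins).  */
typedef short v2hi __attribute__ ((vector_size (4)));

int saturating_add (int a, int b)
{
  /* int __builtin_c6x_sadd (int, int); expands via saddsi3.  */
  return __builtin_c6x_sadd (a, b);
}

v2hi add_pairs (v2hi a, v2hi b)
{
  /* v2hi __builtin_c6x_add2 (v2hi, v2hi); expands via addv2hi3.  */
  return __builtin_c6x_add2 (a, b);
}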
+(define_attr "type" + "unknown,single,mpy2,store,storen,mpy4,load,loadn,branch,call,callp,dp2,fp4, + intdp,cmpdp,adddp,mpy,mpyi,mpyid,mpydp,mpyspdp,mpysp2dp,spkernel,sploop, + mvilc,blockage,shadow,load_shadow,mult_shadow,atomic" + (const_string "single")) + +;; The register file used by an instruction's destination register. +;; The function destreg_file computes this; instructions can override the +;; attribute if they aren't a single_set. +(define_attr "dest_regfile" + "unknown,any,a,b" + (cond [(eq_attr "type" "single,load,mpy2,mpy4,dp2,fp4,intdp,cmpdp,adddp,mpy,mpyi,mpyid,mpydp,mpyspdp,mpysp2dp") + (cond [(match_operand 0 "a_register" "") (const_string "a") + (match_operand 0 "b_register" "") (const_string "b")] + (const_string "unknown")) + (eq_attr "type" "store") + (cond [(match_operand 1 "a_register" "") (const_string "a") + (match_operand 1 "b_register" "") (const_string "b")] + (const_string "unknown"))] + (const_string "unknown"))) + +(define_attr "addr_regfile" + "unknown,a,b" + (const_string "unknown")) + +(define_attr "cross" + "n,y" + (const_string "n")) + +;; This describes the relationship between operands and register files. +;; For example, "sxs" means that operands 0 and 2 determine the side of +;; the machine, and operand 1 can optionally use the cross path. "dt" and +;; "td" are used to describe loads and stores. +;; Used for register renaming in loops for improving modulo scheduling. +(define_attr "op_pattern" + "unknown,dt,td,sx,sxs,ssx" + (cond [(eq_attr "type" "load") (const_string "td") + (eq_attr "type" "store") (const_string "dt")] + (const_string "unknown"))) + +(define_attr "has_shadow" + "n,y" + (const_string "n")) + +;; The number of cycles the instruction takes to finish. Any cycles above +;; the first are delay slots. +(define_attr "cycles" "" + (cond [(eq_attr "type" "branch,call") (const_int 6) + (eq_attr "type" "load,loadn") (const_int 5) + (eq_attr "type" "dp2") (const_int 2) + (eq_attr "type" "mpy2") (const_int 2) + (eq_attr "type" "mpy4") (const_int 4) + (eq_attr "type" "fp4") (const_int 4) + (eq_attr "type" "mvilc") (const_int 4) + (eq_attr "type" "cmpdp") (const_int 2) + (eq_attr "type" "intdp") (const_int 5) + (eq_attr "type" "adddp") (const_int 7) + (eq_attr "type" "mpydp") (const_int 10) + (eq_attr "type" "mpyi") (const_int 9) + (eq_attr "type" "mpyid") (const_int 10) + (eq_attr "type" "mpyspdp") (const_int 7) + (eq_attr "type" "mpysp2dp") (const_int 5)] + (const_int 1))) + +;; The number of cycles during which the instruction reserves functional +;; units. +(define_attr "reserve_cycles" "" + (cond [(eq_attr "type" "cmpdp") (const_int 2) + (eq_attr "type" "adddp") (const_int 2) + (eq_attr "type" "mpydp") (const_int 4) + (eq_attr "type" "mpyi") (const_int 4) + (eq_attr "type" "mpyid") (const_int 4) + (eq_attr "type" "mpyspdp") (const_int 2)] + (const_int 1))) + +(define_attr "predicable" "no,yes" + (const_string "yes")) + +(define_attr "enabled" "no,yes" + (const_string "yes")) + +;; Specify which units can be used by a given instruction. Normally, +;; dest_regfile is used to select between the two halves of the machine. +;; D_ADDR is for load/store instructions; they use the D unit and use +;; addr_regfile to choose between D1 and D2. 
+ +(define_attr "units62" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (const_string "unknown")) + +(define_attr "units64" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (const_string "unknown")) + +(define_attr "units64p" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (attr "units64")) + +(define_attr "units67" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (attr "units62")) + +(define_attr "units67p" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (attr "units67")) + +(define_attr "units674" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (attr "units64")) + +(define_attr "units" + "unknown,d,d_addr,l,m,s,dl,ds,dls,ls" + (cond [(eq_attr "cpu" "c62x") + (attr "units62") + (eq_attr "cpu" "c67x") + (attr "units67") + (eq_attr "cpu" "c67xp") + (attr "units67p") + (eq_attr "cpu" "c64x") + (attr "units64") + (eq_attr "cpu" "c64xp") + (attr "units64p") + (eq_attr "cpu" "c674x") + (attr "units674") + ] + (const_string "unknown"))) + +(define_automaton "c6x_1,c6x_2,c6x_m1,c6x_m2,c6x_t1,c6x_t2,c6x_branch") +(automata_option "no-comb-vect") +(automata_option "ndfa") +(automata_option "collapse-ndfa") + +(define_query_cpu_unit "d1,l1,s1" "c6x_1") +(define_cpu_unit "x1" "c6x_1") +(define_cpu_unit "l1w,s1w" "c6x_1") +(define_query_cpu_unit "m1" "c6x_m1") +(define_cpu_unit "m1w" "c6x_m1") +(define_cpu_unit "t1" "c6x_t1") +(define_query_cpu_unit "d2,l2,s2" "c6x_2") +(define_cpu_unit "x2" "c6x_2") +(define_cpu_unit "l2w,s2w" "c6x_2") +(define_query_cpu_unit "m2" "c6x_m2") +(define_cpu_unit "m2w" "c6x_m2") +(define_cpu_unit "t2" "c6x_t2") +;; A special set of units used to identify specific reservations, rather than +;; just units. +(define_query_cpu_unit "fps1,fpl1,adddps1,adddpl1" "c6x_1") +(define_query_cpu_unit "fps2,fpl2,adddps2,adddpl2" "c6x_2") + +;; There can be up to two branches in one cycle (on the .s1 and .s2 +;; units), but some instructions must not be scheduled in parallel +;; with a branch. We model this by reserving either br0 or br1 for a +;; normal branch, and both of them for an insn such as callp. +;; Another constraint is that two branches may only execute in parallel +;; if one uses an offset, and the other a register. We can distinguish +;; these by the dest_regfile attribute; it is "any" iff the branch uses +;; an offset. br0 is reserved for these, while br1 is reserved for +;; branches using a register. +(define_cpu_unit "br0,br1" "c6x_branch") + +(include "c6x-sched.md") + +;; Some reservations which aren't generated from c6x-sched.md.in + +(define_insn_reservation "branch_s1any" 6 + (and (eq_attr "type" "branch") + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "any")))) + "s1+s1w+br0") + +;; For calls, we also reserve the units needed in the following cycles +;; to load the return address. There are two options; using addkpc or +;; mvkh/mvkl. The code in c6x_reorg knows whether to use one of these +;; or whether to use callp. The actual insns are emitted only after +;; the final scheduling pass is complete. +;; We always reserve S2 for PC-relative call insns, since that allows +;; us to turn them into callp insns later on. 
+(define_insn_reservation "call_addkpc_s1any" 6 + (and (eq_attr "type" "call") + (and (ne (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "any"))))) + "s2+s2w+br0,s2+s2w+br0+br1") + +(define_insn_reservation "call_mvk_s1any" 6 + (and (eq_attr "type" "call") + (and (eq (symbol_ref "TARGET_INSNS_64") (const_int 0)) + (and (eq_attr "cross" "n") + (and (eq_attr "units" "s") + (eq_attr "dest_regfile" "any"))))) + "s2+s2w+br0,s2+s2w,s2+s2w") + +(define_reservation "all" "s1+s2+d1+d2+l1+l2+m1+m2") + +(define_insn_reservation "callp_s1" 1 + (and (eq_attr "type" "callp") (eq_attr "dest_regfile" "a")) + "s1+s1w,all*5") + +(define_insn_reservation "callp_s2" 1 + (and (eq_attr "type" "callp") (eq_attr "dest_regfile" "b")) + "s2+s2w,all*5") + +;; Constraints + +(include "constraints.md") + +;; Predicates + +(include "predicates.md") + +;; General predication pattern. + +(define_cond_exec + [(match_operator 0 "eqne_operator" + [(match_operand 1 "predicate_register" "AB") + (const_int 0)])] + "" + "") + +;; ------------------------------------------------------------------------- +;; NOP instruction +;; ------------------------------------------------------------------------- + +(define_insn "nop" + [(const_int 0)] + "" + "nop") + +(define_insn "nop_count" + [(unspec [(match_operand 0 "const_int_operand" "n")] UNSPEC_NOP)] + "" + "%|%.\\tnop\\t%0") + +;; ------------------------------------------------------------------------- +;; Move instructions +;; ------------------------------------------------------------------------- + +(define_mode_iterator QIHIM [QI HI]) +(define_mode_iterator SIDIM [SI DI]) +(define_mode_iterator SIDIVM [SI DI V2HI V4QI]) +(define_mode_iterator VEC4M [V2HI V4QI]) +(define_mode_iterator VEC8M [V2SI V4HI V8QI]) +(define_mode_iterator SISFVM [SI SF V2HI V4QI]) +(define_mode_iterator DIDFM [DI DF]) +(define_mode_iterator DIDFVM [DI DF V2SI V4HI V8QI]) +(define_mode_iterator SFDFM [SF DF]) +(define_mode_iterator M32 [QI HI SI SF V2HI V4QI]) + +;; The C6X LO_SUM and HIGH are backwards - HIGH sets the low bits, and +;; LO_SUM adds in the high bits. Fortunately these are opaque operations +;; so this does not matter. +(define_insn "movsi_lo_sum" + [(set (match_operand:SI 0 "register_operand" "=ab") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_or_symbolic_operand" "i")))] + "reload_completed" + "%|%.\\tmvkh\\t%$\\t%2, %0" + [(set_attr "units" "s")]) + +(define_insn "movsi_high" + [(set (match_operand:SI 0 "register_operand" "=ab") + (high:SI (match_operand:SI 1 "const_int_or_symbolic_operand" "i")))] + "reload_completed" + "%|%.\\tmvkl\\t%$\\t%1, %0" + [(set_attr "units" "s")]) + +(define_insn "movsi_gotoff_lo_sum" + [(set (match_operand:SI 0 "register_operand" "=ab") + (lo_sum:SI (match_operand:SI 1 "register_operand" "0") + (unspec:SI [(match_operand:SI 2 "symbolic_operand" "S2")] + UNSPEC_GOTOFF)))] + "flag_pic == 2" + "%|%.\\tmvkh\\t%$\\t$dpr_got%2, %0" + [(set_attr "units" "s")]) + +(define_insn "movsi_gotoff_high" + [(set (match_operand:SI 0 "register_operand" "=ab") + (high:SI (unspec:SI [(match_operand:SI 1 "symbolic_operand" "S2")] + UNSPEC_GOTOFF)))] + "flag_pic == 2" + "%|%.\\tmvkl\\t%$\\t$dpr_got%1, %0" + [(set_attr "units" "s")]) + +;; Normally we'd represent this as a normal load insn, but we can't currently +;; represent the addressing mode. 
+(define_insn "load_got_gotoff" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "Z,Z") + (match_operand:SI 2 "register_operand" "b,b")] + UNSPEC_GOTOFF))] + "flag_pic == 2" + "%|%.\\tldw\\t%$\\t*+%1[%2], %0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "op_pattern" "unknown") + (set_attr "dest_regfile" "a,b") + (set_attr "addr_regfile" "b")]) + +(define_insn "*movstricthi_high" + [(set (match_operand:SI 0 "register_operand" "+ab") + (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (ashift:SI (match_operand:SI 1 "const_int_operand" "IuB") + (const_int 16))))] + "reload_completed" + "%|%.\\tmvklh\\t%$\\t%1, %0" + [(set_attr "units" "s")]) + +;; Break up SImode loads of immediate operands. + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "reload_completed + && !satisfies_constraint_IsB (operands[1])" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (ashift:SI (match_dup 3) (const_int 16))))] +{ + HOST_WIDE_INT val = INTVAL (operands[1]); + operands[2] = GEN_INT (trunc_int_for_mode (val, HImode)); + operands[3] = GEN_INT ((val >> 16) & 65535); +}) + +(define_split + [(set (match_operand:VEC4M 0 "register_operand" "") + (match_operand:VEC4M 1 "const_vector_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3))] +{ + unsigned HOST_WIDE_INT mask, val; + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int i; + + val = 0; + mask = GET_MODE_MASK (inner_mode); + if (TARGET_BIG_ENDIAN) + { + for (i = 0; i < GET_MODE_NUNITS (mode); i++) + { + val <<= GET_MODE_BITSIZE (inner_mode); + val |= INTVAL (CONST_VECTOR_ELT (operands[1], i)) & mask; + } + } + else + { + i = GET_MODE_NUNITS (mode); + while (i-- > 0) + { + val <<= GET_MODE_BITSIZE (inner_mode); + val |= INTVAL (CONST_VECTOR_ELT (operands[1], i)) & mask; + } + } + operands[2] = gen_rtx_REG (SImode, REGNO (operands[0])); + operands[3] = GEN_INT (trunc_int_for_mode (val, SImode)); +}) + +(define_split + [(set (match_operand:VEC8M 0 "register_operand" "") + (match_operand:VEC8M 1 "const_vector_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + unsigned HOST_WIDE_INT mask; + unsigned HOST_WIDE_INT val[2]; + rtx lo_half, hi_half; + enum machine_mode inner_mode = GET_MODE_INNER (mode); + int i, j; + + split_di (operands, 1, &lo_half, &hi_half); + + val[0] = val[1] = 0; + mask = GET_MODE_MASK (inner_mode); + if (TARGET_BIG_ENDIAN) + { + for (i = 0, j = 1; i < GET_MODE_NUNITS (mode); i++) + { + if (i * 2 == GET_MODE_NUNITS (mode)) + j--; + val[j] <<= GET_MODE_BITSIZE (inner_mode); + val[j] |= INTVAL (CONST_VECTOR_ELT (operands[1], i)) & mask; + } + } + else + { + i = GET_MODE_NUNITS (mode); + j = 1; + while (i-- > 0) + { + val[j] <<= GET_MODE_BITSIZE (inner_mode); + val[j] |= INTVAL (CONST_VECTOR_ELT (operands[1], i)) & mask; + if (i * 2 == GET_MODE_NUNITS (mode)) + j--; + } + } + operands[2] = lo_half; + operands[3] = GEN_INT (trunc_int_for_mode (val[0], SImode)); + operands[4] = hi_half; + operands[5] = GEN_INT (trunc_int_for_mode (val[1], SImode)); +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "immediate_operand" ""))] + "reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 2) (ior:SI (and:SI (match_dup 2) (const_int 65535)) + (ashift:SI (match_dup 4) (const_int 16))))] +{ + long 
values; + REAL_VALUE_TYPE value; + + gcc_assert (GET_CODE (operands[1]) == CONST_DOUBLE); + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (value, values); + + operands[2] = gen_rtx_REG (SImode, true_regnum (operands[0])); + operands[3] = GEN_INT (trunc_int_for_mode (values, HImode)); + if (values >= -32768 && values < 32768) + { + emit_move_insn (operands[2], operands[3]); + DONE; + } + operands[4] = GEN_INT ((values >> 16) & 65535); +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "symbolic_operand" ""))] + "reload_completed + && (!TARGET_INSNS_64PLUS + || !sdata_symbolic_operand (operands[1], SImode))" + [(set (match_dup 0) (high:SI (match_dup 1))) + (set (match_dup 0) (lo_sum:SI (match_dup 0) (match_dup 1)))] + "") + +;; Normally, we represent the load of an sdata address as a normal +;; move of a SYMBOL_REF. In DSBT mode, B14 is not constant, so we +;; should show the dependency. +(define_insn "load_sdata_pic" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (plus:SI (match_operand:SI 1 "pic_register_operand" "Z,Z") + (unspec:SI [(match_operand:SI 2 "sdata_symbolic_operand" "S0,S0")] + UNSPEC_LOAD_SDATA)))] + "flag_pic" + "@ + %|%.\\tadda%D2\\t%$\\t%1, %2, %0 + %|%.\\tadda%D2\\t%$\\t%1, %2, %0" + [(set_attr "units" "d") + (set_attr "cross" "y,n") + (set_attr "op_pattern" "unknown") + (set_attr "predicable" "no")]) + +;; Move instruction patterns + +(define_mode_attr LDST_SUFFIX [(QI "b") (HI "h") + (SI "w") (SF "w") (V2HI "w") (V4QI "w") + (DI "dw") (V2SI "dw") (V4HI "dw") (V8QI "dw")]) + +(define_insn "mov_insn" + [(set (match_operand:QIHIM 0 "nonimmediate_operand" + "=a,b, a, b, ab, ab,a,?a, b,?b, Q, R, R, Q") + (match_operand:QIHIM 1 "general_operand" + "a,b,?b,?a,Is5,IsB,Q, R, R, Q, a,?a, b,?b"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG" + "@ + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmvk\\t%$\\t%1, %0 + %|%.\\tmvk\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tst\\t%$\\t%1, %0 + %|%.\\tst\\t%$\\t%1, %0 + %|%.\\tst\\t%$\\t%1, %0 + %|%.\\tst\\t%$\\t%1, %0" + [(set_attr "type" "*,*,*,*,*,*,load,load,load,load,store,store,store,store") + (set_attr "units62" "dls,dls,ls,ls,s,s,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "units64" "dls,dls,ls,ls,dl,s,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "op_pattern" "sx,sx,sx,sx,*,*,*,*,*,*,*,*,*,*") + (set_attr "addr_regfile" "*,*,*,*,*,*,a,b,b,a,a,b,b,a") + (set_attr "dest_regfile" "*,*,*,*,*,*,a,a,b,b,a,a,b,b") + (set_attr "cross" "n,n,y,y,n,n,n,y,n,y,n,y,n,y")]) + +(define_insn "mov_insn" + [(set (match_operand:SISFVM 0 "nonimmediate_operand" + "=a,b, a, b, ab, ab,a,b,ab,a,?a, b,?b, Q, R, R, Q") + (match_operand:SISFVM 1 "general_operand" + "a,b,?b,?a,Is5,IsB,S0,S0,Si,Q, R, R, Q, a,?a, b,?b"))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) == REG + || (GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))))" + "@ + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmv\\t%$\\t%1, %0 + %|%.\\tmvk\\t%$\\t%1, %0 + %|%.\\tmvk\\t%$\\t%1, %0 + %|%.\\tadda%D1\\t%$\\tB14, %1, %0 + %|%.\\tadda%D1\\t%$\\tB14, %1, %0 + # + %|%.\\tldw\\t%$\\t%1, %0 + %|%.\\tldw\\t%$\\t%1, %0 + %|%.\\tldw\\t%$\\t%1, %0 + %|%.\\tldw\\t%$\\t%1, %0 + %|%.\\tstw\\t%$\\t%1, %0 + %|%.\\tstw\\t%$\\t%1, %0 
+ %|%.\\tstw\\t%$\\t%1, %0 + %|%.\\tstw\\t%$\\t%1, %0" + [(set_attr "type" "*,*,*,*,*,*,*,*,*,load,load,load,load,store,store,store,store") + (set_attr "units62" "dls,dls,ls,ls,s,s,d,d,*,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "units64" "dls,dls,ls,ls,dl,s,d,d,*,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "op_pattern" "sx,sx,sx,sx,*,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "addr_regfile" "*,*,*,*,*,*,*,*,*,a,b,b,a,a,b,b,a") + (set_attr "dest_regfile" "*,*,*,*,*,*,*,*,*,a,a,b,b,a,a,b,b") + (set_attr "cross" "n,n,y,y,n,n,y,n,*,n,y,n,y,n,y,n,y") + (set_attr "predicable" "yes,yes,yes,yes,yes,yes,no,no,yes,yes,yes,yes,yes,yes,yes,yes,yes")]) + +(define_insn "*mov_insn" + [(set (match_operand:DIDFVM 0 "nonimmediate_operand" + "=a,b, a, b,ab,a,?a, b,?b, Q, R, R, Q") + (match_operand:DIDFVM 1 "general_operand" + "a,b,?b,?a,iF,Q, R, R, Q, a,?a, b,?b"))] + "(!MEM_P (operands[0]) || REG_P (operands[1]) + || (GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))))" +{ + if (MEM_P (operands[1]) && TARGET_LDDW) + return "%|%.\\tlddw\\t%$\\t%1, %0"; + if (MEM_P (operands[0]) && TARGET_STDW) + return "%|%.\\tstdw\\t%$\\t%1, %0"; + if (TARGET_INSNS_64PLUS && REG_P (operands[0]) && REG_P (operands[1]) + && A_REGNO_P (REGNO (operands[0])) == A_REGNO_P (REGNO (operands[1]))) + return "%|%.\\tdmv\\t%$\\t%P1, %p1, %0"; + return "#"; +} + [(set_attr "units" "s,s,*,*,*,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr,d_addr") + (set_attr "addr_regfile" "*,*,*,*,*,a,b,b,a,a,b,b,a") + (set_attr "dest_regfile" "*,*,*,*,*,a,a,b,b,a,a,b,b") + (set_attr "type" "*,*,*,*,*,load,load,load,load,store,store,store,store") + (set_attr "cross" "n,n,y,y,*,n,y,n,y,n,y,n,y")]) + +(define_split + [(set (match_operand:DIDFVM 0 "nonimmediate_operand" "") + (match_operand:DIDFVM 1 "general_operand" ""))] + "reload_completed + && !((MEM_P (operands[0]) && TARGET_STDW) + || (MEM_P (operands[1]) && TARGET_LDDW)) + && !const_vector_operand (operands[1], mode) + && !(TARGET_INSNS_64PLUS && REG_P (operands[0]) && REG_P (operands[1]) + && A_REGNO_P (REGNO (operands[0])) == A_REGNO_P (REGNO (operands[1])))" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx lo_half[2], hi_half[2]; + split_di (operands, 2, lo_half, hi_half); + + /* We can't have overlap for a register-register move, but if + memory is involved, we have to make sure we don't clobber the + address. 
*/ + if (reg_overlap_mentioned_p (lo_half[0], hi_half[1])) + { + operands[2] = hi_half[0]; + operands[3] = hi_half[1]; + operands[4] = lo_half[0]; + operands[5] = lo_half[1]; + } + else + { + operands[2] = lo_half[0]; + operands[3] = lo_half[1]; + operands[4] = hi_half[0]; + operands[5] = hi_half[1]; + } +}) + +(define_insn "real_load" + [(unspec [(match_operand 0 "const_int_operand" "JA,JA,JB,JB") + (match_operand:M32 1 "memory_operand" "Q,R,R,Q")] + UNSPEC_REAL_LOAD)] + "" + "%|%.\\tld\\t%$\\t%1, %k0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "a,b,b,a") + (set_attr "dest_regfile" "a,a,b,b") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "real_load" + [(unspec [(match_operand 0 "const_int_operand" "JA,JA,JB,JB") + (match_operand:DIDFVM 1 "memory_operand" "Q,R,R,Q")] + UNSPEC_REAL_LOAD)] + "TARGET_LDDW" + "%|%.\\tlddw\\t%$\\t%1, %K0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "a,b,b,a") + (set_attr "dest_regfile" "a,a,b,b") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "load_shadow" + [(set (match_operand 0 "register_operand" "=ab") + (unspec [(pc)] UNSPEC_LOAD_SHADOW))] + "" + ";; load to %0 occurs" + [(set_attr "type" "load_shadow")]) + +(define_insn "mult_shadow" + [(set (match_operand 0 "register_operand" "=ab") + (unspec [(pc)] UNSPEC_MULT_SHADOW))] + "" + ";; multiplication occurs and stores to %0" + [(set_attr "type" "mult_shadow")]) + + +(define_mode_iterator MOV [QI HI SI SF DI DF V2HI V4QI V2SI V4HI V8QI]) + +(define_expand "mov" + [(set (match_operand:MOV 0 "nonimmediate_operand" "") + (match_operand:MOV 1 "general_operand" ""))] + "" +{ + if (expand_move (operands, mode)) + DONE; +}) + +(define_expand "movmisalign" + [(set (match_operand:SIDIVM 0 "nonimmediate_operand" "") + (unspec:SIDIVM [(match_operand:SIDIVM 1 "nonimmediate_operand" "")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_INSNS_64" +{ + if (memory_operand (operands[0], mode)) + { + emit_insn (gen_movmisalign_store (operands[0], operands[1])); + DONE; + } +}) + +(define_insn_and_split "movmisalign_store" + [(set (match_operand:SIDIVM 0 "memory_operand" "=W,Q,T,Q,T") + (unspec:SIDIVM [(match_operand:SIDIVM 1 "register_operand" "r,a,b,b,a")] + UNSPEC_MISALIGNED_ACCESS)) + (clobber (match_scratch:SI 2 "=r,X,X,X,X"))] + "TARGET_INSNS_64" + "@ + # + %|%.\\tstn\\t%$\\t%1, %0 + %|%.\\tstn\\t%$\\t%1, %0 + %|%.\\tstn\\t%$\\t%1, %0 + %|%.\\tstn\\t%$\\t%1, %0" + "&& reload_completed && satisfies_constraint_W (operands[0])" + [(parallel + [(set (match_dup 3) (unspec:SIDIVM [(match_dup 1)] UNSPEC_MISALIGNED_ACCESS)) + (clobber (match_dup 4))])] +{ + rtx addr = XEXP (operands[0], 0); + rtx tmpreg = operands[2]; + + if (GET_CODE (addr) == PLUS && XEXP (addr, 0) == stack_pointer_rtx + && GET_CODE (XEXP (addr, 1)) == CONST_INT) + { + unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); + val &= GET_MODE_SIZE (mode) - 1; + if (val == 0) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + } + operands[3] = change_address (operands[0], mode, tmpreg); + emit_move_insn (tmpreg, addr); + operands[4] = gen_rtx_SCRATCH (SImode); +} + [(set_attr "type" "storen") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "*,a,b,a,b") + (set_attr "dest_regfile" "*,a,b,b,a") + (set_attr "cross" "*,n,n,y,y")]) + +(define_insn_and_split "movmisalign_load" + [(set (match_operand:SIDIVM 0 "register_operand" "=ab,a,b,b,a") + (unspec:SIDIVM [(match_operand:SIDIVM 1 "memory_operand" "W,Q,T,Q,T")] + UNSPEC_MISALIGNED_ACCESS))] + "TARGET_INSNS_64" + "@ + # + 
%|%.\\tldn\\t%$\\t%1, %0 + %|%.\\tldn\\t%$\\t%1, %0 + %|%.\\tldn\\t%$\\t%1, %0 + %|%.\\tldn\\t%$\\t%1, %0" + "&& reload_completed && satisfies_constraint_W (operands[1])" + [(set (match_dup 0) (unspec:SIDIVM [(match_dup 2)] UNSPEC_MISALIGNED_ACCESS))] +{ + rtx addr = XEXP (operands[1], 0); + rtx tmpreg = (GET_MODE (operands[0]) == SImode ? operands[0] + : operand_subword_force (operands[0], 0, DImode)); + + if (GET_CODE (addr) == PLUS && XEXP (addr, 0) == stack_pointer_rtx + && GET_CODE (XEXP (addr, 1)) == CONST_INT) + { + unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); + val &= GET_MODE_SIZE (mode) - 1; + if (val == 0) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + } + operands[2] = change_address (operands[1], mode, tmpreg); + emit_move_insn (tmpreg, addr); +} + [(set_attr "type" "loadn") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "*,a,b,a,b") + (set_attr "dest_regfile" "*,a,b,b,a") + (set_attr "cross" "*,n,n,y,y")]) + +;; + +;; ------------------------------------------------------------------------- +;; Extensions/extractions +;; ------------------------------------------------------------------------- + +(define_code_iterator any_extract [zero_extract sign_extract]) +(define_code_iterator any_ext [zero_extend sign_extend]) + +(define_code_attr ext_name [(zero_extend "zero_extend") (sign_extend "sign_extend")]) + +(define_code_attr u [(zero_extend "u") (sign_extend "")]) + +(define_code_attr z [(zero_extract "z") (sign_extract "")]) +(define_code_attr zu [(zero_extract "u") (sign_extract "")]) + +(define_mode_attr ext_shift [(QI "24") (HI "16")]) + +(define_insn "si2" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,?a, b,?b") + (any_ext:SI (match_operand:QIHIM 1 "nonimmediate_operand" "a,b,Q, R, R, Q")))] + "" + "@ + %|%.\\text\\t%$\\t%1, , , %0 + %|%.\\text\\t%$\\t%1, , , %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0 + %|%.\\tld\\t%$\\t%1, %0" + [(set_attr "type" "*,*,load,load,load,load") + (set_attr "units" "s,s,d_addr,d_addr,d_addr,d_addr") + (set_attr "addr_regfile" "*,*,a,b,b,a") + (set_attr "dest_regfile" "*,*,a,a,b,b") + (set_attr "cross" "n,n,n,y,n,y")]) + +(define_insn "*extv_const" + [(set (match_operand:SI 0 "nonimmediate_operand" "=a,b") + (any_extract:SI (match_operand:SI 1 "register_operand" "a,b") + (match_operand:SI 2 "const_int_operand" "n,n") + (match_operand:SI 3 "const_int_operand" "n,n")))] + "INTVAL (operands[3]) >= 0 + && INTVAL (operands[2]) + INTVAL (operands[3]) <= 32" +{ + int pos = INTVAL (operands[3]); + int len = INTVAL (operands[2]); + rtx xop[4]; + xop[0] = operands[0]; + xop[1] = operands[1]; + xop[2] = GEN_INT (32 - pos - len); + xop[3] = GEN_INT (32 - len); + + output_asm_insn ("%|%.\\text\\t%$\\t%1, %2, %3, %0", xop); + return ""; +} + [(set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (any_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")))] + "" +{ + if (INTVAL (operands[2]) < 0 + || INTVAL (operands[2]) + INTVAL (operands[3]) > 32) + FAIL; +}) + +(define_insn "real_" + [(unspec [(match_operand 0 "const_int_operand" "JA,JA,JB,JB") + (any_ext:SI (match_operand:QIHIM 1 "memory_operand" "Q,R,R,Q"))] + UNSPEC_REAL_LOAD)] + "" + "%|%.\\tld\\t%$\\t%1, %k0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "addr_regfile" "a,b,b,a") + (set_attr "dest_regfile" "a,a,b,b") + (set_attr 
"cross" "n,y,n,y")]) + +(define_insn "clrr" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "0,0,0,0") + (match_operand:SI 2 "register_operand" "a,b,?b,?a") + (match_operand:SI 3 "reg_or_const_int_operand" "ai,bi,a,b")] + UNSPEC_CLR))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx xops[4]; + int v1 = INTVAL (operands[2]); + int v2 = (v1 >> 5) & 0x1f; + v1 &= 0x1f; + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = GEN_INT (v1); + xops[3] = GEN_INT (v2); + output_asm_insn ("%|%.\\tclr\\t%$\\t%1, %3, %2, %0", xops); + return ""; + } + return "%|%.\\tclr\\t%$\\t%2, %3, %0"; +} + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "extr" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_const_int_operand" "ai,bi,a,b")] + UNSPEC_EXT))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx xops[4]; + int v1 = INTVAL (operands[2]); + int v2 = (v1 >> 5) & 0x1f; + v1 &= 0x1f; + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = GEN_INT (v1); + xops[3] = GEN_INT (v2); + output_asm_insn ("%|%.\\text\\t%$\\t%1, %3, %2, %0", xops); + return ""; + } + return "%|%.\\text\\t%$\\t%1, %2, %0"; +} + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "extru" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (unspec:SI [(match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_const_int_operand" "ai,bi,a,b")] + UNSPEC_EXTU))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx xops[4]; + int v1 = INTVAL (operands[2]); + int v2 = (v1 >> 5) & 0x1f; + v1 &= 0x1f; + xops[0] = operands[0]; + xops[1] = operands[1]; + xops[2] = GEN_INT (v1); + xops[3] = GEN_INT (v2); + output_asm_insn ("%|%.\\textu\\t%$\\t%1, %3, %2, %0", xops); + return ""; + } + return "%|%.\\textu\\t%$\\t%1, %2, %0"; +} + [(set_attr "units" "s") + (set_attr "cross" "n,y,n,y")]) + +;; ------------------------------------------------------------------------- +;; Compare instructions +;; ------------------------------------------------------------------------- + +(define_insn "scmpsi_insn" + [(set (match_operand:SI 0 "register_operand" "=ab,a,b,a,b") + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SI 2 "register_operand" "ab,a,b,?b,?a") + (match_operand:SI 3 "reg_or_scst5_operand" "Is5,aIs5,bIs5,aIs5,bIs5")]))] + "" + "%|%.\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set (attr "cross") + (symbol_ref "CROSS_OPERANDS (operands[0], operands[2])"))]) + +(define_insn "*ucmpsi_insn_64" + [(set (match_operand:SI 0 "register_operand" "=ab,a,b,a,b") + (match_operator:SI 1 "ltugtu_operator" + [(match_operand:SI 2 "register_operand" "ab,a,b,?b,?a") + (match_operand:SI 3 "reg_or_ucst5_operand" "Iu5,aIu5,bIu5,aIu5,bIu5")]))] + "TARGET_INSNS_64" + "%|%.\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set (attr "cross") + (symbol_ref "CROSS_OPERANDS (operands[0], operands[2])"))]) + +(define_insn "*ucmpsi_insn" + [(set (match_operand:SI 0 "register_operand" "=ab,a,b,a,b") + (match_operator:SI 1 "ltugtu_operator" + [(match_operand:SI 2 "register_operand" "ab,a,b,?b,?a") + (match_operand:SI 3 "reg_or_ucst4_operand" "Iu4,aIu4,bIu4,aIu4,bIu4")]))] + "!TARGET_INSNS_64" + "%|%.\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set (attr "cross") + (symbol_ref "CROSS_OPERANDS (operands[0], operands[2])"))]) + +(define_code_iterator andior_eqne [eq ne]) 
+(define_code_attr andior_name [(eq "and") (ne "ior")]) +(define_code_attr andior_condmod [(eq "") (ne "!")]) + +(define_insn "*scmpsi__insn" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SI 2 "register_operand" "a,b,?b,?a") + (match_operand:SI 3 "reg_or_scst5_operand" "aIs5,bIs5,aIs5,bIs5")])))] + "" + "%|[%4]\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +(define_insn "*ucmpsi__insn_64" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "ltugtu_operator" + [(match_operand:SI 2 "register_operand" "a,b,?b,?a") + (match_operand:SI 3 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5")])))] + "TARGET_INSNS_64" + "%|[%4]\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +(define_insn "*ucmpsi__insn" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "ltugtu_operator" + [(match_operand:SI 2 "register_operand" "a,b,?b,?a") + (match_operand:SI 3 "reg_or_ucst4_operand" "aIu4,bIu4,aIu4,bIu4")])))] + "!TARGET_INSNS_64" + "%|[%4]\\tcmp%C1\\t%$\\t%3, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +(define_expand "cmpsi_" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "c6x_comparison_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "reg_or_const_int_operand" "")])))] + "" +{ + if (c6x_force_op_for_comparison_p (GET_CODE (operands[1]), operands[3])) + operands[3] = force_reg (SImode, operands[3]); +}) + +(define_insn "*cmpsf_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SF 2 "register_operand" "a,b,a,b") + (match_operand:SF 3 "register_operand" "a,b,?b,?a")]))] + "TARGET_FP" + "%|%.\\tcmp%c1sp\\t%$\\t%2, %3, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "*cmpdf_insn" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:DF 2 "register_operand" "a,b,a,b") + (match_operand:DF 3 "register_operand" "a,b,?b,?a")]))] + "TARGET_FP" + "%|%.\\tcmp%c1dp\\t%$\\t%2, %3, %0" + [(set_attr "type" "cmpdp") + (set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_expand "cmp_" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SFDFM 2 "register_operand" "") + (match_operand:SFDFM 3 "register_operand" "")])))] + "TARGET_FP") + +(define_insn "*cmpsf__insn" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:SF 2 "register_operand" 
"a,b,a,b") + (match_operand:SF 3 "register_operand" "a,b,?b,?a")])))] + "TARGET_FP" + "%|[%4]\\tcmp%c1sp\\t%$\\t%2, %3, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +;; reload_reg_class_lower will ensure that two-word reloads are allocated first, +;; which could exhaust the predicate registers if we used just "a" and "b" +;; constraints on operands 2 and 3. +(define_insn "*cmpdf__insn" + [(set (match_operand:SI 0 "register_operand" "=A,B,A,B") + (if_then_else:SI + (andior_eqne:SI (match_operand:SI 4 "register_operand" "0,0,0,0") + (const_int 0)) + (match_dup 4) + (match_operator:SI 1 "eqltgt_operator" + [(match_operand:DF 2 "register_operand" "Da,Db,Da,Db") + (match_operand:DF 3 "register_operand" "Da,Db,?Db,?Da")])))] + "TARGET_FP" + "%|[%4]\\tcmp%c1dp\\t%$\\t%2, %3, %0" + [(set_attr "type" "cmpdp") + (set_attr "units" "s") + (set_attr "cross" "n,n,y,y") + (set_attr "predicable" "no")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (ior:SI (match_operand 1 "c6x_any_comparison_operand" "") + (match_operand 2 "c6x_any_comparison_operand" "")))] + "!reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (if_then_else:SI (ne:SI (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 2)))]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (match_operand 1 "c6x_any_comparison_operand" "") + (match_operand 2 "c6x_any_comparison_operand" "")))] + "!reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (if_then_else:SI (eq:SI (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 2)))]) + + +;; ------------------------------------------------------------------------- +;; setcc instructions +;; ------------------------------------------------------------------------- + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 1 "comparison_operator" + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "reg_or_ucst4_operand" "")]))] + "" +{ + if (!c6x_comparison_operator (operands[1], SImode)) + { + rtx tmpreg = gen_reg_rtx (SImode); + rtx t = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[1])), + SImode, operands[2], operands[3]); + emit_insn (gen_rtx_SET (VOIDmode, tmpreg, t)); + emit_insn (gen_scmpsi_insn (operands[0], + gen_rtx_fmt_ee (EQ, SImode, tmpreg, const0_rtx), + tmpreg, const0_rtx)); + DONE; + } +}) + +;; ------------------------------------------------------------------------- +;; Jump instructions +;; ------------------------------------------------------------------------- + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "register_operand" "a,b"))] + "" + "%|%.\\tb\\t%$\\t%0" + [(set_attr "type" "branch") + (set_attr "units" "s") + (set_attr "cross" "y,n") + (set_attr "dest_regfile" "b")]) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "%|%.\\tb\\t%$\\t%l0" + [(set_attr "type" "branch") + (set_attr "units" "s") + (set_attr "dest_regfile" "any")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:SI 0 "register_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "!flag_pic || !TARGET_INSNS_64" +{ +}) + +(define_insn "*tablejump_internal" + [(set (pc) (match_operand:SI 0 "register_operand" "b")) + (use (label_ref (match_operand 1 "" "")))] + "!flag_pic || !TARGET_INSNS_64" + "%|\\tb\\t%$\\t%0" + [(set_attr "type" "branch") 
+ (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "b")]) + +;; Implement switch statements when generating PIC code. Switches are +;; implemented by `tablejump' when not using -fpic. + +;; Emit code here to do the range checking and make the index zero based. +;; operand 0 is the index +;; operand 1 is the lower bound +;; operand 2 is the range of indices (highest - lowest + 1) +;; operand 3 is the label that precedes the table itself +;; operand 4 is the fall through label + +(define_expand "casesi" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "const_int_operand" "")) + (use (match_operand:SI 2 "const_int_operand" "")) + (use (match_operand 3 "" "")) + (use (match_operand 4 "" ""))] + "flag_pic && TARGET_INSNS_64" +{ + rtx indx; + rtx low = operands[1]; + rtx range = operands[2]; + rtx table = operands[3]; + rtx fail = operands[4]; + + gcc_assert (GET_CODE (operands[1]) == CONST_INT); + gcc_assert (GET_CODE (operands[2]) == CONST_INT); + + if (!reg_or_ucst4_operand (range, SImode)) + range = force_reg (SImode, range); + + /* If low bound is 0, we don't have to subtract it. */ + if (INTVAL (operands[1]) == 0) + indx = operands[0]; + else + { + rtx offset = GEN_INT (-INTVAL (low)); + indx = gen_reg_rtx (SImode); + if (!addsi_operand (offset, SImode)) + offset = force_reg (SImode, offset); + emit_insn (gen_addsi3 (indx, operands[0], offset)); + } + emit_cmp_and_jump_insns (indx, range, GTU, NULL_RTX, SImode, 1, fail); + + emit_jump_insn (gen_casesi_internal (indx, table)); + DONE; +}) + +;; This is the only instance in this file where a pattern emits more than +;; one instruction. The concern here is that the addkpc insn could otherwise +;; be scheduled too far away from the label. A tablejump always ends an +;; extended basic block, so it shouldn't happen that the scheduler places +;; something in the delay slots. 
+(define_insn "casesi_internal" + [(set (pc) + (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "b") + (const_int 4)) + (label_ref (match_operand 1 "" ""))))) + (clobber (match_scratch:SI 2 "=&b")) + (clobber (match_scratch:SI 3 "=b"))] + "flag_pic && TARGET_INSNS_64" + "addkpc\t.s2\t%l1,%2, 0\n\t\tldw\t.d2t2\t*+%2[%0], %3\n\t\tnop\t\t4\n\t\tadd\t.l2\t%2, %3, %3\n\t\tb\t.s2\t%3" + [(set_attr "type" "branch") + (set_attr "predicable" "no") + (set_attr "dest_regfile" "b")]) + +(define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:SIDIM 1 "register_operand" "") + (match_operand:SIDIM 2 "reg_or_const_int_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx t = c6x_expand_compare (operands[0], VOIDmode); + operands[0] = t; + operands[1] = XEXP (t, 0); + operands[2] = XEXP (t, 1); +}) + +(define_expand "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "c6x_fp_comparison_operator" + [(match_operand:SFDFM 1 "register_operand" "") + (match_operand:SFDFM 2 "register_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx t = c6x_expand_compare (operands[0], VOIDmode); + operands[0] = t; + operands[1] = XEXP (t, 0); + operands[2] = XEXP (t, 1); +}) + +(define_insn "br_true" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:SI 1 "register_operand" "AB") + (const_int 0)]) + (label_ref (match_operand 2 "" "")) + (pc)))] + "" + "%|[%J0]\\tb\\t%$\\t%l2" + [(set_attr "type" "branch") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any")]) + +(define_insn "br_false" + [(set (pc) + (if_then_else (match_operator 0 "predicate_operator" + [(match_operand:SI 1 "register_operand" "AB") + (const_int 0)]) + (pc) + (label_ref (match_operand 2 "" ""))))] + "" + "%|[%j0]\\tb\\t%$\\t%l2" + [(set_attr "type" "branch") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any")]) + +(define_expand "return" + [(parallel + [(return) + (use (reg:SI REG_B3))])] + "reload_completed && get_frame_size () == 0 && c6x_nsaved_regs () == 0") + +;; We can't expand this before we know where the link register is stored. +(define_insn_and_split "eh_return" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "ab")] + UNSPECV_EH_RETURN) + (clobber (match_scratch:SI 1 "=&ab"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + " + { + c6x_set_return_address (operands[0], operands[1]); + DONE; + }" +) + +;; ------------------------------------------------------------------------- +;; Doloop +;; ------------------------------------------------------------------------- + +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else + (ne (match_operand:SI 0 "" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) + (const_int -1))) + (clobber (match_dup 2))])] ; match_scratch + "TARGET_INSNS_64PLUS && optimize" +{ + /* The loop optimizer doesn't check the predicates... 
*/ + if (GET_MODE (operands[0]) != SImode) + FAIL; + operands[2] = gen_rtx_SCRATCH (SImode); +}) + +(define_insn "mvilc" + [(set (reg:SI REG_ILC) + (unspec [(match_operand:SI 0 "register_operand" "a,b")] UNSPEC_MVILC))] + "TARGET_INSNS_64PLUS" + "%|%.\\tmvc\\t%$\\t%0, ILC" + [(set_attr "predicable" "no") + (set_attr "cross" "y,n") + (set_attr "units" "s") + (set_attr "dest_regfile" "b") + (set_attr "type" "mvilc")]) + +(define_insn "sploop" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i") + (reg:SI REG_ILC)] + UNSPECV_SPLOOP)] + "TARGET_INSNS_64PLUS" + "%|%.\\tsploop\t%0" + [(set_attr "predicable" "no") + (set_attr "type" "sploop")]) + +(define_insn "spkernel" + [(set (pc) + (if_then_else + (ne (unspec_volatile:SI + [(match_operand:SI 0 "const_int_operand" "i") + (match_operand:SI 1 "const_int_operand" "i")] + UNSPECV_SPKERNEL) + (const_int 1)) + (label_ref (match_operand 2 "" "")) + (pc)))] + "TARGET_INSNS_64PLUS" + "%|%.\\tspkernel\t%0, %1" + [(set_attr "predicable" "no") + (set_attr "type" "spkernel")]) + +(define_insn "loop_end" + [(set (pc) + (if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "0,0,0,*r") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "nonimmediate_operand" "=AB,*r,m,m") + (plus:SI (match_dup 3) + (const_int -1))) + (clobber (match_scratch:SI 2 "=X,&AB,&AB,&AB"))] + "TARGET_INSNS_64PLUS && optimize" + "#" + [(set_attr "type" "spkernel")]) + +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 3 "nonimmediate_operand" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "memory_operand" "") + (plus:SI (match_dup 3) + (const_int -1))) + (clobber (match_scratch 2))] + "" + [(set (match_dup 2) (plus:SI (match_dup 3) (const_int -1))) + (set (match_dup 0) (match_dup 2)) + (set (pc) + (if_then_else (ne (match_dup 2) (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] +{ + if (!REG_P (operands[3])) + { + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; + } +}) + +;; ------------------------------------------------------------------------- +;; Delayed-branch real jumps and shadows +;; ------------------------------------------------------------------------- + +(define_insn "real_jump" + [(unspec [(match_operand 0 "c6x_jump_operand" "a,b,s") (const_int 0)] + UNSPEC_REAL_JUMP)] + "" +{ + if (GET_CODE (operands[0]) == LABEL_REF) + return "%|%.\\tb\\t%$\\t%l0"; + return "%|%.\\tb\\t%$\\t%0"; +} + [(set_attr "type" "branch") + (set_attr "has_shadow" "y") + (set_attr "units" "s") + (set_attr "cross" "y,n,n") + (set_attr "dest_regfile" "b,b,any")]) + +(define_insn "real_call" + [(unspec [(match_operand 0 "c6x_call_operand" "a,b,S1") (const_int 1)] + UNSPEC_REAL_JUMP) + (clobber (reg:SI REG_B3))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "has_shadow" "y") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "cross" "y,n,n") + (set_attr "dest_regfile" "b,b,any")]) + +(define_insn "real_ret" + [(unspec [(match_operand 0 "register_operand" "a,b") (const_int 2)] + UNSPEC_REAL_JUMP)] + "" + "%|%.\\tret\\t%$\\t%0" + [(set_attr "type" "branch") + (set_attr "has_shadow" "y") + (set_attr "units" "s") + (set_attr "cross" "y,n") + (set_attr "dest_regfile" "b")]) + +;; computed_jump_p returns true if it finds a constant; so use one in the +;; unspec. 
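The counter protocol used by the doloop_end and loop_end patterns earlier in this file is easy to misread: the RTL compares the counter against 1 before the parallel decrement. A minimal C sketch of the equivalent loop shape follows; it is illustrative only and assumes the counter is at least 1 on entry.

    void run_loop (unsigned int count)   /* assumed count >= 1 */
    {
      do
        {
          /* loop body */
        }
      while (--count != 0);   /* same test as "counter != 1" taken on the
                                 value before the decrement */
    }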
+(define_insn "indirect_jump_shadow" + [(set (pc) (unspec [(const_int 1)] UNSPEC_JUMP_SHADOW))] + "" + ";; indirect jump occurs" + [(set_attr "type" "shadow")]) + +;; Operand 0 may be a PARALLEL which isn't handled by output_operand, so +;; we don't try to print it. +(define_insn "indirect_call_value_shadow" + [(set (match_operand 0 "" "") + (call (unspec [(pc)] UNSPEC_JUMP_SHADOW) + (const_int 0)))] + "" + ";; indirect call occurs, with return value" + [(set_attr "type" "shadow")]) + +(define_insn "indirect_sibcall_shadow" + [(call (unspec [(pc)] UNSPEC_JUMP_SHADOW) + (const_int 0))] + "SIBLING_CALL_P (insn)" + ";; indirect sibcall occurs" + [(set_attr "type" "shadow")]) + +(define_insn "indirect_call_shadow" + [(call (unspec [(pc)] UNSPEC_JUMP_SHADOW) + (const_int 0))] + "" + ";; indirect call occurs" + [(set_attr "type" "shadow")]) + +(define_insn "call_value_shadow" + [(set (match_operand 0 "" "") + (call (unspec [(match_operand 1 "" "")] UNSPEC_JUMP_SHADOW) + (const_int 0)))] + "" + ";; call to %1 occurs, with return value" + [(set_attr "type" "shadow")]) + +(define_insn "call_shadow" + [(call (unspec [(match_operand 0 "" "")] UNSPEC_JUMP_SHADOW) + (const_int 0))] + "!SIBLING_CALL_P (insn)" + ";; call to %0 occurs" + [(set_attr "type" "shadow")]) + +(define_insn "sibcall_shadow" + [(call (unspec [(match_operand 0 "" "")] UNSPEC_JUMP_SHADOW) + (const_int 0))] + "SIBLING_CALL_P (insn)" + ";; sibcall to %0 occurs" + [(set_attr "type" "shadow")]) + +(define_insn "jump_shadow" + [(set (pc) (unspec [(match_operand 0 "" "")] UNSPEC_JUMP_SHADOW))] + "" + ";; jump to %0 occurs" + [(set_attr "type" "shadow")]) + +(define_insn "condjump_shadow" + [(set (pc) + (if_then_else (eq (unspec [(const_int 0)] UNSPEC_JUMP_SHADOW) + (const_int 0)) + (match_operand 0 "" "") + (pc)))] + "" + ";; condjump to %0 occurs" + [(set_attr "type" "shadow")]) + +(define_insn "return_shadow" + [(unspec [(const_int 0)] UNSPEC_JUMP_SHADOW) + (return)] + "" + ";; return occurs" + [(set_attr "type" "shadow")]) + +;; ------------------------------------------------------------------------- +;; Add instructions +;; ------------------------------------------------------------------------- + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" + "=a ,b , a, b, a, b, a, b, ab, a, b, a, b,ab") + (plus:SI (match_operand:SI 1 "register_operand" + "%a ,b , a, b, b, a, b, a, 0, a, b, z, z,0") + (match_operand:SI 2 "addsi_operand" + "aIs5,bIs5,?b,?a,?a,?b,?aIs5,?bIs5,I5x,I5x,I5x,Iux,Iux,IsB")))] + "" +{ + if (CONSTANT_P (operands[2])) + { + HOST_WIDE_INT val = INTVAL (operands[2]); + + if (c6x_get_unit_specifier (insn) == 'd') + { + bool issp = (TARGET_INSNS_64PLUS + && operands[1] == stack_pointer_rtx + && GET_CODE (PATTERN (insn)) != COND_EXEC); + + if (get_attr_cross (insn) == CROSS_N) + { + if (satisfies_constraint_Iu5 (operands[2])) + return "%|%.\\tadd\\t%$\\t%1, %2, %0"; + else if (satisfies_constraint_In5 (operands[2])) + return "%|%.\\tsub\\t%$\\t%1, %n2, %0"; + } + + if (issp && val > 0 && val < 32768) + { + return "%|%.\\taddab\\t%$\\t%1, %2, %0"; + } + if ((val & 1) == 0 && ((val >= -62 && val <= 62) + || (issp && val > 0 && val < 65536))) + { + if (val < 0) + return "%|%.\\tsubah\\t%$\\t%1, %r2, %0"; + else + return "%|%.\\taddah\\t%$\\t%1, %r2, %0"; + } + else if ((val & 3) == 0 && ((val >= -124 && val <= 124) + || (issp && val > 0 && val < 131072))) + { + if (val < 0) + return "%|%.\\tsubaw\\t%$\\t%1, %R2, %0"; + else + return "%|%.\\taddaw\\t%$\\t%1, %R2, %0"; + } + else if ((val & 7) == 0 && val > 0 
&& val <= 248) + { + rtx xop[3]; + xop[0] = operands[0]; + xop[1] = operands[1]; + xop[2] = GEN_INT (val >> 3); + output_asm_insn ("%|%.\\taddad\\t%$\\t%1, %2, %0", xop); + return ""; + } + } + else + { + if (satisfies_constraint_Is5 (operands[2])) + return "%|%.\\tadd\\t%$\\t%2, %1, %0"; + } + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "%|%.\\taddk\\t%$\\t%2, %0"; + } + if (which_alternative == 4 || which_alternative == 5) + return "%|%.\\tadd\\t%$\\t%2, %1, %0"; + else + return "%|%.\\tadd\\t%$\\t%1, %2, %0"; +} + [(set_attr "units62" "dls,dls,ls,ls,ls,ls,ls,ls,s,d,d,*,*,s") + (set_attr "units67" "dls,dls,ls,ls,ls,ls,ls,ls,ds,d,d,*,*,s") + (set_attr "units64" "dls,dls,dls,dls,dls,dls,ls,ls,ds,d,d,d,d,s") + (set_attr "cross" "n,n,y,y,y,y,y,y,n,n,n,y,n,n") + (set_attr "predicable" "yes,yes,yes,yes,yes,yes,yes,yes,yes,yes,yes,no,no,yes")]) + +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b,a,b") + (minus:SI (match_operand:SI 1 "reg_or_scst5_operand" "a,b,aIs5,bIs5,bIs5,aIs5") + (match_operand:SI 2 "register_operand" "a,b,a,b,?a,?b")))] + "" + "%|%.\\tsub\\t%$\\t%1, %2, %0" + [(set_attr "units62" "dls,dls,ls,ls,l,l") + (set_attr "units64" "dls,dls,ls,ls,ls,ls") + (set_attr "cross" "n,n,n,n,y,y")]) + +(define_insn "*addshiftsi" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (plus:SI (mult:SI (match_operand:SI 2 "register_operand" "a,b") + (match_operand:SI 3 "adda_scale_operand" "n,n")) + (match_operand:SI 1 "register_operand" "a,b")))] + "" + "%|%.\\tadda%d3\\t%$\\t%1, %2, %0" + [(set_attr "units" "d")]) + +(define_insn "*subshiftsi" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (minus:SI (match_operand:SI 1 "register_operand" "a,b") + (mult:SI (match_operand:SI 2 "register_operand" "a,b") + (match_operand:SI 3 "suba_scale_operand" "n,n"))))] + "" + "%|%.\\tsuba%d3\\t%$\\t%1, %2, %0" + [(set_attr "units" "d")]) + +(define_insn "addsidi3_widen" + [(set (match_operand:DI 0 "register_operand" "=a,b,a,b") + (plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%a,b,a,b")) + (zero_extend:DI (match_operand:SI 2 "register_operand" "a,b,?b,?a"))))] + "" + "%|%.\\taddu\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" "")))] + "" +{ + rtx tmp; + rtx lo_half[3], hi_half[3]; + split_di (operands + 1, 2, lo_half + 1, hi_half + 1); + if (reg_overlap_mentioned_p (operands[0], hi_half[1]) + || reg_overlap_mentioned_p (operands[0], hi_half[2])) + tmp = gen_reg_rtx (DImode); + else + tmp = operands[0]; + split_di (&tmp, 1, lo_half, hi_half); + emit_insn (gen_addsidi3_widen (tmp, lo_half[1], lo_half[2])); + emit_insn (gen_addsi3 (hi_half[0], copy_rtx (hi_half[0]), hi_half[1])); + emit_insn (gen_addsi3 (copy_rtx (hi_half[0]), + copy_rtx (hi_half[0]), hi_half[2])); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + DONE; +}) + +(define_insn "addsf3" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (plus:SF (match_operand:SF 1 "register_operand" "%a,b,a,b") + (match_operand:SF 2 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\taddsp\\t%$\\t%1, %2, %0" + [(set_attr "type" "fp4") + (set_attr "units67" "l") + (set_attr "units67p" "ls") + (set_attr "units674" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "adddf3" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (plus:DF 
(match_operand:DF 1 "register_operand" "%a,b,a,b") + (match_operand:DF 2 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tadddp\\t%$\\t%1, %2, %0" + [(set_attr "type" "adddp") + (set_attr "units67" "l") + (set_attr "units67p" "ls") + (set_attr "units674" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "subsf3" + [(set (match_operand:SF 0 "register_operand" "=a,b, a, b, a, b") + (minus:SF (match_operand:SF 1 "register_operand" "a,b, b, a, a, b") + (match_operand:SF 2 "register_operand" "a,b,?a,?b,?b,?a")))] + "TARGET_FP" + "%|%.\\tsubsp\\t%$\\t%1, %2, %0" + [(set_attr "type" "fp4") + (set_attr "units67" "l") + (set_attr "units67p" "ls") + (set_attr "units674" "ls") + (set_attr "cross" "n,n,y,y,y,y")]) + +(define_insn "subdf3" + [(set (match_operand:DF 0 "register_operand" "=a,b, a, b, a, b") + (minus:DF (match_operand:DF 1 "register_operand" "a,b, b, a, a, b") + (match_operand:DF 2 "register_operand" "a,b,?a,?b,?b,?a")))] + "TARGET_FP" + "%|%.\\tsubdp\\t%$\\t%1, %2, %0" + [(set_attr "type" "adddp") + (set_attr "units67" "l") + (set_attr "units67p" "ls") + (set_attr "units674" "ls") + (set_attr "cross" "n,n,y,y,y,y")]) + +;; ------------------------------------------------------------------------- +;; Logical instructions +;; ------------------------------------------------------------------------- + +(define_insn "andsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b,a,b") + (and:SI (match_operand:SI 1 "register_operand" "%a,b,b,a,a,b") + (match_operand:SI 2 "andsi_operand" "aIs5,bIs5,?aIs5,?bIs5,aJc,bJc")))] + "" +{ + if (which_alternative < 4) + return "%|%.\\tand\\t%$\\t%2, %1, %0"; + else + return "%|%.\\tclr\\t%$\\t%1, %f2, %F2, %0"; +} + [(set_attr "units62" "ls,ls,ls,ls,s,s") + (set_attr "units64" "dls,dls,dls,dls,s,s") + (set_attr "cross" "n,n,y,y,n,n")]) + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b,a,b") + (ior:SI (match_operand:SI 1 "register_operand" "%a,b,b,a,a,b") + (match_operand:SI 2 "iorsi_operand" "aIs5,bIs5,?aIs5,?bIs5,aJs,bJs")))] + "" +{ + if (which_alternative < 4) + return "%|%.\\tor\\t%$\\t%2, %1, %0"; + else + return "%|%.\\tset\\t%$\\t%1, %s2, %S2, %0"; +} + [(set_attr "units62" "ls,ls,ls,ls,s,s") + (set_attr "units64" "dls,dls,dls,dls,s,s") + (set_attr "cross" "n,n,y,y,n,n")]) + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (xor:SI (match_operand:SI 1 "register_operand" "%a,b,b,a") + (match_operand:SI 2 "reg_or_scst5_operand" "aIs5,bIs5,?aIs5,?bIs5")))] + "" + "%|%.\\txor\\t%$\\t%2, %1, %0" + [(set_attr "units62" "ls") + (set_attr "units64" "dls") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Conversions +;; ------------------------------------------------------------------------- + +(define_insn "extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (float_extend:DF (match_operand:SF 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tspdp\\t%$\\t%1,%0" + [(set_attr "type" "dp2") + (set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=a,b") + (float_truncate:SF (match_operand:DF 1 "register_operand" "a,b")))] + "TARGET_FP" + "%|%.\\tdpsp\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n")]) + +;;;; Convert between signed integer types and floating point. 
+(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (float:SF (match_operand:SI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tintsp\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (unsigned_float:SF (match_operand:SI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tintspu\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (float:DF (match_operand:SI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tintdp\\t%$\\t%1,%0" + [(set_attr "type" "intdp") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "floatunssidf2" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (unsigned_float:DF (match_operand:SI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tintdpu\\t%$\\t%1,%0" + [(set_attr "type" "intdp") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "fix_truncsfsi2" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (fix:SI (match_operand:SF 1 "register_operand" "a,b,?b,?a")))] + "TARGET_FP" + "%|%.\\tsptrunc\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "fix_truncdfsi2" + [(set (match_operand:SI 0 "register_operand" "=a,b") + (fix:SI (match_operand:DF 1 "register_operand" "a,b")))] + "TARGET_FP" + "%|%.\\tdptrunc\\t%$\\t%1,%0" + [(set_attr "type" "fp4") + (set_attr "units" "l") + (set_attr "cross" "n")]) + +;; ------------------------------------------------------------------------- +;; Saturating arithmetic +;; ------------------------------------------------------------------------- + +(define_insn "saddsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b,a,b,a,b") + (ss_plus:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a,a,b,?b,?a") + (match_operand:SI 2 "reg_or_const_int_operand" "a,b,a,b,aIs5,bIs5,aIs5,bIs5")))] + "" + "%|%.\\tsadd\\t%$\\t%2, %1, %0" + [(set_attr "units" "ls,ls,ls,ls,l,l,l,l") + (set_attr "cross" "n,n,y,y,n,n,y,y")]) + +(define_insn "ssubsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (ss_minus:SI (match_operand:SI 1 "reg_or_scst5_operand" "aIs5,bIs5,?bIs5,?aIs5") + (match_operand:SI 2 "register_operand" "a,b,a,b")))] + "" + "%|%.\\tssub\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "subcsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (unspec:SI + [(match_operand:SI 1 "register_operand" "a,b,a,b") + (match_operand:SI 2 "register_operand" "a,b,?b,?a")] + UNSPEC_SUBC))] + "" + "%|%.\\tsubc\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Call instructions +;; ------------------------------------------------------------------------- + +(define_expand "call" + [(match_operand 0 "" "")] + "" +{ + c6x_expand_call (NULL_RTX, operands[0], false); + DONE; +}) + +(define_expand "call_value" + [(match_operand 0 "" "") + (match_operand 1 "" "")] + "" +{ + c6x_expand_call (operands[0], operands[1], false); + DONE; +}) + +(define_expand "sibcall" + [(match_operand 0 "" "")] + "" +{ + c6x_expand_call (NULL_RTX, operands[0], true); + cfun->machine->contains_sibcall = true; + DONE; +}) 
+ +(define_expand "sibcall_value" + [(match_operand 0 "" "") + (match_operand 1 "" "")] + "" +{ + c6x_expand_call (operands[0], operands[1], true); + cfun->machine->contains_sibcall = true; + DONE; +}) + +(define_insn "call_internal" + [(call (mem (match_operand:SI 0 "c6x_call_operand" "S1,a,b")) + (const_int 0))] + "!SIBLING_CALL_P (insn)" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any,b,b") + (set_attr "cross" "n,y,n")]) + +(define_insn "call_value_internal" + [(set (match_operand 0 "" "") + (call (mem (match_operand:SI 1 "c6x_call_operand" "S1,a,b")) + (const_int 0)))] + "" + "%|%.\\tcall\\t%$\\t%1" + [(set_attr "type" "call") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any,b,b") + (set_attr "cross" "n,y,n")]) + +(define_insn "sibcall_internal" + [(call (mem (match_operand:SI 0 "c6x_call_operand" "S1,C")) + (const_int 0))] + "SIBLING_CALL_P (insn)" + "%|%.\\tb\\t%$\\t%0" + [(set_attr "type" "branch") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "any,b")]) + +(define_insn "callp" + [(call (mem (match_operand:SI 0 "c6x_call_operand" "S1")) + (const_int 0)) + (unspec [(const_int 6)] UNSPEC_NOP)] + "!SIBLING_CALL_P (insn)" + "%|%.\\tcallp\\t%$\\t%0, B3" + [(set_attr "type" "callp") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "b") + (set_attr "cross" "n")]) + +(define_insn "callp_value" + [(set (match_operand:SI 0 "register_operand" "") + (call (mem (match_operand:SI 1 "c6x_call_operand" "S1")) + (const_int 0))) + (unspec [(const_int 6)] UNSPEC_NOP)] + "!SIBLING_CALL_P (insn)" + "%|%.\\tcallp\\t%$\\t%1, B3" + [(set_attr "type" "callp") + (set_attr "predicable" "no") + (set_attr "units" "s") + (set_attr "dest_regfile" "b") + (set_attr "cross" "n")]) + +(define_insn "return_internal" + [(return) + (use (match_operand:SI 0 "register_operand" "b"))] + "reload_completed" + "%|%.\\tret\\t%$\\t%0" + [(set_attr "type" "branch") + (set_attr "units" "s") + (set_attr "dest_regfile" "b")]) + +(define_insn "addkpc" + [(set (match_operand:SI 0 "register_operand" "=b") + (unspec:SI [(match_operand 1 "" "")] UNSPEC_ADDKPC)) + (unspec [(match_operand 2 "const_int_operand" "n")] UNSPEC_NOP)] + "TARGET_INSNS_64" + "%|%.\\taddkpc\\t%$\\t%l1, %0, %2" + [(set_attr "units" "s") + (set_attr "dest_regfile" "b")]) + +;; ------------------------------------------------------------------------- +;; Unary operations +;; ------------------------------------------------------------------------- + +(define_insn "negsi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (neg:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tneg\\t%$\\t%1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (not:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tnot\\t%$\\t%1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "clrsbsi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (clrsb:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tnorm\\t%$\\t%1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,y,n,y")]) + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (clz:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tlmbd\\t%$\\t1, %1, 
%0" + [(set_attr "units" "l") + (set_attr "cross" "n,y,n,y")]) + +;; bitrevsi2 is defined in c6x-mult.md.in. + +(define_expand "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_INSNS_64" +{ + rtx tmpreg = gen_reg_rtx (SImode); + emit_insn (gen_bitrevsi2 (tmpreg, operands[1])); + emit_insn (gen_clzsi2 (operands[0], tmpreg)); + DONE; +}) + +(define_expand "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "") + (ctz:DI (match_operand:DI 1 "register_operand" "")))] + "TARGET_INSNS_64" +{ + rtx tmpreg = gen_reg_rtx (DImode); + rtx out; + emit_insn (gen_bitrevsi2 (gen_highpart (SImode, tmpreg), + gen_lowpart (SImode, operands[1]))); + emit_insn (gen_bitrevsi2 (gen_lowpart (SImode, tmpreg), + gen_highpart (SImode, operands[1]))); + out = expand_unop (DImode, clz_optab, tmpreg, operands[0], 1); + if (!rtx_equal_p (out, operands[0])) + emit_move_insn (operands[0], out); + DONE; +}) + +(define_insn "ssabssi2" + [(set (match_operand:SI 0 "register_operand" "=a, a, b, b") + (ss_abs:SI (match_operand:SI 1 "register_operand" "a,?b, b,?a")))] + "" + "%|%.\\tabs\\t%$\\t%1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,y,n,y")]) + +;; ------------------------------------------------------------------------- +;; Shift instructions +;; ------------------------------------------------------------------------- + +(define_code_iterator any_shift [ss_ashift ashift ashiftrt lshiftrt]) +(define_code_iterator any_rshift [ashiftrt lshiftrt]) +(define_code_attr shift_code [(ss_ashift "ss_ashl") (ashift "ashl") + (ashiftrt "ashr") (lshiftrt "lshr")]) +(define_code_attr shift_insn [(ss_ashift "sshl") (ashift "shl") + (ashiftrt "shr") (lshiftrt "shru")]) + +(define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (any_shift:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5")))] + "" + "%|%.\\t\\t%$\\t%1, %2, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +;; See c6x-mult.md.in for the rotlsi3 pattern. 
+ +(define_insn "rotrdi3_16" + [(set (match_operand:DI 0 "register_operand" "=a,b") + (rotatert:DI (match_operand:DI 1 "register_operand" "a,b") + (const_int 16)))] + "TARGET_INSNS_64PLUS" + "%|%.\\tdpackx2\\t%$\\t%P1, %p1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n")]) + +(define_insn "shlmbsi3" + [(set (match_operand:SI 0 "register_operand" "=a,b,a,b") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "a,b,?b,?a") + (const_int 8)) + (lshiftrt:SI (match_operand:SI 2 "register_operand" "a,b,a,b") + (const_int 24))))] + "TARGET_INSNS_64" + "%|%.\\tshlmb\\t%$\\t%2, %1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_INSNS_64" +{ + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 8) + { + rtx lo0, lo1, hi0, hi1, tmp; + lo0 = gen_lowpart (SImode, operands[0]); + hi0 = gen_highpart (SImode, operands[0]); + lo1 = gen_lowpart (SImode, operands[1]); + hi1 = gen_highpart (SImode, operands[1]); + if (reg_overlap_mentioned_p (hi0, lo1)) + tmp = gen_reg_rtx (SImode); + else + tmp = hi0; + emit_insn (gen_shlmbsi3 (tmp, hi1, lo1)); + emit_insn (gen_ashlsi3 (lo0, lo1, operands[2])); + if (tmp != hi0) + emit_move_insn (hi0, tmp); + DONE; + } + FAIL; +}) + +(define_expand "rotrdi3" + [(set (match_operand:DI 0 "register_operand" "") + (rotatert:DI (match_operand:DI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")))] + "TARGET_INSNS_64PLUS" +{ + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 16) + { + emit_insn (gen_rotrdi3_16 (operands[0], operands[1])); + DONE; + } + FAIL; +}) + +(define_insn "bswapv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (bswap:V2HI (match_operand:V2HI 1 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tswap4\\t%$\\t%1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] + "TARGET_INSNS_64" +{ + rtx tmpreg = gen_reg_rtx (SImode); + rtx tmpv2 = gen_lowpart (V2HImode, tmpreg); + rtx op0v2 = gen_lowpart (V2HImode, operands[0]); + emit_insn (gen_rotlsi3 (tmpreg, operands[1], GEN_INT (16))); + emit_insn (gen_bswapv2hi2 (op0v2, tmpv2)); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Division +;; ------------------------------------------------------------------------- + +(define_insn "divsi3_insn" + [(set (reg:SI REG_A4) (div:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B5)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_divi" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "divsi3_insn_indcall" + [(set (reg:SI REG_A4) (div:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) 
+ (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B5)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "udivsi3_insn" + [(set (reg:SI REG_A4) (udiv:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_divu" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "udivsi3_insn_indcall" + [(set (reg:SI REG_A4) (udiv:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "modsi3_insn" + [(set (reg:SI REG_A4) (mod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_remi" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "modsi3_insn_indcall" + [(set (reg:SI REG_A4) (mod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "divmodsi4_insn" + [(set (reg:SI REG_A4) (div:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (set (reg:SI REG_A5) (mod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_divremi" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "divmodsi4_insn_indcall" + [(set (reg:SI REG_A4) (div:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (set (reg:SI REG_A5) (mod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI 
REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "umodsi3_insn" + [(set (reg:SI REG_A4) (umod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A7)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_remu" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "umodsi3_insn_indcall" + [(set (reg:SI REG_A4) (umod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A7)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "udivmodsi4_insn" + [(set (reg:SI REG_A4) (udiv:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (set (reg:SI REG_A5) (umod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t__c6xabi_divremu" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "udivmodsi4_insn_indcall" + [(set (reg:SI REG_A4) (udiv:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (set (reg:SI REG_A5) (umod:SI (reg:SI REG_A4) (reg:SI REG_B4))) + (use (match_operand:SI 0 "register_operand" "b")) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "%|%.\\tcall\\t%$\\t%0" + [(set_attr "type" "call") + (set_attr "dest_regfile" "any") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn_and_split "divmodsi4" + [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A4)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B5)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "#" + "" + [(const_int 0)] +{ + rtx reg = 
NULL_RTX; + + if (TARGET_LONG_CALLS) + { + if (reload_completed) + reg = gen_rtx_REG (SImode, REG_A6); + else + reg = gen_reg_rtx (SImode); + } + emit_move_insn (gen_rtx_REG (SImode, REG_A4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, REG_B4), operands[2]); + if (find_reg_note (curr_insn, REG_UNUSED, operands[3])) + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (sdiv_optab, SImode)); + emit_insn (gen_divsi3_insn_indcall (reg)); + } + else + emit_insn (gen_divsi3_insn ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, REG_A4)); + } + else if (find_reg_note (curr_insn, REG_UNUSED, operands[0])) + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (smod_optab, SImode)); + emit_insn (gen_modsi3_insn_indcall (reg)); + } + else + emit_insn (gen_modsi3_insn ()); + emit_move_insn (operands[3], gen_rtx_REG (SImode, REG_A4)); + } + else + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (sdivmod_optab, SImode)); + emit_insn (gen_divmodsi4_insn_indcall (reg)); + } + else + emit_insn (gen_divmodsi4_insn ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, REG_A4)); + emit_move_insn (operands[3], gen_rtx_REG (SImode, REG_A5)); + } + DONE; +}) + +(define_insn_and_split "udivmodsi4" + [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:SI REG_A0)) + (clobber (reg:SI REG_A1)) + (clobber (reg:SI REG_A2)) + (clobber (reg:SI REG_A4)) + (clobber (reg:SI REG_A5)) + (clobber (reg:SI REG_A6)) + (clobber (reg:SI REG_A7)) + (clobber (reg:SI REG_B0)) + (clobber (reg:SI REG_B1)) + (clobber (reg:SI REG_B2)) + (clobber (reg:SI REG_B3)) + (clobber (reg:SI REG_B4)) + (clobber (reg:SI REG_B30)) + (clobber (reg:SI REG_B31))] + "" + "#" + "" + [(const_int 0)] +{ + rtx reg = NULL_RTX; + + if (TARGET_LONG_CALLS) + { + if (reload_completed) + reg = gen_rtx_REG (SImode, REG_A6); + else + reg = gen_reg_rtx (SImode); + } + + emit_move_insn (gen_rtx_REG (SImode, REG_A4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, REG_B4), operands[2]); + if (find_reg_note (curr_insn, REG_UNUSED, operands[3])) + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (udiv_optab, SImode)); + emit_insn (gen_udivsi3_insn_indcall (reg)); + } + else + emit_insn (gen_udivsi3_insn ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, REG_A4)); + } + else if (find_reg_note (curr_insn, REG_UNUSED, operands[0])) + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (umod_optab, SImode)); + emit_insn (gen_umodsi3_insn_indcall (reg)); + } + else + emit_insn (gen_umodsi3_insn ()); + emit_move_insn (operands[3], gen_rtx_REG (SImode, REG_A4)); + } + else + { + if (TARGET_LONG_CALLS) + { + emit_move_insn (reg, optab_libfunc (udivmod_optab, SImode)); + emit_insn (gen_udivmodsi4_insn_indcall (reg)); + } + else + emit_insn (gen_udivmodsi4_insn ()); + emit_move_insn (operands[0], gen_rtx_REG (SImode, REG_A4)); + emit_move_insn (operands[3], gen_rtx_REG (SImode, REG_A5)); + } + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Multiplication +;; See c6x-mult.md.in for define_insn patterns. 
+;; ------------------------------------------------------------------------- + +(define_expand "mulhisi3" + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "")) + (sign_extend:SI (match_operand:HI 2 "reg_or_scst5_operand" ""))))] + "" +{ + if (CONSTANT_P (operands[2])) + { + emit_insn (gen_mulhisi3_const (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_expand "usmulhisi3" + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "")) + (sign_extend:SI (match_operand:HI 2 "reg_or_scst5_operand" ""))))] + "" +{ + if (CONSTANT_P (operands[2])) + { + emit_insn (gen_usmulhisi3_const (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_expand "mulsi3" + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "register_operand" "")))] + "" +{ + if (!TARGET_MPY32) + { + rtx lo1 = gen_lowpart (HImode, operands[1]); + rtx lo2 = gen_lowpart (HImode, operands[2]); + /* (N * AH + AL) * (N * BH + BL) + = N*(AH * BL + BH * AL) + AL*BL */ + rtx tmp1 = gen_reg_rtx (SImode); + rtx tmp2 = gen_reg_rtx (SImode); + rtx tmp3 = gen_reg_rtx (SImode); + emit_insn (gen_umulhisi3 (tmp1, lo1, lo2)); + emit_insn (gen_umulhisi3_lh (tmp2, lo1, operands[2])); + emit_insn (gen_umulhisi3_hl (tmp3, operands[1], lo2)); + emit_insn (gen_addsi3 (tmp2, tmp2, tmp3)); + emit_insn (gen_ashlsi3 (tmp2, tmp2, GEN_INT (16))); + emit_insn (gen_addsi3 (operands[0], tmp1, tmp2)); + DONE; + } +}) + +;; ------------------------------------------------------------------------- +;; Floating point multiplication +;; ------------------------------------------------------------------------- + +(define_insn "mulsf3" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (mult:SF (match_operand:SF 1 "register_operand" "%a,b,?a,?b") + (match_operand:SF 2 "register_operand" "a,b,b,a")))] + "TARGET_FP" + "%|%.\\tmpysp\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpy4") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "muldf3" + [(set (match_operand:DF 0 "register_operand" "=a,b") + (mult:DF (match_operand:DF 1 "register_operand" "%a,b") + (match_operand:DF 2 "register_operand" "a,b")))] + "TARGET_FP" + "%|%.\\tmpydp\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpydp") + (set_attr "units" "m") + (set_attr "cross" "n")]) + +;; Note that mpyspdp and mpysp2dp are available on C67x, despite what the +;; manual says. 
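The mulsi3 expander above builds a 32-bit product from 16x16 multiplies when TARGET_MPY32 is not set. With N = 2^16, (N*AH + AL) * (N*BH + BL) = N^2*AH*BH + N*(AH*BL + BH*AL) + AL*BL, and the N^2 term is 0 modulo 2^32, which is why only the low product and the two cross products (the umulhisi3, _lh and _hl patterns) are formed. A C sketch of the same computation, illustrative only:

    unsigned int mul32_from_16x16 (unsigned int a, unsigned int b)
    {
      unsigned int al = a & 0xffff, ah = a >> 16;
      unsigned int bl = b & 0xffff, bh = b >> 16;

      unsigned int lo    = al * bl;            /* AL*BL                  */
      unsigned int cross = ah * bl + al * bh;  /* AH*BL + AL*BH          */

      return lo + (cross << 16);               /* N * cross, modulo 2^32 */
    }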
+(define_insn "*muldf_ext1" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "a,b,a,b")) + (match_operand:DF 2 "register_operand" "a,b,?b,?a")))] + "TARGET_FP_EXT" + "%|%.\\tmpyspdp\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpyspdp") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "*muldf_ext2" + [(set (match_operand:DF 0 "register_operand" "=a,b,a,b") + (mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "%a,b,a,b")) + (float_extend:DF (match_operand:SF 2 "register_operand" "a,b,?b,?a"))))] + "TARGET_FP_EXT" + "%|%.\\tmpysp2dp\\t%$\\t%1, %2, %0" + [(set_attr "type" "mpysp2dp") + (set_attr "units" "m") + (set_attr "cross" "n,n,y,y")]) + +;; ------------------------------------------------------------------------- +;; Floating point division +;; ------------------------------------------------------------------------- + +(define_insn "rcpsf2" + [(set (match_operand:SF 0 "register_operand" "=a,b,a,b") + (unspec:SF [(match_operand:SF 1 "register_operand" "a,b,?b,?a")] + UNSPEC_RCP))] + "TARGET_FP" + "%|%.\\trcpsp\\t%$\\t%1, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "rcpdf2" + [(set (match_operand:DF 0 "register_operand" "=a,b") + (unspec:DF [(match_operand:DF 1 "register_operand" "a,b")] + UNSPEC_RCP))] + "TARGET_FP" + "%|%.\\trcpdp\\t%$\\t%1, %0" + [(set_attr "type" "dp2") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_expand "divsf3" + [(set (match_dup 4) + (unspec:SF [(match_operand:SF 2 "register_operand" "")] + UNSPEC_RCP)) + (set (match_dup 5) (mult:SF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:SF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:SF (match_dup 4) (match_dup 6))) + (set (match_dup 5) (mult:SF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:SF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:SF (match_dup 4) (match_dup 6))) + (set (match_operand:SF 0 "register_operand" "") + (mult:SF (match_operand:SF 1 "register_operand") + (match_dup 4)))] + "TARGET_FP && flag_reciprocal_math" +{ + operands[3] = force_reg (SFmode, + CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode)); + operands[4] = gen_reg_rtx (SFmode); + operands[5] = gen_reg_rtx (SFmode); + operands[6] = gen_reg_rtx (SFmode); +}) + +(define_expand "divdf3" + [(set (match_dup 4) + (unspec:DF [(match_operand:DF 2 "register_operand" "")] + UNSPEC_RCP)) + (set (match_dup 5) (mult:DF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:DF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:DF (match_dup 4) (match_dup 6))) + (set (match_dup 5) (mult:DF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:DF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:DF (match_dup 4) (match_dup 6))) + (set (match_dup 5) (mult:DF (match_dup 2) (match_dup 4))) + (set (match_dup 6) (minus:DF (match_dup 3) (match_dup 5))) + (set (match_dup 4) (mult:DF (match_dup 4) (match_dup 6))) + (set (match_operand:DF 0 "register_operand" "") + (mult:DF (match_operand:DF 1 "register_operand") + (match_dup 4)))] + "TARGET_FP && flag_reciprocal_math" +{ + operands[3] = force_reg (DFmode, + CONST_DOUBLE_FROM_REAL_VALUE (dconst2, DFmode)); + operands[4] = gen_reg_rtx (DFmode); + operands[5] = gen_reg_rtx (DFmode); + operands[6] = gen_reg_rtx (DFmode); +}) + +;; ------------------------------------------------------------------------- +;; Block moves +;; ------------------------------------------------------------------------- + 
+(define_expand "movmemsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SI 2 "nonmemory_operand" "")) + (use (match_operand:SI 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] + "" +{ + if (c6x_expand_movmem (operands[0], operands[1], operands[2], operands[3], + operands[4], operands[5])) + DONE; + else + FAIL; +}) + +;; ------------------------------------------------------------------------- +;; Prologue and epilogue. +;; ------------------------------------------------------------------------- + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "type" "blockage")]) + +(define_insn "push_rts" + [(set (mem:SI (reg:SI REG_SP)) (reg:SI REG_B14)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -8))) (reg:DI REG_A14)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -16))) (reg:DI REG_B12)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -24))) (reg:DI REG_A12)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -32))) (reg:DI REG_B10)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -40))) (reg:DI REG_A10)) + (set (mem:DI (plus:SI (reg:SI REG_SP) (const_int -48))) (reg:DI REG_B2)) + (set (reg:SI REG_SP) (plus:SI (reg:SI REG_SP) (const_int -56))) + (unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE) + (clobber (reg:SI REG_A3))] + "TARGET_INSNS_64PLUS" + "%|%.\\tcallp\\t%$\\t__c6xabi_push_rts, a3" + [(set_attr "type" "callp") + (set_attr "dest_regfile" "a") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_insn "pop_rts" + [(set (reg:SI REG_B14) (mem:SI (plus:SI (reg:SI REG_SP) (const_int 56)))) + (set (reg:DI REG_A14) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 48)))) + (set (reg:DI REG_B12) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 40)))) + (set (reg:DI REG_A12) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 32)))) + (set (reg:DI REG_B10) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 24)))) + (set (reg:DI REG_A10) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 16)))) + (set (reg:DI REG_B2) (mem:DI (plus:SI (reg:SI REG_SP) (const_int 8)))) + (set (reg:SI REG_SP) (plus:SI (reg:SI REG_SP) (const_int 56))) + (clobber (reg:SI REG_A3)) + (return)] + "TARGET_INSNS_64PLUS" + "%|%.\\tretp\\t%$\\t__c6xabi_pop_rts, a3" + [(set_attr "type" "callp") + (set_attr "dest_regfile" "a") + (set_attr "units" "s") + (set_attr "cross" "n")]) + +(define_expand "prologue" + [(const_int 1)] + "" + "c6x_expand_prologue (); DONE;") + +(define_expand "epilogue" + [(const_int 1)] + "" + "c6x_expand_epilogue (false); DONE;") + +(define_expand "sibcall_epilogue" + [(return)] + "" +{ + c6x_expand_epilogue (true); + DONE; +}) + +(define_insn "setup_dsbt" + [(set (match_operand:SI 0 "pic_register_operand" "+Z") + (unspec:SI [(match_dup 0) + (match_operand:SI 1 "symbolic_operand" "")] + UNSPEC_SETUP_DSBT))] + "TARGET_DSBT" + "%|%.\\tldw\\t%$\\t*+%0($DSBT_index%1), %0" + [(set_attr "type" "load") + (set_attr "units" "d_addr") + (set_attr "dest_regfile" "b") + (set_attr "addr_regfile" "b")]) + + +;; A dummy use/set to prevent prologue and epiloge overlapping. +;; This can be caused by sched-ebb in the presence of multiple +;; exit sequences, and causes the unwinding table generation to explode. 
+(define_insn "epilogue_barrier" + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 1 "register_operand" "")] + UNSPEC_EPILOGUE_BARRIER))] + "" + "" + [(set_attr "type" "blockage")]) + +;; ------------------------------------------------------------------------- +;; Vector insns +;; ------------------------------------------------------------------------- + +(define_code_iterator logical [and ior xor]) +(define_code_attr logical_insn [(and "and") (ior "ior") (xor "xor")]) +(define_code_attr logical_opcode [(and "and") (ior "or") (xor "xor")]) +(define_code_iterator plusminus [plus minus]) +(define_code_attr plusminus_insn [(plus "add") (minus "sub")]) +(define_code_iterator ss_plusminus [ss_plus ss_minus]) +(define_code_attr ss_plusminus_insn [(ss_plus "add") (ss_minus "sub")]) + +;; Vector logical insns + +(define_insn "3" + [(set (match_operand:VEC4M 0 "register_operand" "=a,b,a,b") + (logical:VEC4M (match_operand:VEC4M 1 "register_operand" "a,b,a,b") + (match_operand:VEC4M 2 "register_operand" "a,b,?b,?a")))] + "" + "%|%.\\t\\t%$\\t%1, %2, %0" + [(set_attr "units62" "ls") + (set_attr "units64" "dls") + (set_attr "cross" "n,n,y,y")]) + +;; Vector add/subtract + +(define_insn "v2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (plusminus:V2HI (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))] + "" + "%|%.\\t2\\t%$\\t%1, %2, %0" + [(set_attr "units62" "l") + (set_attr "units64" "dls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "v4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=a,b,a,b") + (plusminus:V4QI (match_operand:V4QI 1 "register_operand" "a,b,a,b") + (match_operand:V4QI 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\t4\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "ss_addv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (ss_plus:V2HI (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tsadd2\\t%$\\t%1, %2, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "ss_subv2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (ss_minus:V2HI (match_operand:V2HI 1 "register_operand" "a,b,a,b") + (match_operand:V2HI 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tssub2\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "us_addv4qi3" + [(set (match_operand:V4QI 0 "register_operand" "=a,b,a,b") + (ss_plus:V4QI (match_operand:V4QI 1 "register_operand" "a,b,a,b") + (match_operand:V4QI 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tsaddu4\\t%$\\t%1, %2, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +;; Vector/scalar min/max + +(define_mode_iterator SMINMAX [HI V2HI]) +(define_mode_iterator UMINMAX [QI V4QI]) + +(define_insn "smax3" + [(set (match_operand:SMINMAX 0 "register_operand" "=a,b,a,b") + (smax:SMINMAX (match_operand:SMINMAX 1 "register_operand" "a,b,a,b") + (match_operand:SMINMAX 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tmax2\\t%$\\t%1, %2, %0" + [(set_attr "units64" "l") + (set_attr "units64p" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "smin3" + [(set (match_operand:SMINMAX 0 "register_operand" "=a,b,a,b") + (smin:SMINMAX (match_operand:SMINMAX 1 "register_operand" "a,b,a,b") + (match_operand:SMINMAX 2 
"register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tmin2\\t%$\\t%1, %2, %0" + [(set_attr "units64" "l") + (set_attr "units64p" "ls") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umax3" + [(set (match_operand:UMINMAX 0 "register_operand" "=a,b,a,b") + (umax:UMINMAX (match_operand:UMINMAX 1 "register_operand" "a,b,a,b") + (match_operand:UMINMAX 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tmaxu4\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +(define_insn "umin3" + [(set (match_operand:UMINMAX 0 "register_operand" "=a,b,a,b") + (umin:UMINMAX (match_operand:UMINMAX 1 "register_operand" "a,b,a,b") + (match_operand:UMINMAX 2 "register_operand" "a,b,?b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tminu4\\t%$\\t%1, %2, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,n,y,y")]) + +;; Vector shifts + +(define_insn "v2hi3" + [(set (match_operand:V2HI 0 "register_operand" "=a,b,a,b") + (any_rshift:V2HI (match_operand:V2HI 1 "register_operand" "a,b,?b,?a") + (match_operand:SI 2 "reg_or_ucst5_operand" "aIu5,bIu5,aIu5,bIu5")))] + "TARGET_INSNS_64" + "%|%.\\t2\\t%$\\t%1, %2, %0" + [(set_attr "units" "s") + (set_attr "cross" "n,n,y,y")]) + +;; See c6x-mult.md.in for avg2/avgu4 + +;; Widening vector multiply and dot product. +;; See c6x-mult.md.in for the define_insn patterns + +(define_expand "sdot_prodv2hi" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")] + "TARGET_INSNS_64" +{ + rtx t = gen_reg_rtx (SImode); + emit_insn (gen_dotv2hi (t, operands[1], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[3], t)); + DONE; +}) + +;; Unary vector operations + +(define_insn "ssabsv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=a, a, b, b") + (ss_abs:V2HI (match_operand:V2HI 1 "register_operand" "a,?b, b,?a")))] + "TARGET_INSNS_64" + "%|%.\\tabs2\\t%$\\t%1, %0" + [(set_attr "units" "l") + (set_attr "cross" "n,y,n,y")]) + +;; Pack insns + +(define_insn "*packv2hi_insv" + [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+a,b,a,b,ab") + (const_int 16) + (const_int 16)) + (match_operand:SI 1 "nonmemory_operand" "a,b,?b,?a,n"))] + "TARGET_INSNS_64" + "@ + %|%.\\tpack2\\t%$\\t%1, %0, %0 + %|%.\\tpack2\\t%$\\t%1, %0, %0 + %|%.\\tpack2\\t%$\\t%1, %0, %0 + %|%.\\tpack2\\t%$\\t%1, %0, %0 + %|%.\\tmvklh\\t%$\\t%1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,n,y,y,n")]) + +(define_insn "movstricthi" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+a,b,a,b")) + (match_operand:HI 1 "register_operand" "a,b,?b,?a"))] + "TARGET_INSNS_64" + "%|%.\\tpackhl2\\t%$\\t%0, %1, %0" + [(set_attr "units" "ls") + (set_attr "cross" "n,n,y,y")]) + +(include "c6x-mult.md") +(include "sync.md") diff --git a/gcc-4.9/gcc/config/c6x/c6x.opt b/gcc-4.9/gcc/config/c6x/c6x.opt new file mode 100644 index 000000000..1a96f6086 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x.opt @@ -0,0 +1,67 @@ +; Option definitions for TI C6X. +; Copyright (C) 2010-2014 Free Software Foundation, Inc. +; Contributed by Bernd Schmidt +; Contributed by CodeSourcery. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify +; it under the terms of the GNU General Public License as published by +; the Free Software Foundation; either version 3, or (at your option) +; any later version. 
+; +; GCC is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +HeaderInclude +config/c6x/c6x-opts.h + +SourceInclude +config/c6x/c6x-opts.h + +mbig-endian +Target Report RejectNegative Mask(BIG_ENDIAN) +Use big-endian byte order + +mlittle-endian +Target Report RejectNegative InverseMask(BIG_ENDIAN, LITTLE_ENDIAN) +Use little-endian byte order + +msim +Target RejectNegative +Use simulator runtime + +msdata= +Target RejectNegative Enum(c6x_sdata) Joined Var(c6x_sdata_mode) Init(C6X_SDATA_DEFAULT) +Select method for sdata handling + +Enum +Name(c6x_sdata) Type(enum c6x_sdata) +Valid arguments for the -msdata= option + +EnumValue +Enum(c6x_sdata) String(none) Value(C6X_SDATA_NONE) + +EnumValue +Enum(c6x_sdata) String(default) Value(C6X_SDATA_DEFAULT) + +EnumValue +Enum(c6x_sdata) String(all) Value(C6X_SDATA_ALL) + +mdsbt +Target Mask(DSBT) +Compile for the DSBT shared library ABI + +mlong-calls +Target Report Mask(LONG_CALLS) +Avoid generating pc-relative calls; use indirection + +march= +Target RejectNegative Joined Enum(c6x_isa) Var(c6x_arch_option) +Specify the name of the target architecture diff --git a/gcc-4.9/gcc/config/c6x/c6x_intrinsics.h b/gcc-4.9/gcc/config/c6x/c6x_intrinsics.h new file mode 100644 index 000000000..ce0436ca7 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/c6x_intrinsics.h @@ -0,0 +1,194 @@ +/* Intrinsics for TI C6X. + + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _GCC_C6X_INTRINSICS_H +#define _GCC_C6X_INTRINSICS_H + +#if !defined(__TMS320C6X__) +# error "c6x_intrinsics.h is only supported for C6X targets" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Define vector types. 
*/ +typedef uint8_t __uv4qi __attribute__((vector_size (4))); +typedef int16_t __v2hi __attribute__((vector_size (4))); +typedef int32_t __v2si __attribute__((vector_size (8))); + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_abs (int src) +{ + return __builtin_c6x_abs (src); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_abs2 (int src) +{ + return (int)__builtin_c6x_abs2 ((__v2hi)src); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sadd (int src1, int src2) +{ + return __builtin_c6x_sadd (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_ssub (int src1, int src2) +{ + return __builtin_c6x_ssub (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_add2 (int src1, int src2) +{ + return (int)__builtin_c6x_add2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sub2 (int src1, int src2) +{ + return (int)__builtin_c6x_sub2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_add4 (int src1, int src2) +{ + return (int)__builtin_c6x_add4 ((__uv4qi)src1, (__uv4qi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sub4 (int src1, int src2) +{ + return (int)__builtin_c6x_sub4 ((__uv4qi)src1, (__uv4qi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sadd2 (int src1, int src2) +{ + return (int)__builtin_c6x_sadd2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_ssub2 (int src1, int src2) +{ + return (int)__builtin_c6x_ssub2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_saddu4 (int src1, int src2) +{ + return (int)__builtin_c6x_saddu4 ((__uv4qi)src1, (__uv4qi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_smpy (int src1, int src2) +{ + return __builtin_c6x_smpy (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_smpylh (int src1, int src2) +{ + return __builtin_c6x_smpylh (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_smpyhl (int src1, int src2) +{ + return __builtin_c6x_smpyhl (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_smpyh (int src1, int src2) +{ + return __builtin_c6x_smpyh (src1, src2); +} + +__extension__ static __inline long long __attribute__ ((__always_inline__)) +_smpy2ll (int src1, int src2) +{ + return (long long)__builtin_c6x_smpy2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline long long __attribute__ ((__always_inline__)) +_mpy2ll (int src1, int src2) +{ + return (long long)__builtin_c6x_mpy2 ((__v2hi)src1, (__v2hi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_extr (int src1, int src2) +{ + return __builtin_c6x_extr (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_extru (int src1, int src2) +{ + return __builtin_c6x_extru (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_clrr (int src1, int src2) +{ + return __builtin_c6x_clrr (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_avg2 (int src1, int src2) +{ + return (int)__builtin_c6x_avg2 ((__v2hi)src1, (__v2hi)src2); +} + 
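Each wrapper above simply casts a packed int to the relevant vector type and forwards it to the corresponding target builtin, so user code keeps working with ordinary 32-bit values. The sketch below is a hypothetical usage example rather than anything from the GCC sources; pack_2x16 is an assumed helper, demo is an invented function, and the halfword layout shown is the little-endian one.

#include <c6x_intrinsics.h>
#include <stdint.h>

/* Pack two halfwords into one 32-bit register image (little-endian:
   'lo' in bits 0-15, 'hi' in bits 16-31).  */
static inline int
pack_2x16 (int16_t lo, int16_t hi)
{
  union { int16_t h[2]; int w; } u = { { lo, hi } };
  return u.w;
}

int
demo (void)
{
  int x = pack_2x16 (1000, -2000);
  int y = pack_2x16 (32000, 30000);
  int wrap = _add2 (x, y);   /* two independent 16-bit adds, wrapping:
                                low 33000 -> -32536, high 28000        */
  int sat  = _sadd2 (x, y);  /* same adds with signed saturation:
                                low 32767, high 28000                  */
  (void) wrap;
  return _abs2 (sat);        /* per-halfword absolute value            */
}

The point of the packed-int convention is that no vector extension syntax leaks into user code; the casts to __v2hi and __uv4qi stay confined to this header.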
+__extension__ static __inline int __attribute__ ((__always_inline__)) +_avgu4 (int src1, int src2) +{ + return (int)__builtin_c6x_avgu4 ((__uv4qi)src1, (__uv4qi)src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_sshl (int src1, int src2) +{ + return __builtin_c6x_sshl (src1, src2); +} + +__extension__ static __inline int __attribute__ ((__always_inline__)) +_subc (int src1, int src2) +{ + return __builtin_c6x_subc (src1, src2); +} + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gcc-4.9/gcc/config/c6x/constraints.md b/gcc-4.9/gcc/config/c6x/constraints.md new file mode 100644 index 000000000..e2721d9a7 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/constraints.md @@ -0,0 +1,174 @@ +;; Constraint definitions for TI C6X. +;; Copyright (C) 2010-2014 Free Software Foundation, Inc. +;; Contributed by Andrew Jenner +;; Contributed by Bernd Schmidt +;; Contributed by CodeSourcery. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_register_constraint "a" "A_REGS" + "Register file A (A0--A31).") + +(define_register_constraint "b" "B_REGS" + "Register file B (B0--B31).") + +(define_register_constraint "A" "PREDICATE_A_REGS" + "Predicate registers in register file A (A0--A2 on C64X and higher, + A1 and A2 otherwise).") + +(define_register_constraint "B" "PREDICATE_B_REGS" + "Predicate registers in register file B (B0--B2).") + +(define_register_constraint "C" "CALL_USED_B_REGS" + "A call-used register in register file B (B0--B9, B16--B31).") + +(define_register_constraint "Da" "NONPREDICATE_A_REGS" + "Register file A, excluding predicate registers (A3--A31, plus A0 if +not C64X or higher).") + +(define_register_constraint "Db" "NONPREDICATE_B_REGS" + "Register file B, excluding predicate registers (B3--B31).") + +(define_register_constraint "Z" "PICREG" + "Register B14 (aka DP).") + +(define_register_constraint "z" "SPREG" + "Register B15 (aka SP).") + +(define_constraint "Iu4" + "Integer constant in the range 0 @dots{} 15, aka ucst4." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 15"))) + +(define_constraint "Iu5" + "Integer constant in the range 0 @dots{} 31, aka ucst5." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 31"))) + +(define_constraint "In5" + "Integer constant in the range @minus{}31 @dots{} 0, negation of ucst5." + (and (match_code "const_int") + (match_test "ival >= -31 && ival <= 0"))) + +(define_constraint "Is5" + "Integer constant in the range @minus{}16 @dots{} 15, aka scst5." + (and (match_code "const_int") + (match_test "ival >= -16 && ival <= 15"))) + +(define_constraint "I5x" + "Integer constant that can be the operand of an ADDA or a SUBA insn." 
+ (and (match_code "const_int") + (match_test "(ival >= -31 && ival <= 31) + || ((ival & 1) == 0 && ival >= -62 && ival <= 62) + || ((ival & 3) == 0 && ival >= -124 && ival <= 124) + || ((TARGET_INSNS_64 || TARGET_INSNS_67) + && (ival & 7) == 0 && ival > 0 && ival <= 248)"))) + +(define_constraint "Iux" + "Integer constant that can be the operand of a long ADDA or a SUBA insn, + i.e. one involving B14 or B15 as source operand." + (and (match_code "const_int") + (and (match_test "TARGET_INSNS_64PLUS") + (match_test "ival >= 0 + && (ival < 32768 + || ((ival & 1) == 0 && ival < 65536) + || ((ival & 3) == 0 && ival < 131072))")))) + +(define_constraint "IuB" + "Integer constant in the range 0 @dots{} 65535, aka ucst16." + (and (match_code "const_int") + (match_test "ival >= 0 && ival <= 65535"))) + +(define_constraint "IsB" + "Integer constant in the range @minus{}32768 @dots{} 32767." + (and (match_code "const_int") + (match_test "ival >= -32768 && ival <= 32767"))) + +(define_constraint "IsC" + "Integer constant in the range @math{-2^{20}} @dots{} @math{2^{20} - 1}." + (and (match_code "const_int") + (match_test "ival >= -0x100000 && ival <= 0xfffff"))) + +(define_constraint "JA" + "@internal + Integer constant in the range 0 @dots{} 31, corresponding to an A register + number." + (and (match_code "const_int") + (match_test "ival >= 0 && ival < 32"))) + +(define_constraint "JB" + "@internal + Integer constant in the range 32 @dots{} 63, corresponding to a B register + number." + (and (match_code "const_int") + (match_test "ival >= 32 && ival < 64"))) + +(define_constraint "Jc" + "Integer constant that is a valid mask for the clr instruction" + (and (match_code "const_int") + (match_test "c6x_valid_mask_p (ival)"))) + +(define_constraint "Js" + "Integer constant that is a valid mask for the set instruction" + (and (match_code "const_int") + (match_test "c6x_valid_mask_p (~ival)"))) + +(define_memory_constraint "Q" + "Memory location with A base register." + (and (match_code "mem") + (match_test "c6x_mem_operand (op, A_REGS, false)"))) + +(define_memory_constraint "R" + "Memory location with B base register." + (and (match_code "mem") + (match_test "c6x_mem_operand (op, B_REGS, false)"))) + +(define_memory_constraint "T" + "@internal + Memory location with B base register, but not using a long offset." + (and (match_code "mem") + (match_test "c6x_mem_operand (op, B_REGS, true)"))) + +(define_constraint "S0" + "@internal + On C64x+ targets, a GP-relative small data reference" + (and (match_test "TARGET_INSNS_64PLUS") + (match_operand 0 "sdata_symbolic_operand"))) + +(define_constraint "S1" + "@internal + Any kind of @code{SYMBOL_REF}, for use in a call address." + (and (match_code "symbol_ref") + (match_operand 0 "c6x_call_operand"))) + +(define_constraint "S2" + "@internal + Any SYMBOL_REF or LABEL_REF." + (ior (match_code "symbol_ref") (match_code "label_ref"))) + +(define_constraint "Si" + "@internal + Any immediate value, unless it matches the S0 constraint." + (and (match_operand 0 "immediate_operand") + (match_test "!satisfies_constraint_S0 (op)"))) + +(define_memory_constraint "W" + "@internal + A memory operand with an address that can't be used in an unaligned access." 
+ (and (match_code "mem") + (match_test "!c6x_legitimate_address_p_1 (GET_MODE (op), XEXP (op, 0), + reload_completed, true)"))) diff --git a/gcc-4.9/gcc/config/c6x/elf-common.h b/gcc-4.9/gcc/config/c6x/elf-common.h new file mode 100644 index 000000000..8eef1b82e --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/elf-common.h @@ -0,0 +1,37 @@ +/* ELF definitions for TI C6X + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Controlling the Compilation Driver. */ +#define ENDIAN_LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} " + +#undef ASM_SPEC +#define ASM_SPEC "%{march=*:-march=%*} %{mbig-endian:-mbig-endian} \ + %{mdsbt:-mdsbt %{!fPIC:-mpid=near} %{fPIC:-mpid=far -mpic} %{fpic:-mpic}} \ + %{!mdsbt:%{fpic:-mpic -mpid=near} %{fPIC:-mpic -mpid=far}}" + +#undef DATA_SECTION_ASM_OP +#define DATA_SECTION_ASM_OP "\t.section\t\".fardata\",\"aw\"" +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t\".const\",\"a\",@progbits" +#define BSS_SECTION_ASM_OP "\t.section\t\".far\",\"aw\",@nobits" +#define SDATA_SECTION_ASM_OP "\t.section\t\".neardata\",\"aw\"" +#define SBSS_SECTION_ASM_OP "\t.section\t\".bss\",\"aw\",@nobits" +#define TARGET_LIBGCC_SDATA_SECTION ".neardata" diff --git a/gcc-4.9/gcc/config/c6x/elf.h b/gcc-4.9/gcc/config/c6x/elf.h new file mode 100644 index 000000000..a4189f6ae --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/elf.h @@ -0,0 +1,35 @@ +/* ELF definitions for TI C6X + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* crt0.o should come from the linker script, but for compatibility, + we mention it here for -msim. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "%{msim:crt0%O%s} crti%O%s crtbegin%O%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend%O%s crtn%O%s" + +#undef LIB_SPEC +#define LIB_SPEC "%{msim:--start-group -lc -lsim --end-group;" \ + ":-lc}" + +#undef LINK_SPEC +#define LINK_SPEC ENDIAN_LINK_SPEC diff --git a/gcc-4.9/gcc/config/c6x/genmult.sh b/gcc-4.9/gcc/config/c6x/genmult.sh new file mode 100644 index 000000000..dd8a086f4 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/genmult.sh @@ -0,0 +1,33 @@ +#! 
/bin/sh +# Generate c6x-mult.md from c6x-mult.md.in +# The input file is passed as an argument. + +# Copyright (C) 2011-2014 Free Software Foundation, Inc. + +#This file is part of GCC. + +#GCC is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3, or (at your option) +#any later version. + +#GCC is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. + +#You should have received a copy of the GNU General Public License +#along with GCC; see the file COPYING3. If not see +#. + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically from c6x-mult.md.in by genmult.sh" + +sed -e "s,_VARIANT_,,g" -e "s,_SET_,set,g" -e "s,_.BRK_,,g" \ + -e "s,_A_,a,g" -e "s,_B_,b,g" -e "s,_DESTOPERAND_,register_operand,g" \ + -e "s,_MOD._,,g" -e "s,:_M,:,g" < $1 + +sed -e "s,_VARIANT_,_real,g" -e "s,_SET_,unspec,g" -e "s,_OBRK_,[,g" \ + -e "s,_CBRK_,] UNSPEC_REAL_MULT,g" -e "s,_A_,JA,g" -e "s,_B_,JB,g" \ + -e "s,_DESTOPERAND_,const_int_operand,g" -e "s,_MODk_,k,g" \ + -e "s,_MODK_,K,g" -e 's,:_MV..[IQ],:SI,g' -e "s,:_MSQ,:SI,g" < $1 diff --git a/gcc-4.9/gcc/config/c6x/genopt.sh b/gcc-4.9/gcc/config/c6x/genopt.sh new file mode 100644 index 000000000..406823a7b --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/genopt.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# Generate c6x-tables.opt from the lists in *.def. +# Copyright (C) 2011-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +cat <. + +Enum +Name(c6x_isa) Type(int) +Known C6X ISAs (for use with the -march= option): + +EOF + +awk -F'[(, ]+' 'BEGIN { + value = 0 +} +/^C6X_ISA/ { + name = $2 + gsub("\"", "", name) + print "EnumValue" + print "Enum(c6x_isa) String(" name ") Value(" value ")" + print "" + value++ +}' $1/c6x-isas.def diff --git a/gcc-4.9/gcc/config/c6x/gensched.sh b/gcc-4.9/gcc/config/c6x/gensched.sh new file mode 100644 index 000000000..4d389cc03 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/gensched.sh @@ -0,0 +1,44 @@ +#! /bin/sh +# Generate c6x-sched.md from c6x-sched.md.in +# The input file is passed as an argument. + +# Copyright (C) 2010-2014 Free Software Foundation, Inc. + +#This file is part of GCC. + +#GCC is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3, or (at your option) +#any later version. + +#GCC is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. 
+ +#You should have received a copy of the GNU General Public License +#along with GCC; see the file COPYING3. If not see +#. + +echo ";; -*- buffer-read-only: t -*-" +echo ";; Generated automatically from c6x-sched.md.in by gensched.sh" + +for cross in n y; do + for side in 1 2; do + tside=$side + case $side in + 1) rf="a"; otherside=2 ;; + 2) rf="b"; otherside=1 ;; + esac + case $cross in + y) cunit="+x$side"; tside=$otherside;; + n) cunit="";; + esac + echo + echo ";; Definitions for side $side, cross $cross" + echo + sed -e "s,_CROSS_,$cross,g" -e "s,_CUNIT_,$cunit,g" \ + -e "s,_N_,$side,g" -e "s,_RF_,$rf,g" -e "s,_NX_,$tside,g" \ + < $1 + done +done diff --git a/gcc-4.9/gcc/config/c6x/predicates.md b/gcc-4.9/gcc/config/c6x/predicates.md new file mode 100644 index 000000000..464d27689 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/predicates.md @@ -0,0 +1,226 @@ +/* Predicates for TI C6X + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +(define_predicate "reg_or_const_int_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand"))) + +(define_predicate "const_vector_operand" + (match_code "const_vector")) + +(define_predicate "scst5_operand" + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Is5 (op)"))) + +(define_predicate "reg_or_ucst4_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu4 (op)")))) + +(define_predicate "reg_or_scst5_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "scst5_operand"))) + +(define_predicate "reg_or_ucst5_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Iu5 (op)")))) + +(define_predicate "addsi_operand" + (ior (match_operand 0 "register_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_IsB (op)")))) + +(define_predicate "andsi_operand" + (ior (match_operand 0 "reg_or_scst5_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Jc (op)")))) + +(define_predicate "iorsi_operand" + (ior (match_operand 0 "reg_or_scst5_operand") + (and (match_operand 0 "const_int_operand") + (match_test "satisfies_constraint_Js (op)")))) + +(define_predicate "insv_operand" + (and (match_operand 0 "const_int_operand") + (match_test "INTVAL (op) == 0 || INTVAL (op) == -1"))) + +(define_predicate "c6x_jump_operand" + (match_code "label_ref,symbol_ref,reg")) + +(define_predicate "c6x_call_operand" + (ior (match_code "symbol_ref,reg") + (and (match_code "subreg") + (match_test "GET_CODE (XEXP (op, 0)) == REG"))) +{ + /* The linker transforms jumps to undefined weak symbols in a way that + is incompatible with our code generation. 
*/ + return (GET_CODE (op) != SYMBOL_REF + || (!SYMBOL_REF_WEAK (op) + && !c6x_long_call_p (op))); +}) + +;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, +;; possibly with an offset. +(define_predicate "symbolic_operand" + (ior (match_code "symbol_ref,label_ref") + (and (match_code "const") + (match_test "GET_CODE (XEXP (op,0)) == PLUS + && (GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (op, 0), 0)) == LABEL_REF) + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT")))) + +(define_predicate "const_int_or_symbolic_operand" + (ior (match_operand 0 "symbolic_operand") + (match_operand 0 "const_int_operand"))) + +;; Return nonzero iff OP is one of the integer constants 2, 4 or 8. +(define_predicate "adda_scale_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4 + || ((TARGET_INSNS_64 || TARGET_INSNS_67) + && INTVAL (op) == 8)"))) + +;; Return nonzero iff OP is one of the integer constants 2 or 4. +(define_predicate "suba_scale_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 4"))) + +;; True if this operator is valid for predication. +(define_predicate "predicate_operator" + (match_code "eq,ne")) + +(define_predicate "c6x_comparison_operator" + (match_code "eq,ltu,gtu,lt,gt")) + +(define_predicate "non_c6x_comparison_operator" + (match_code "ne,leu,geu,le,ge")) + +;; FP Comparisons handled by c6x_expand_compare. +(define_predicate "c6x_fp_comparison_operator" + (ior (match_code "eq,lt,gt,le,ge") + (and (match_test "TARGET_FP") + (match_code "ltgt,uneq,unlt,ungt,unle,unge,ordered,unordered")))) + +(define_predicate "c6x_any_comparison_operand" + (match_code "eq,lt,gt,le,ge,ltu,gtu") +{ + rtx op0 = XEXP (op, 0); + rtx op1 = XEXP (op, 1); + if (ltugtu_operator (op, SImode) + && register_operand (op0, SImode) + && ((TARGET_INSNS_64 && reg_or_ucst5_operand (op1, SImode)) + || (!TARGET_INSNS_64 && reg_or_ucst4_operand (op1, SImode)))) + return true; + if (eqltgt_operator (op, SImode) + && register_operand (op0, SImode) + && reg_or_scst5_operand (op1, SImode)) + return true; + if (!TARGET_FP) + return false; + if (!eqltgt_operator (op, SFmode) && !eqltgt_operator (op, DFmode)) + return false; + if (register_operand (op0, GET_MODE (op)) + && register_operand (op1, GET_MODE (op))) + return true; + return false; +}) + +(define_predicate "ltugtu_operator" + (match_code "ltu,gtu")) + +(define_predicate "eqltgt_operator" + (match_code "eq,lt,gt")) + +(define_predicate "eqne_operator" + (match_code "eq,ne")) + +(define_predicate "predicate_register" + (and (match_code "reg") + (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == PREDICATE_A_REGS") + (match_test "REGNO_REG_CLASS (REGNO (op)) == PREDICATE_B_REGS")))) + +;; Allow const_ints for things like the real_mult patterns. +(define_predicate "a_register" + (ior (and (match_code "reg") + (match_test "A_REGNO_P (REGNO (op))")) + (and (match_code "const_int") + (match_test "A_REGNO_P (INTVAL (op))")))) + +(define_predicate "b_register" + (ior (and (match_code "reg") + (match_test "B_REGNO_P (REGNO (op))")) + (and (match_code "const_int") + (match_test "B_REGNO_P (INTVAL (op))")))) + +(define_predicate "pic_register_operand" + (and (match_code "reg") + (match_test "op == pic_offset_table_rtx"))) + +;; True if OP refers to a symbol in the sdata section. 
+(define_predicate "sdata_symbolic_operand" + (match_code "symbol_ref,const") +{ + HOST_WIDE_INT offset = 0, size = 0; + tree t; + + switch (GET_CODE (op)) + { + case CONST: + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS + || GET_CODE (XEXP (op, 0)) != SYMBOL_REF + || GET_CODE (XEXP (op, 1)) != CONST_INT) + return false; + offset = INTVAL (XEXP (op, 1)); + op = XEXP (op, 0); + /* FALLTHRU */ + + case SYMBOL_REF: + /* For shared libraries, only allow symbols we know are local. + For executables, the linker knows to create copy relocs if + necessary so we can use DP-relative addressing for all small + objects. */ + if ((c6x_initial_flag_pic && !SYMBOL_REF_LOCAL_P (op)) + || !SYMBOL_REF_SMALL_P (op)) + return false; + + /* Note that in addition to DECLs, we can get various forms + of constants here. */ + t = SYMBOL_REF_DECL (op); + if (DECL_P (t)) + t = DECL_SIZE_UNIT (t); + else + t = TYPE_SIZE_UNIT (TREE_TYPE (t)); + if (t && tree_fits_shwi_p (t)) + { + size = tree_to_shwi (t); + if (size < 0) + size = 0; + } + + /* Don't allow addressing outside the object. */ + return (offset >= 0 && offset <= size); + + default: + gcc_unreachable (); + } +}) diff --git a/gcc-4.9/gcc/config/c6x/sync.md b/gcc-4.9/gcc/config/c6x/sync.md new file mode 100644 index 000000000..fff6c4394 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/sync.md @@ -0,0 +1,270 @@ +;; GCC machine description for C6X synchronization instructions. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; C64X+ has atomic instructions, but they are not atomic on all +;; devices and have other problems. We use normal loads and stores, +;; and place them in overlapping branch shadows to ensure interrupts +;; are disabled during the sequence, which guarantees atomicity on all +;; single-core systems. 
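The comment above is the key design note for this file: rather than rely on the C64X+ atomic instructions, every synchronization primitive is open-coded as a plain load/operate/store placed in the delay slots of two overlapping branches, so no interrupt can be taken mid-sequence on a single-core part. A hypothetical C-level view of what this buys follows; counter and take_ticket are invented names, and the expansion path through the "sync_old_addsi" pattern is my reading of the expanders defined below.

#include <stdint.h>

int32_t counter;

/* Returns the previous value of the counter.  With the patterns in this
   file, the builtin should expand through "sync_old_addsi" into one of
   the branch-shadow sequences further down, so the load, add and store
   cannot be separated by an interrupt on a single-core device.  */
int32_t
take_ticket (void)
{
  return __sync_fetch_and_add (&counter, 1);
}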
+ +(define_code_iterator FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")]) +(define_code_attr fetchop_pred + [(plus "reg_or_scst5_operand") (minus "register_operand") + (ior "reg_or_scst5_operand") (xor "reg_or_scst5_operand") + (and "reg_or_scst5_operand")]) +(define_code_attr fetchop_constr + [(plus "bIs5") (minus "b") (ior "bIs5") (xor "bIs5") (and "bIs5")]) +(define_code_attr fetchop_opcode + [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) +(define_code_attr fetchop_inops02 + [(plus "%2, %0") (minus "%0, %2") (ior "%2, %0") (xor "%2, %0") + (and "%2, %0")]) +(define_code_attr fetchop_inops21 + [(plus "%1, %2") (minus "%2, %1") (ior "%1, %2") (xor "%1, %2") + (and "%1, %2")]) + +(define_expand "sync_compare_and_swapsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec_volatile:SI + [(match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")] + UNSPECV_CAS)) + (clobber (match_scratch:SI 4 ""))])] + "" +{ +}) + +(define_expand "sync_si" + [(parallel + [(set (match_operand:SI 0 "memory_operand" "") + (unspec:SI + [(FETCHOP:SI (match_dup 0) + (match_operand:SI 1 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 2 ""))])] + "" +{ +}) + +(define_expand "sync_old_si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_operand:SI 2 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 ""))])] + "" +{ +}) + +(define_expand "sync_new_si" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (FETCHOP:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "" ""))) + (set (match_dup 1) + (unspec:SI [(FETCHOP:SI (match_dup 1) (match_dup 2))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 ""))])] + "" +{ +}) + +(define_expand "sync_nandsi" + [(parallel + [(set (match_operand:SI 0 "memory_operand" "") + (unspec:SI + [(not:SI (and:SI (match_dup 0) + (match_operand:SI 1 "reg_or_scst5_operand" "")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 2 ""))])] + "" +{ +}) + +(define_expand "sync_old_nandsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (set (match_dup 1) + (unspec:SI + [(not:SI (and:SI (match_dup 1) + (match_operand:SI 2 "reg_or_scst5_operand" "")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 ""))])] + "" +{ +}) + +(define_expand "sync_new_nandsi" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (not:SI (and:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "reg_or_scst5_operand" "")))) + (set (match_dup 1) + (unspec:SI [(not:SI (and:SI (match_dup 1) (match_dup 2)))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 ""))])] + "" +{ +}) + +(define_insn "*sync_compare_and_swapsi" + [(set (match_operand:SI 0 "register_operand" "=&b") + (match_operand:SI 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec_volatile:SI + [(match_operand:SI 2 "register_operand" "B") + (match_operand:SI 3 "register_operand" "b")] + UNSPECV_CAS)) + (clobber (match_scratch:SI 4 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ + || ldw .d%U1t%U0 %1, %0\n\\ + nop 4\n\\ +|| b .s2 2f ; 1\n\\ + cmpeq .l2 %0, %2, %2 ; 5\n\\ +1: [%2] stw .d%U1t%U3 %3, %1 ; 6\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_si_insn" + [(set (match_operand:SI 0 
"memory_operand" "+m") + (unspec:SI + [(FETCHOP:SI (match_dup 0) + (match_operand:SI 1 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 2 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U0t%U2 %0, %2\n\\ + nop 4\n\\ +|| b .s2 2f ; 1\n\\ + .l2 , %2 ; 5\n\\ +1: stw .d%U0t%U2 %2, %0 ; 6\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_old_si_insn" + [(set (match_operand:SI 0 "register_operand" "=&b") + (match_operand:SI 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_operand:SI 2 "" ""))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U1t%U0 %1, %0\n\\ + nop 4\n\\ +|| b .s2 2f ; 1\n\\ + .l2 , %3 ; 5\n\\ +1: stw .d%U1t%U3 %3, %1 ; 6\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_new_si_insn" + [(set (match_operand:SI 0 "register_operand" "=&b") + (FETCHOP:SI (match_operand:SI 1 "memory_operand" "+m") + (match_operand:SI 2 "" ""))) + (set (match_dup 1) + (unspec:SI + [(FETCHOP:SI (match_dup 1) + (match_dup 2))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U1t%U0 %1, %0\n\\ + nop 4\n\\ +|| b .s2 2f ; 1\n\\ + .l2 , %0 ; 5\n\\ +1: stw .d%U1t%U0 %0, %1 ; 6\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_nandsi_insn" + [(set (match_operand:SI 0 "memory_operand" "+m") + (unspec:SI + [(not:SI (and:SI (match_dup 0) + (match_operand:SI 1 "reg_or_scst5_operand" "bIs5")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 2 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U0t%U2 %0, %2\n\\ + nop 1\n\\ + nop 3\n\\ +|| b .s2 2f ; 2\n\\ + and .l2 %1, %2, %2 ; 5\n\\ +1: not .l2 %2, %2 ; 6\n\\ + stw .d%U0t%U2 %2, %0 ; 7\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_old_nandsi_insn" + [(set (match_operand:SI 0 "register_operand" "=&b") + (match_operand:SI 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:SI + [(not:SI (and:SI (match_dup 1) + (match_operand:SI 2 "reg_or_scst5_operand" "bIs5")))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U1t%U0 %1, %0\n\\ + nop 1\n\\ + nop 3\n\\ +|| b .s2 2f ; 2\n\\ + and .l2 %2, %0, %3 ; 5\n\\ +1: not .l2 %3, %3 ; 6\n\\ + stw .d%U1t%U3 %3, %1 ; 7\n\\ +2:" + [(set_attr "type" "atomic")]) + +(define_insn "sync_new_nandsi_insn" + [(set (match_operand:SI 0 "register_operand" "=&b") + (not:SI (and:SI (match_operand:SI 1 "memory_operand" "+m") + (match_operand:SI 2 "reg_or_scst5_operand" "bIs5")))) + (set (match_dup 1) + (unspec:SI + [(not:SI (and:SI (match_dup 1) (match_dup 2)))] + UNSPEC_ATOMIC)) + (clobber (match_scratch:SI 3 "=&B"))] + "" + "0: b .s2 1f ; 0\n\\ +|| ldw .d%U1t%U0 %1, %0\n\\ + nop 1\n\\ + nop 3\n\\ +|| b .s2 2f ; 2\n\\ + and .l2 %2, %0, %0 ; 5\n\\ +1: not .l2 %0, %0 ; 6\n\\ + stw .d%U1t%U0 %0, %1 ; 7\n\\ +2:" + [(set_attr "type" "atomic")]) diff --git a/gcc-4.9/gcc/config/c6x/t-c6x b/gcc-4.9/gcc/config/c6x/t-c6x new file mode 100644 index 000000000..4cde36ce8 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/t-c6x @@ -0,0 +1,42 @@ +# Target Makefile Fragment for TI C6X. +# Copyright (C) 2010-2014 Free Software Foundation, Inc. +# Contributed by CodeSourcery. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. 
+# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MD_INCLUDES= $(srcdir)/config/c6x/constraints.md \ + $(srcdir)/config/c6x/predicates.md \ + $(srcdir)/config/c6x/c6x-mult.md \ + $(srcdir)/config/c6x/c6x-sched.md + +s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \ + s-opinit s-extract s-peep s-attr s-attrtab s-output: $(MD_INCLUDES) + +$(srcdir)/config/c6x/c6x-sched.md: $(srcdir)/config/c6x/gensched.sh \ + $(srcdir)/config/c6x/c6x-sched.md.in + $(SHELL) $(srcdir)/config/c6x/gensched.sh \ + $(srcdir)/config/c6x/c6x-sched.md.in > $@ + +$(srcdir)/config/c6x/c6x-mult.md: $(srcdir)/config/c6x/genmult.sh \ + $(srcdir)/config/c6x/c6x-mult.md.in + $(SHELL) $(srcdir)/config/c6x/genmult.sh \ + $(srcdir)/config/c6x/c6x-mult.md.in > $@ + +$(srcdir)/config/c6x/c6x-tables.opt: $(srcdir)/config/c6x/genopt.sh \ + $(srcdir)/config/c6x/c6x-isas.def + $(SHELL) $(srcdir)/config/c6x/genopt.sh $(srcdir)/config/c6x > \ + $(srcdir)/config/c6x/c6x-tables.opt diff --git a/gcc-4.9/gcc/config/c6x/t-c6x-elf b/gcc-4.9/gcc/config/c6x/t-c6x-elf new file mode 100644 index 000000000..8d7276be4 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/t-c6x-elf @@ -0,0 +1,30 @@ +# Target Makefile Fragment for TI C6X using ELF. +# Copyright (C) 2010-2014 Free Software Foundation, Inc. +# Contributed by CodeSourcery. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +EXTRA_HEADERS += $(srcdir)/ginclude/unwind-arm-common.h + +# Use this variant for fully testing all CPU types +#MULTILIB_OPTIONS = mbig-endian march=c674x/march=c64x/march=c67x/march=c67x+/march=c62x +#MULTILIB_DIRNAMES = be c674x c64x c67x c67x+ c62x + +MULTILIB_OPTIONS = mbig-endian march=c674x +MULTILIB_DIRNAMES = be c674x +MULTILIB_EXCEPTIONS = +MULTILIB_MATCHES = diff --git a/gcc-4.9/gcc/config/c6x/t-c6x-uclinux b/gcc-4.9/gcc/config/c6x/t-c6x-uclinux new file mode 100644 index 000000000..e4b93908f --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/t-c6x-uclinux @@ -0,0 +1,3 @@ +MULTILIB_OSDIRNAMES = march.c674x=!c674x +MULTILIB_OSDIRNAMES += mbig-endian=!be +MULTILIB_OSDIRNAMES += mbig-endian/march.c674x=!be/c674x diff --git a/gcc-4.9/gcc/config/c6x/uclinux-elf.h b/gcc-4.9/gcc/config/c6x/uclinux-elf.h new file mode 100644 index 000000000..3f3964ba7 --- /dev/null +++ b/gcc-4.9/gcc/config/c6x/uclinux-elf.h @@ -0,0 +1,63 @@ +/* Definitions for TI C6X running ucLinux using ELF + Copyright (C) 2010-2014 Free Software Foundation, Inc. + Contributed by Andrew Jenner + Contributed by Bernd Schmidt + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__uClinux__"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + } \ + while (false) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{!shared:crt1%O%s} crti%O%s %{shared|pie:crtbeginS.o%s;:crtbegin.o%s}" + +#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" + +#undef LINK_SPEC +#define LINK_SPEC ENDIAN_LINK_SPEC \ + "%{shared} %{fpie|fPIE:-pie} \ + %{!shared: %{!static: \ + %{rdynamic:-export-dynamic} \ + %{!dynamic-linker:-dynamic-linker " UCLIBC_DYNAMIC_LINKER "}} \ + %{static}}" + +#undef DRIVER_SELF_SPECS +#define DRIVER_SELF_SPECS "%{!mno-dsbt:-mdsbt}" + +/* Clear the instruction cache from `beg' to `end'. This makes an + inline system call to SYS_cacheflush. */ +#undef CLEAR_INSN_CACHE +#define CLEAR_INSN_CACHE(BEG, END) \ +{ \ + register unsigned long _beg __asm ("A4") = (unsigned long) (BEG); \ + register unsigned long _end __asm ("B4") = (unsigned long) (END); \ + register unsigned long _scno __asm ("B0") = 244; \ + __asm __volatile ("swe ; sys_cache_sync" \ + : "=a" (_beg) \ + : "0" (_beg), "b" (_end), "b" (_scno)); \ +} diff --git a/gcc-4.9/gcc/config/cr16/constraints.md b/gcc-4.9/gcc/config/cr16/constraints.md new file mode 100644 index 000000000..9d9789bce --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/constraints.md @@ -0,0 +1,81 @@ +;; Predicates of machine description for CR16. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by KPIT Cummins Infosystems Limited. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Constraints +;; Register constraints +(define_register_constraint "b" "NOSP_REGS" + "@no sp registers") + +(define_register_constraint "c" "SHORT_REGS" + "@short registers") + +(define_register_constraint "d" "LONG_REGS" + "@long registers") + +;; Integer constraints. +(define_constraint "I" + "A signed 4-bit immediate." + (and (match_code "const_int") + (match_test "SIGNED_INT_FITS_N_BITS (ival, 4)"))) + +(define_constraint "J" + "A signed 5-bit immediate." + (and (match_code "const_int") + (match_test "SIGNED_INT_FITS_N_BITS (ival, 5)"))) + +(define_constraint "K" + "A signed 6-bit immediate." 
+ (and (match_code "const_int") + (match_test "SIGNED_INT_FITS_N_BITS (ival, 6)"))) + +(define_constraint "L" + "A unsigned 4-bit immediate." + (and (match_code "const_int") + (match_test "UNSIGNED_INT_FITS_N_BITS (ival, 4)"))) + +(define_constraint "M" + "A unsigned and customized 4-bit immediate." + (and (match_code "const_int") + (match_test "(IN_RANGE_P (ival, 0, 15) && ((ival != 9) && (ival != 11)))"))) + +(define_constraint "N" + "A signed 16-bit immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, -32768, 32767)"))) + +(define_constraint "O" + "A unsigned 20-bit immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, 0, 1048575)"))) + +(define_constraint "Q" + "A shift QI immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, 0, 7)"))) + +(define_constraint "R" + "A shift HI immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, 0, 15)"))) + +(define_constraint "S" + "A shift SI immediate." + (and (match_code "const_int") + (match_test "IN_RANGE_P (ival, 0, 31)"))) diff --git a/gcc-4.9/gcc/config/cr16/cr16-protos.h b/gcc-4.9/gcc/config/cr16/cr16-protos.h new file mode 100644 index 000000000..80ea43260 --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16-protos.h @@ -0,0 +1,99 @@ +/* Prototypes for exported functions defined in cr16.c + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by KPIT Cummins Infosystems Limited. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_CR16_PROTOS_H +#define GCC_CR16_PROTOS_H + +/* Register usage. */ +extern enum reg_class cr16_regno_reg_class (int); +extern int cr16_hard_regno_mode_ok (int regno, enum machine_mode); + +/* Passing function arguments. */ +extern int cr16_function_arg_regno_p (int); + +#ifdef TREE_CODE +#ifdef RTX_CODE + +extern void cr16_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx); + +#endif /* RTX_CODE. */ +#endif /* TREE_CODE. */ + +/* Enumeration giving the various data models we support. */ +enum data_model_type +{ + DM_DEFAULT, /* Default data model (in CR16C/C+ - up to 16M). */ + DM_NEAR, /* Near data model (in CR16C/C+ - up to 1M). */ + DM_FAR, /* Far data model (in CR16C+ - up to 4G) + (in CR16C - up to 16M). */ + ILLEGAL_DM /* Illegal data model. */ +}; + +#ifdef RTX_CODE + +/* Addressing Modes. */ +struct cr16_address +{ + rtx base; /* Base register: Any register or register pair. */ + rtx index; /* Index register: If one is present. */ + rtx disp; /* Displacement or Absolute address. */ + enum data_model_type data; /* data ref type. */ + int code; /* Whether the address is code address. + 0 - data, 1 - code label, 2 - function label. 
*/ +}; + +enum cr16_addrtype +{ + CR16_INVALID, + CR16_REG_REL, + CR16_REGP_REL, + CR16_INDEX_REGP_REL, + CR16_ABSOLUTE +}; + +extern void notice_update_cc (rtx); +extern int cr16_operand_bit_pos (int val, int bitval); +extern void cr16_decompose_const (rtx x, int *code, + enum data_model_type *data, + bool treat_as_const); +extern enum cr16_addrtype cr16_decompose_address (rtx addr, + struct cr16_address *out, + bool debug_print, + bool treat_as_const); +extern int cr16_const_double_ok (rtx op); +extern int legitimate_pic_operand_p (rtx); +extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx); + + +/* Prologue/Epilogue functions. */ +extern int cr16_initial_elimination_offset (int, int); +extern char *cr16_prepare_push_pop_string (int); +extern void cr16_expand_prologue (void); +extern void cr16_expand_epilogue (void); +extern const char *cr16_emit_add_sub_di (rtx *, enum rtx_code); +extern const char *cr16_emit_logical_di (rtx *, enum rtx_code); + +#endif /* RTX_CODE. */ + +/* Handling the "interrupt" attribute. */ +extern int cr16_interrupt_function_p (void); +extern bool cr16_is_data_model (enum data_model_type); + +#endif /* Not GCC_CR16_PROTOS_H. */ diff --git a/gcc-4.9/gcc/config/cr16/cr16.c b/gcc-4.9/gcc/config/cr16/cr16.c new file mode 100644 index 000000000..f5a444bec --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16.c @@ -0,0 +1,2194 @@ +/* Output routines for CR16 processor. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by KPIT Cummins Infosystems Limited. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "stor-layout.h" +#include "calls.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-codes.h" +#include "insn-attr.h" +#include "flags.h" +#include "except.h" +#include "function.h" +#include "recog.h" +#include "expr.h" +#include "optabs.h" +#include "diagnostic-core.h" +#include "basic-block.h" +#include "target.h" +#include "target-def.h" +#include "df.h" + +/* Definitions. */ + +/* Maximum number of register used for passing parameters. */ +#define MAX_REG_FOR_PASSING_ARGS 6 + +/* Minimum number register used for passing parameters. */ +#define MIN_REG_FOR_PASSING_ARGS 2 + +/* The maximum count of words supported in the assembly of the architecture in + a push/pop instruction. */ +#define MAX_COUNT 8 + +/* Predicate is true if the current function is a 'noreturn' function, + i.e. it is qualified as volatile. */ +#define FUNC_IS_NORETURN_P(decl) (TREE_THIS_VOLATILE (decl)) + +/* Predicate that holds when we need to save registers even for 'noreturn' + functions, to accommodate for unwinding. */ +#define MUST_SAVE_REGS_P() \ + (flag_unwind_tables || (flag_exceptions && !UI_SJLJ)) + +/* Nonzero if the rtx X is a signed const int of n bits. 
*/ +#define RTX_SIGNED_INT_FITS_N_BITS(X, n) \ + ((GET_CODE (X) == CONST_INT \ + && SIGNED_INT_FITS_N_BITS (INTVAL (X), n)) ? 1 : 0) + +/* Nonzero if the rtx X is an unsigned const int of n bits. */ +#define RTX_UNSIGNED_INT_FITS_N_BITS(X, n) \ + ((GET_CODE (X) == CONST_INT \ + && UNSIGNED_INT_FITS_N_BITS (INTVAL (X), n)) ? 1 : 0) + +/* Structure for stack computations. */ + +/* variable definitions in the struture + args_size Number of bytes saved on the stack for local + variables + + reg_size Number of bytes saved on the stack for + non-scratch registers + + total_size The sum of 2 sizes: locals vars and padding byte + for saving the registers. Used in expand_prologue() + and expand_epilogue() + + last_reg_to_save Will hold the number of the last register the + prologue saves, -1 if no register is saved + + save_regs[16] Each object in the array is a register number. + Mark 1 for registers that need to be saved + + num_regs Number of registers saved + + initialized Non-zero if frame size already calculated, not + used yet + + function_makes_calls Does the function make calls ? not used yet. */ + +struct cr16_frame_info +{ + unsigned long var_size; + unsigned long args_size; + unsigned int reg_size; + unsigned long total_size; + long last_reg_to_save; + long save_regs[FIRST_PSEUDO_REGISTER]; + int num_regs; + int initialized; + int function_makes_calls; +}; + +/* Current frame information calculated by cr16_compute_frame_size. */ +static struct cr16_frame_info current_frame_info; + +/* Static Variables. */ + +/* Data model that was supplied by user via command line option + This will be overridden in case of invalid combination + of core and data model options are supplied. */ +static enum data_model_type data_model = DM_DEFAULT; + +/* TARGETM Function Prototypes and forward declarations */ +static void cr16_print_operand (FILE *, rtx, int); +static void cr16_print_operand_address (FILE *, rtx); + +/* Stack layout and calling conventions. */ +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX cr16_struct_value_rtx +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY cr16_return_in_memory + +/* Target-specific uses of '__attribute__'. */ +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE cr16_attribute_table +#undef TARGET_NARROW_VOLATILE_BITFIELD +#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false + +/* EH related. */ +#undef TARGET_UNWIND_WORD_MODE +#define TARGET_UNWIND_WORD_MODE cr16_unwind_word_mode + +/* Override Options. */ +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE cr16_override_options + +/* Conditional register usuage. */ +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE cr16_conditional_register_usage + +/* Controlling register spills. */ +#undef TARGET_CLASS_LIKELY_SPILLED_P +#define TARGET_CLASS_LIKELY_SPILLED_P cr16_class_likely_spilled_p + +/* Passing function arguments. */ +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG cr16_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE cr16_function_arg_advance +#undef TARGET_RETURN_POPS_ARGS +#define TARGET_RETURN_POPS_ARGS cr16_return_pops_args + +/* Initialize the GCC target structure. 
*/ +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED cr16_frame_pointer_required +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE cr16_can_eliminate +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS cr16_legitimize_address +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P cr16_legitimate_constant_p +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P cr16_legitimate_address_p + +/* Returning function value. */ +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE cr16_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE cr16_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P cr16_function_value_regno_p + +/* printing the values. */ +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND cr16_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS cr16_print_operand_address + +/* Relative costs of operations. */ +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST cr16_address_cost +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST cr16_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST cr16_memory_move_cost + +/* Table of machine attributes. */ +static const struct attribute_spec cr16_attribute_table[] = { + /* ISRs have special prologue and epilogue requirements. */ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity }. */ + {"interrupt", 0, 0, false, true, true, NULL, false}, + {NULL, 0, 0, false, false, false, NULL, false} +}; + +/* TARGET_ASM_UNALIGNED_xx_OP generates .?byte directive + .?byte directive along with @c is not understood by assembler. + Therefore, make all TARGET_ASM_UNALIGNED_xx_OP same + as TARGET_ASM_ALIGNED_xx_OP. */ +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP + +/* Target hook implementations. */ + +/* Implements hook TARGET_RETURN_IN_MEMORY. */ +static bool +cr16_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + const HOST_WIDE_INT size = int_size_in_bytes (type); + return ((size == -1) || (size > 8)); +} + +/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ +static bool +cr16_class_likely_spilled_p (reg_class_t rclass) +{ + if ((rclass) == SHORT_REGS || (rclass) == DOUBLE_BASE_REGS + || (rclass) == LONG_REGS || (rclass) == GENERAL_REGS) + return true; + + return false; +} + +static int +cr16_return_pops_args (tree fundecl ATTRIBUTE_UNUSED, + tree funtype ATTRIBUTE_UNUSED, + int size ATTRIBUTE_UNUSED) +{ + return 0; +} + +/* Returns true if data model selected via command line option + is same as function argument. */ +bool +cr16_is_data_model (enum data_model_type model) +{ + return (model == data_model); +} + +/* Parse relevant options and override. */ +static void +cr16_override_options (void) +{ + /* Disable -fdelete-null-pointer-checks option for CR16 target. + Programs which rely on NULL pointer dereferences _not_ halting the + program may not work properly with this option. So disable this + option. */ + flag_delete_null_pointer_checks = 0; + + /* FIXME: To avoid spill_failure ICE during exception handling, + * disable cse_fllow_jumps. 
The spill error occurs when compiler + * can't find a suitable candidate in GENERAL_REGS class to reload + * a 32bit register. + * Need to find a better way of avoiding this situation. */ + if (flag_exceptions) + flag_cse_follow_jumps = 0; + + /* If -fpic option, data_model == DM_FAR. */ + if (flag_pic == NEAR_PIC) + { + data_model = DM_FAR; + } + + /* The only option we want to examine is data model option. */ + if (cr16_data_model) + { + if (strcmp (cr16_data_model, "medium") == 0) + data_model = DM_DEFAULT; + else if (strcmp (cr16_data_model, "near") == 0) + data_model = DM_NEAR; + else if (strcmp (cr16_data_model, "far") == 0) + { + if (TARGET_CR16CP) + data_model = DM_FAR; + else + error ("data-model=far not valid for cr16c architecture"); + } + else + error ("invalid data model option -mdata-model=%s", cr16_data_model); + } + else + data_model = DM_DEFAULT; +} + +/* Implements the macro TARGET_CONDITIONAL_REGISTER_USAGE. */ +static void +cr16_conditional_register_usage (void) +{ + if (flag_pic) + { + fixed_regs[12] = call_used_regs[12] = 1; + } +} + +/* Stack layout and calling conventions routines. */ + +/* Return nonzero if the current function being compiled is an interrupt + function as specified by the "interrupt" attribute. */ +int +cr16_interrupt_function_p (void) +{ + tree attributes; + + attributes = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)); + return (lookup_attribute ("interrupt", attributes) != NULL_TREE); +} + +/* Compute values for the array current_frame_info.save_regs and the variable + current_frame_info.reg_size. The index of current_frame_info.save_regs + is numbers of register, each will get 1 if we need to save it in the + current function, 0 if not. current_frame_info.reg_size is the total sum + of the registers being saved. */ +static void +cr16_compute_save_regs (void) +{ + unsigned int regno; + + /* Initialize here so in case the function is no-return it will be -1. */ + current_frame_info.last_reg_to_save = -1; + + /* Initialize the number of bytes to be saved. */ + current_frame_info.reg_size = 0; + + /* No need to save any registers if the function never returns. */ + if (FUNC_IS_NORETURN_P (current_function_decl) && !MUST_SAVE_REGS_P ()) + return; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + { + if (fixed_regs[regno]) + { + current_frame_info.save_regs[regno] = 0; + continue; + } + + /* If this reg is used and not call-used (except RA), save it. */ + if (cr16_interrupt_function_p ()) + { + if (!crtl->is_leaf && call_used_regs[regno]) + /* This is a volatile reg in a non-leaf interrupt routine - save + it for the sake of its sons. */ + current_frame_info.save_regs[regno] = 1; + else if (df_regs_ever_live_p (regno)) + /* This reg is used - save it. */ + current_frame_info.save_regs[regno] = 1; + else + /* This reg is not used, and is not a volatile - don't save. */ + current_frame_info.save_regs[regno] = 0; + } + else + { + /* If this reg is used and not call-used (except RA), save it. */ + if (df_regs_ever_live_p (regno) + && (!call_used_regs[regno] || regno == RETURN_ADDRESS_REGNUM)) + current_frame_info.save_regs[regno] = 1; + else + current_frame_info.save_regs[regno] = 0; + } + } + + /* Save registers so the exception handler can modify them. 
*/ + if (crtl->calls_eh_return) + { + unsigned int i; + + for (i = 0;; ++i) + { + regno = EH_RETURN_DATA_REGNO (i); + if (INVALID_REGNUM == regno) + break; + current_frame_info.save_regs[regno] = 1; + } + } + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (current_frame_info.save_regs[regno] == 1) + { + current_frame_info.last_reg_to_save = regno; + if (regno >= CR16_FIRST_DWORD_REGISTER) + current_frame_info.reg_size += CR16_UNITS_PER_DWORD; + else + current_frame_info.reg_size += UNITS_PER_WORD; + } +} + +/* Compute the size of the local area and the size to be adjusted by the + prologue and epilogue. */ +static void +cr16_compute_frame (void) +{ + /* For aligning the local variables. */ + int stack_alignment = STACK_BOUNDARY / BITS_PER_UNIT; + int padding_locals; + + /* Padding needed for each element of the frame. */ + current_frame_info.var_size = get_frame_size (); + + /* Align to the stack alignment. */ + padding_locals = current_frame_info.var_size % stack_alignment; + if (padding_locals) + padding_locals = stack_alignment - padding_locals; + + current_frame_info.var_size += padding_locals; + current_frame_info.total_size = current_frame_info.var_size + + (ACCUMULATE_OUTGOING_ARGS + ? crtl->outgoing_args_size : 0); +} + +/* Implements the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */ +int +cr16_initial_elimination_offset (int from, int to) +{ + /* Compute this since we need to use current_frame_info.reg_size. */ + cr16_compute_save_regs (); + + /* Compute this since we need to use current_frame_info.var_size. */ + cr16_compute_frame (); + + if (((from) == FRAME_POINTER_REGNUM) && ((to) == STACK_POINTER_REGNUM)) + return (ACCUMULATE_OUTGOING_ARGS ? crtl->outgoing_args_size : 0); + else if (((from) == ARG_POINTER_REGNUM) && ((to) == FRAME_POINTER_REGNUM)) + return (current_frame_info.reg_size + current_frame_info.var_size); + else if (((from) == ARG_POINTER_REGNUM) && ((to) == STACK_POINTER_REGNUM)) + return (current_frame_info.reg_size + current_frame_info.var_size + + (ACCUMULATE_OUTGOING_ARGS ? crtl->outgoing_args_size : 0)); + else + gcc_unreachable (); +} + +/* Register Usage. */ + +/* Return the class number of the smallest class containing reg number REGNO. + This could be a conditional expression or could index an array. */ +enum reg_class +cr16_regno_reg_class (int regno) +{ + if ((regno >= 0) && (regno < CR16_FIRST_DWORD_REGISTER)) + return SHORT_REGS; + + if ((regno >= CR16_FIRST_DWORD_REGISTER) && (regno < FIRST_PSEUDO_REGISTER)) + return LONG_REGS; + + return NO_REGS; +} + +/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ +int +cr16_hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + if ((GET_MODE_SIZE (mode) >= 4) && (regno == 11)) + return 0; + + if (mode == DImode || mode == DFmode) + { + if ((regno > 8) || (regno & 1)) + return 0; + return 1; + } + + if ((TARGET_INT32) + && ((regno >= 12) && (GET_MODE_SIZE (mode) < 4 ))) + return 0; + + /* CC can only hold CCmode values. */ + if (GET_MODE_CLASS (mode) == MODE_CC) + return 0; + return 1; +} + +/* Returns register number for function return value.*/ +static inline unsigned int +cr16_ret_register (void) +{ + return 0; +} + +/* Implements hook TARGET_STRUCT_VALUE_RTX. */ +static rtx +cr16_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, cr16_ret_register ()); +} + +/* Returning function value. */ + +/* Worker function for TARGET_FUNCTION_VALUE_REGNO_P. 
*/ +static bool +cr16_function_value_regno_p (const unsigned int regno) +{ + return (regno == cr16_ret_register ()); +} + +/* Create an RTX representing the place where a + library function returns a value of mode MODE. */ +static rtx +cr16_libcall_value (enum machine_mode mode, + const_rtx func ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, cr16_ret_register ()); +} + +/* Create an RTX representing the place where a + function returns a value of data type VALTYPE. */ +static rtx +cr16_function_value (const_tree type, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (TYPE_MODE (type), cr16_ret_register ()); +} + +/* Passing function arguments. */ + +/* If enough param regs are available for passing the param of type TYPE return + the number of registers needed else 0. */ +static int +enough_regs_for_param (CUMULATIVE_ARGS * cum, const_tree type, + enum machine_mode mode) +{ + int type_size; + int remaining_size; + + if (mode != BLKmode) + type_size = GET_MODE_BITSIZE (mode); + else + type_size = int_size_in_bytes (type) * BITS_PER_UNIT; + + remaining_size = BITS_PER_WORD * (MAX_REG_FOR_PASSING_ARGS + - (MIN_REG_FOR_PASSING_ARGS + cum->ints) + + 1); + + /* Any variable which is too big to pass in two registers, will pass on + stack. */ + if ((remaining_size >= type_size) && (type_size <= 2 * BITS_PER_WORD)) + return (type_size + BITS_PER_WORD - 1) / BITS_PER_WORD; + + return 0; +} + +/* Implements the macro FUNCTION_ARG defined in cr16.h. */ +static rtx +cr16_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + cum->last_parm_in_reg = 0; + + /* function_arg () is called with this type just after all the args have + had their registers assigned. The rtx that function_arg returns from + this type is supposed to pass to 'gen_call' but currently it is not + implemented (see macro GEN_CALL). */ + if (type == void_type_node) + return NULL_RTX; + + if (targetm.calls.must_pass_in_stack (mode, type) || (cum->ints < 0)) + return NULL_RTX; + + if (mode == BLKmode) + { + /* Enable structures that need padding bytes at the end to pass to a + function in registers. */ + if (enough_regs_for_param (cum, type, mode) != 0) + { + cum->last_parm_in_reg = 1; + return gen_rtx_REG (mode, MIN_REG_FOR_PASSING_ARGS + cum->ints); + } + } + + if ((MIN_REG_FOR_PASSING_ARGS + cum->ints) > MAX_REG_FOR_PASSING_ARGS) + return NULL_RTX; + else + { + if (enough_regs_for_param (cum, type, mode) != 0) + { + cum->last_parm_in_reg = 1; + return gen_rtx_REG (mode, MIN_REG_FOR_PASSING_ARGS + cum->ints); + } + } + + return NULL_RTX; +} + +/* Implements the macro INIT_CUMULATIVE_ARGS defined in cr16.h. */ +void +cr16_init_cumulative_args (CUMULATIVE_ARGS * cum, tree fntype, + rtx libfunc ATTRIBUTE_UNUSED) +{ + tree param, next_param; + + cum->ints = 0; + + /* Determine if this function has variable arguments. This is indicated by + the last argument being 'void_type_mode' if there are no variable + arguments. Change here for a different vararg. */ + for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0; + param != NULL_TREE; param = next_param) + { + next_param = TREE_CHAIN (param); + if ((next_param == NULL_TREE) && (TREE_VALUE (param) != void_type_node)) + { + cum->ints = -1; + return; + } + } +} + +/* Implements the macro FUNCTION_ARG_ADVANCE defined in cr16.h. 
*/ +static void +cr16_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS * cum = get_cumulative_args (cum_v); + + /* l holds the number of registers required. */ + int l = GET_MODE_BITSIZE (mode) / BITS_PER_WORD; + + /* If the parameter isn't passed on a register don't advance cum. */ + if (!cum->last_parm_in_reg) + return; + + if (targetm.calls.must_pass_in_stack (mode, type) || (cum->ints < 0)) + return; + + if ((mode == SImode) || (mode == HImode) + || (mode == QImode) || (mode == DImode)) + { + if (l <= 1) + cum->ints += 1; + else + cum->ints += l; + } + else if ((mode == SFmode) || (mode == DFmode)) + cum->ints += l; + else if ((mode) == BLKmode) + { + if ((l = enough_regs_for_param (cum, type, mode)) != 0) + cum->ints += l; + } + return; +} + +/* Implements the macro FUNCTION_ARG_REGNO_P defined in cr16.h. + Return nonzero if N is a register used for passing parameters. */ +int +cr16_function_arg_regno_p (int n) +{ + return ((n <= MAX_REG_FOR_PASSING_ARGS) && (n >= MIN_REG_FOR_PASSING_ARGS)); +} + +/* Addressing modes. + Following set of function implement the macro GO_IF_LEGITIMATE_ADDRESS + defined in cr16.h. */ + +/* Helper function to check if is a valid base register that can + hold address. */ +static int +cr16_addr_reg_p (rtx addr_reg) +{ + rtx reg; + + if (REG_P (addr_reg)) + reg = addr_reg; + else if ((GET_CODE (addr_reg) == SUBREG) + && REG_P (SUBREG_REG (addr_reg)) + && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (addr_reg))) + <= UNITS_PER_WORD)) + reg = SUBREG_REG (addr_reg); + else + return FALSE; + + if (GET_MODE (reg) != Pmode) + return FALSE; + + return TRUE; +} + +/* Helper functions: Created specifically for decomposing operand of CONST + Recursively look into expression x for code or data symbol. + The function expects the expression to contain combination of + SYMBOL_REF, CONST_INT, (PLUS or MINUS) + LABEL_REF, CONST_INT, (PLUS or MINUS) + SYMBOL_REF + LABEL_REF + All other combinations will result in code = -1 and data = ILLEGAL_DM + code data + -1 ILLEGAL_DM The expression did not contain SYMBOL_REF or LABEL_REF + 0 DM_FAR SYMBOL_REF was found and it was far data reference. + 0 DM_DEFAULT SYMBOL_REF was found and it was medium data reference. + 1 ILLEGAL_DM LABEL_REF was found. + 2 ILLEGAL_DM SYMBOL_REF was found and it was function reference. */ +void +cr16_decompose_const (rtx x, int *code, enum data_model_type *data, + bool treat_as_const) +{ + *code = -1; + *data = ILLEGAL_DM; + switch (GET_CODE (x)) + { + case SYMBOL_REF: + *code = SYMBOL_REF_FUNCTION_P (x) ? 2 : 0; + /* 2 indicates func sym. */ + if (*code == 0) + { + if (CR16_TARGET_DATA_NEAR) + *data = DM_DEFAULT; + else if (CR16_TARGET_DATA_MEDIUM) + *data = DM_FAR; + else if (CR16_TARGET_DATA_FAR) + { + if (treat_as_const) + /* This will be used only for printing + the qualifier. This call is (may be) + made by cr16_print_operand_address. */ + *data = DM_FAR; + else + /* This call is (may be) made by + cr16_legitimate_address_p. */ + *data = ILLEGAL_DM; + } + } + return; + + case LABEL_REF: + /* 1 - indicates non-function symbol. */ + *code = 1; + return; + + case PLUS: + case MINUS: + /* Look into the tree nodes. 
*/ + if (GET_CODE (XEXP (x, 0)) == CONST_INT) + cr16_decompose_const (XEXP (x, 1), code, data, treat_as_const); + else if (GET_CODE (XEXP (x, 1)) == CONST_INT) + cr16_decompose_const (XEXP (x, 0), code, data, treat_as_const); + return; + default: + return; + } +} + +/* Decompose Address + This function decomposes the address returns the type of address + as defined in enum cr16_addrtype. It also fills the parameter *out. + The decomposed address can be used for two purposes. One to + check if the address is valid and second to print the address + operand. + + Following tables list valid address supported in CR16C/C+ architectures. + Legend: + aN : Absoulte address N-bit address + R : One 16-bit register + RP : Consecutive two 16-bit registers or one 32-bit register + I : One 32-bit register + dispN : Signed displacement of N-bits + + ----Code addresses---- + Branch operands: + disp9 : CR16_ABSOLUTE (disp) + disp17 : CR16_ABSOLUTE (disp) + disp25 : CR16_ABSOLUTE (disp) + RP + disp25 : CR16_REGP_REL (base, disp) + + Jump operands: + RP : CR16_REGP_REL (base, disp=0) + a24 : CR16_ABSOLUTE (disp) + + ----Data addresses---- + a20 : CR16_ABSOLUTE (disp) near (1M) + a24 : CR16_ABSOLUTE (disp) medium (16M) + R + d20 : CR16_REG_REL (base, disp) near (1M+64K) + RP + d4 : CR16_REGP_REL (base, disp) far (4G) + RP + d16 : CR16_REGP_REL (base, disp) far (4G) + RP + d20 : CR16_REGP_REL (base, disp) far (4G) + I : *** Valid but port does not support this + I + a20 : *** Valid but port does not support this + I + RP + d14: CR16_INDEX_REGP_REL (base, index, disp) far (4G) + I + RP + d20: CR16_INDEX_REGP_REL (base, index, disp) far (4G) + + Decomposing Data model in case of absolute address. + + Target Option Address type Resultant Data ref type + ---------------------- ------------ ----------------------- + CR16_TARGET_MODEL_NEAR ABS20 DM_DEFAULT + CR16_TARGET_MODEL_NEAR IMM20 DM_DEFAULT + CR16_TARGET_MODEL_NEAR ABS24 Invalid + CR16_TARGET_MODEL_NEAR IMM32 Invalid + + CR16_TARGET_MODEL_MEDIUM ABS20 DM_DEFAULT + CR16_TARGET_MODEL_MEDIUM IMM20 DM_DEFAULT + CR16_TARGET_MODEL_MEDIUM ABS24 DM_FAR + CR16_TARGET_MODEL_MEDIUM IMM32 Invalid + + CR16_TARGET_MODEL_FAR ABS20 DM_DEFAULT + CR16_TARGET_MODEL_FAR IMM20 DM_DEFAULT + CR16_TARGET_MODEL_FAR ABS24 DM_FAR + CR16_TARGET_MODEL_FAR IMM32 DM_FAR. */ +enum cr16_addrtype +cr16_decompose_address (rtx addr, struct cr16_address *out, + bool debug_print, bool treat_as_const) +{ + rtx base = NULL_RTX, disp = NULL_RTX, index = NULL_RTX; + enum data_model_type data = ILLEGAL_DM; + int code = -1; + enum cr16_addrtype retval = CR16_INVALID; + + switch (GET_CODE (addr)) + { + case CONST_INT: + /* Absolute address (known at compile time). */ + code = 0; + if (debug_print) + fprintf (stderr, "\ncode:%d", code); + disp = addr; + + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + } + + if (UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), 20)) + { + data = DM_DEFAULT; + if (debug_print) + fprintf (stderr, "\ndata:%d", data); + retval = CR16_ABSOLUTE; + } + else if (UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), 24)) + { + if (!CR16_TARGET_DATA_NEAR) + { + data = DM_FAR; + if (debug_print) + fprintf (stderr, "\ndata:%d", data); + retval = CR16_ABSOLUTE; + } + else + return CR16_INVALID; /* ABS24 is not support in NEAR model. */ + } + else + return CR16_INVALID; + break; + + case CONST: + /* A CONST is an expression of PLUS or MINUS with + CONST_INT, SYMBOL_REF or LABEL_REF. This is the + result of assembly-time arithmetic computation. 
*/ + retval = CR16_ABSOLUTE; + disp = addr; + /* Call the helper function to check the validity. */ + cr16_decompose_const (XEXP (addr, 0), &code, &data, treat_as_const); + if ((code == 0) && (data == ILLEGAL_DM)) + /* CONST is not valid code or data address. */ + return CR16_INVALID; + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + fprintf (stderr, "\ncode:%d", code); + fprintf (stderr, "\ndata:%d", data); + } + break; + + case LABEL_REF: + retval = CR16_ABSOLUTE; + disp = addr; + /* 1 - indicates non-function symbol. */ + code = 1; + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + fprintf (stderr, "\ncode:%d", code); + } + break; + + case SYMBOL_REF: + /* Absolute address (known at link time). */ + retval = CR16_ABSOLUTE; + disp = addr; + /* This is a code address if symbol_ref is a function. */ + /* 2 indicates func sym. */ + code = SYMBOL_REF_FUNCTION_P (addr) ? 2 : 0; + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + fprintf (stderr, "\ncode:%d", code); + } + /* If not function ref then check if valid data ref. */ + if (code == 0) + { + if (CR16_TARGET_DATA_NEAR) + data = DM_DEFAULT; + else if (CR16_TARGET_DATA_MEDIUM) + data = DM_FAR; + else if (CR16_TARGET_DATA_FAR) + { + if (treat_as_const) + /* This will be used only for printing the + qualifier. This call is (may be) made + by cr16_print_operand_address. */ + data = DM_FAR; + else + /* This call is (may be) made by + cr16_legitimate_address_p. */ + return CR16_INVALID; + } + else + data = DM_DEFAULT; + } + if (debug_print) + fprintf (stderr, "\ndata:%d", data); + break; + + case REG: + case SUBREG: + /* Register relative address. */ + /* Assume REG fits in a single register. */ + retval = CR16_REG_REL; + if (GET_MODE_BITSIZE (GET_MODE (addr)) > BITS_PER_WORD) + if (!LONG_REG_P (REGNO (addr))) + /* REG will result in reg pair. */ + retval = CR16_REGP_REL; + base = addr; + if (debug_print) + { + fprintf (stderr, "\nbase:"); + debug_rtx (base); + } + break; + + case PLUS: + switch (GET_CODE (XEXP (addr, 0))) + { + case REG: + case SUBREG: + /* REG + DISP20. */ + /* All Reg relative addresses having a displacement needs + to fit in 20-bits. */ + disp = XEXP (addr, 1); + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + } + switch (GET_CODE (XEXP (addr, 1))) + { + case CONST_INT: + /* Shall fit in 20-bits. */ + if (!UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), 20)) + return CR16_INVALID; + code = 0; + if (debug_print) + fprintf (stderr, "\ncode:%d", code); + break; + + case UNSPEC: + switch (XINT (XEXP (addr, 1), 1)) + { + case UNSPEC_LIBRARY_OFFSET: + default: + gcc_unreachable (); + } + break; + + case LABEL_REF: + case SYMBOL_REF: + case CONST: + /* This is also a valid expression for address. + However, we cannot ascertain if the resultant + displacement will be valid 20-bit value. Therefore, + lets not allow such an expression for now. This will + be updated when we find a way to validate this + expression as legitimate address. + Till then fall through CR16_INVALID. */ + default: + return CR16_INVALID; + } + + /* Now check if REG can fit into single or pair regs. */ + retval = CR16_REG_REL; + base = XEXP (addr, 0); + if (debug_print) + { + fprintf (stderr, "\nbase:"); + debug_rtx (base); + } + if (GET_MODE_BITSIZE (GET_MODE ((XEXP (addr, 0)))) > BITS_PER_WORD) + { + if (!LONG_REG_P (REGNO ((XEXP (addr, 0))))) + /* REG will result in reg pair. 
*/ + retval = CR16_REGP_REL; + } + break; + + case PLUS: + /* Valid expr: + plus + /\ + / \ + plus idx + /\ + / \ + reg const_int + + Check if the operand 1 is valid index register. */ + data = ILLEGAL_DM; + if (debug_print) + fprintf (stderr, "\ndata:%d", data); + switch (GET_CODE (XEXP (addr, 1))) + { + case REG: + case SUBREG: + if (!REG_OK_FOR_INDEX_P (XEXP (addr, 1))) + return CR16_INVALID; + /* OK. REG is a valid index register. */ + index = XEXP (addr, 1); + if (debug_print) + { + fprintf (stderr, "\nindex:"); + debug_rtx (index); + } + break; + default: + return CR16_INVALID; + } + /* Check if operand 0 of operand 0 is REGP. */ + switch (GET_CODE (XEXP (XEXP (addr, 0), 0))) + { + case REG: + case SUBREG: + /* Now check if REG is a REGP and not in LONG regs. */ + if (GET_MODE_BITSIZE (GET_MODE (XEXP (XEXP (addr, 0), 0))) + > BITS_PER_WORD) + { + if (REGNO (XEXP (XEXP (addr, 0), 0)) + >= CR16_FIRST_DWORD_REGISTER) + return CR16_INVALID; + base = XEXP (XEXP (addr, 0), 0); + if (debug_print) + { + fprintf (stderr, "\nbase:"); + debug_rtx (base); + } + } + else + return CR16_INVALID; + break; + default: + return CR16_INVALID; + } + /* Now check if the operand 1 of operand 0 is const_int. */ + if (GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) + { + disp = XEXP (XEXP (addr, 0), 1); + if (debug_print) + { + fprintf (stderr, "\ndisp:"); + debug_rtx (disp); + } + if (!UNSIGNED_INT_FITS_N_BITS (INTVAL (disp), 20)) + return CR16_INVALID; + } + else + return CR16_INVALID; + retval = CR16_INDEX_REGP_REL; + break; + default: + return CR16_INVALID; + } + break; + + default: + return CR16_INVALID; + } + + /* Check if the base and index registers are valid. */ + if (base && !(cr16_addr_reg_p (base))) + return CR16_INVALID; + if (base && !(CR16_REG_OK_FOR_BASE_P (base))) + return CR16_INVALID; + if (index && !(REG_OK_FOR_INDEX_P (index))) + return CR16_INVALID; + + /* Write the decomposition to out parameter. */ + out->base = base; + out->disp = disp; + out->index = index; + out->data = data; + out->code = code; + + return retval; +} + +/* Return non-zero value if 'x' is legitimate PIC operand + when generating PIC code. */ +int +legitimate_pic_operand_p (rtx x) +{ + switch (GET_CODE (x)) + { + case SYMBOL_REF: + return 0; + break; + case LABEL_REF: + return 0; + break; + case CONST: + /* REVISIT: Use something like symbol_referenced_p. */ + if (GET_CODE (XEXP (x, 0)) == PLUS + && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF + || GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF) + && (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)) + return 0; + break; + case MEM: + return legitimate_pic_operand_p (XEXP (x, 0)); + break; + default: + break; + } + return 1; +} + +/* Convert a non-PIC address in `orig' to a PIC address in `reg'. + + Input Output (-f pic) Output (-f PIC) + orig reg + + C1 symbol symbol@BRO (r12) symbol@GOT (r12) + + C2 symbol + offset symbol+offset@BRO (r12) symbol+offset@GOT (r12) + + NOTE: @BRO is added using unspec:BRO + NOTE: @GOT is added using unspec:GOT. */ +rtx +legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED, + rtx reg) +{ + /* First handle a simple SYMBOL_REF or LABEL_REF. */ + if (GET_CODE (orig) == SYMBOL_REF || GET_CODE (orig) == LABEL_REF) + { + if (reg == 0) + reg = gen_reg_rtx (Pmode); + + if (flag_pic == NEAR_PIC) + { + /* Unspec to handle -fpic option. */ + emit_insn (gen_unspec_bro_addr (reg, orig)); + emit_insn (gen_addsi3 (reg, reg, pic_offset_table_rtx)); + } + else if (flag_pic == FAR_PIC) + { + /* Unspec to handle -fPIC option. 
*/ + emit_insn (gen_unspec_got_addr (reg, orig)); + } + return reg; + } + else if (GET_CODE (orig) == CONST) + { + /* To handle (symbol + offset). */ + rtx base, offset; + + if (GET_CODE (XEXP (orig, 0)) == PLUS + && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) + return orig; + + if (reg == 0) + { + gcc_assert (can_create_pseudo_p ()); + reg = gen_reg_rtx (Pmode); + } + + gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); + + base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); + offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, + base == reg ? 0 : reg); + + /* REVISIT: Optimize for const-offsets. */ + emit_insn (gen_addsi3 (reg, base, offset)); + + return reg; + } + return orig; +} + +/* Implementation of TARGET_LEGITIMATE_ADDRESS_P. */ +static bool +cr16_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx addr, bool strict) +{ + enum cr16_addrtype addrtype; + struct cr16_address address; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "\n======\nTARGET_LEGITIMATE_ADDRESS_P, mode = %s, strict = %d", + GET_MODE_NAME (mode), strict); + debug_rtx (addr); + } + addrtype = cr16_decompose_address (addr, &address, + (TARGET_DEBUG_ADDR ? 1 : 0), FALSE); + + if (TARGET_DEBUG_ADDR) + { + const char *typestr; + + switch (addrtype) + { + case CR16_INVALID: + typestr = "invalid"; + break; + case CR16_ABSOLUTE: + typestr = "absolute"; + break; + case CR16_REG_REL: + typestr = "register relative"; + break; + case CR16_REGP_REL: + typestr = "register pair relative"; + break; + case CR16_INDEX_REGP_REL: + typestr = "index + register pair relative"; + break; + default: + gcc_unreachable (); + } + fprintf (stderr, "\ncr16 address type: %s\n", typestr); + } + + if (addrtype == CR16_INVALID) + return FALSE; + + if (strict) + { + if (address.base + && !REGNO_MODE_OK_FOR_BASE_P (REGNO (address.base), mode)) + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "base register not strict\n"); + return FALSE; + } + if (address.index && !REGNO_OK_FOR_INDEX_P (REGNO (address.index))) + { + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "index register not strict\n"); + return FALSE; + } + } + + /* Return true if addressing mode is register relative. */ + if (flag_pic) + { + if (addrtype == CR16_REG_REL || addrtype == CR16_REGP_REL) + return TRUE; + else + return FALSE; + } + + return TRUE; +} + +/* Routines to compute costs. */ + +/* Return cost of the memory address x. */ +static int +cr16_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + enum cr16_addrtype addrtype; + struct cr16_address address; + int cost = 2; + + addrtype = cr16_decompose_address (addr, &address, 0, FALSE); + + gcc_assert (addrtype != CR16_INVALID); + + /* CR16_ABSOLUTE : 3 + CR16_REG_REL (disp !=0) : 4 + CR16_REG_REL (disp ==0) : 5 + CR16_REGP_REL (disp !=0) : 6 + CR16_REGP_REL (disp ==0) : 7 + CR16_INDEX_REGP_REL (disp !=0) : 8 + CR16_INDEX_REGP_REL (disp ==0) : 9. */ + switch (addrtype) + { + case CR16_ABSOLUTE: + cost += 1; + break; + case CR16_REGP_REL: + cost += 2; + /* Fall through. */ + case CR16_REG_REL: + cost += 3; + if (address.disp) + cost -= 1; + break; + case CR16_INDEX_REGP_REL: + cost += 7; + if (address.disp) + cost -= 1; + default: + break; + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\n======\nmacro TARGET_ADDRESS_COST = %d\n", cost); + debug_rtx (addr); + } + + return cost; +} + + +/* Implement `TARGET_REGISTER_MOVE_COST'. 
*/ +static int +cr16_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from ATTRIBUTE_UNUSED, reg_class_t to) +{ + return (to != GENERAL_REGS ? 8 : 2); +} + +/* Implement `TARGET_MEMORY_MOVE_COST'. */ + +/* Return the cost of moving data of mode MODE between a register of class + CLASS and memory; IN is zero if the value is to be written to memory, + nonzero if it is to be read in. This cost is relative to those in + REGISTER_MOVE_COST. */ +static int +cr16_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + /* One LD or ST takes twice the time of a simple reg-reg move. */ + if (reg_classes_intersect_p (rclass, GENERAL_REGS)) + return (4 * HARD_REGNO_NREGS (0, mode)); + else + return (100); +} + +/* Instruction output. */ + +/* Check if a const_double is ok for cr16 store-immediate instructions. */ +int +cr16_const_double_ok (rtx op) +{ + if (GET_MODE (op) == SFmode) + { + REAL_VALUE_TYPE r; + long l; + REAL_VALUE_FROM_CONST_DOUBLE (r, op); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + return UNSIGNED_INT_FITS_N_BITS (l, 4) ? 1 : 0; + } + + return ((UNSIGNED_INT_FITS_N_BITS (CONST_DOUBLE_LOW (op), 4)) && + (UNSIGNED_INT_FITS_N_BITS (CONST_DOUBLE_HIGH (op), 4))) ? 1 : 0; +} + +/* Returns bit position of first 0 or 1 bit. + It is safe to assume val as 16-bit wide. */ +int +cr16_operand_bit_pos (int val, int bitval) +{ + int i; + if (bitval == 0) + val = ~val; + + for (i = 0; i < 16; i++) + if (val & (1 << i)) + break; + return i; +} + +/* Implements the macro PRINT_OPERAND defined in cr16.h. */ +static void +cr16_print_operand (FILE * file, rtx x, int code) +{ + int ptr_dereference = 0; + + switch (code) + { + case 'd': + { + const char *cr16_cmp_str; + switch (GET_CODE (x)) + { + /* MD: compare (reg, reg or imm) but CR16: cmp (reg or imm, reg) + -> swap all non symmetric ops. */ + case EQ: + cr16_cmp_str = "eq"; + break; + case NE: + cr16_cmp_str = "ne"; + break; + case GT: + cr16_cmp_str = "lt"; + break; + case GTU: + cr16_cmp_str = "lo"; + break; + case LT: + cr16_cmp_str = "gt"; + break; + case LTU: + cr16_cmp_str = "hi"; + break; + case GE: + cr16_cmp_str = "le"; + break; + case GEU: + cr16_cmp_str = "ls"; + break; + case LE: + cr16_cmp_str = "ge"; + break; + case LEU: + cr16_cmp_str = "hs"; + break; + default: + gcc_unreachable (); + } + fprintf (file, "%s", cr16_cmp_str); + return; + } + case '$': + putc ('$', file); + return; + + case 'p': + if (GET_CODE (x) == REG) + { + /* For Push instructions, we should not print register pairs. */ + fprintf (file, "%s", reg_names[REGNO (x)]); + return; + } + break; + + case 'b': + /* Print the immediate address for bal + 'b' is used instead of 'a' to avoid compiler calling + the GO_IF_LEGITIMATE_ADDRESS which cannot + perform checks on const_int code addresses as it + assumes all const_int are data addresses. */ + fprintf (file, "0x%lx", INTVAL (x)); + return; + + case 'r': + /* Print bit position of first 0. */ + fprintf (file, "%d", cr16_operand_bit_pos (INTVAL (x), 0)); + return; + + case 's': + /* Print bit position of first 1. */ + fprintf (file, "%d", cr16_operand_bit_pos (INTVAL (x), 1)); + return; + case 'g': + /* 'g' is used for implicit mem: dereference. */ + ptr_dereference = 1; + case 'f': + case 0: + /* default. 
*/ + switch (GET_CODE (x)) + { + case REG: + if (GET_MODE_BITSIZE (GET_MODE (x)) > BITS_PER_WORD) + { + if (LONG_REG_P (REGNO (x))) + fprintf (file, "(%s)", reg_names[REGNO (x)]); + else + fprintf (file, "(%s,%s)", reg_names[REGNO (x) + 1], + reg_names[REGNO (x)]); + } + else + fprintf (file, "%s", reg_names[REGNO (x)]); + return; + + case MEM: + output_address (XEXP (x, 0)); + return; + + case CONST_DOUBLE: + { + REAL_VALUE_TYPE r; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + + fprintf (file, "$0x%lx", l); + return; + } + case CONST_INT: + { + fprintf (file, "$%ld", INTVAL (x)); + return; + } + case UNSPEC: + switch (XINT (x, 1)) + { + default: + gcc_unreachable (); + } + break; + + default: + if (!ptr_dereference) + { + putc ('$', file); + } + cr16_print_operand_address (file, x); + return; + } + default: + output_operand_lossage ("invalid %%xn code"); + } + + gcc_unreachable (); +} + +/* Implements the macro PRINT_OPERAND_ADDRESS defined in cr16.h. */ + +static void +cr16_print_operand_address (FILE * file, rtx addr) +{ + enum cr16_addrtype addrtype; + struct cr16_address address; + + /* Decompose the address. Also ask it to treat address as constant. */ + addrtype = cr16_decompose_address (addr, &address, 0, TRUE); + + if (address.disp && GET_CODE (address.disp) == UNSPEC) + { + debug_rtx (addr); + } + + switch (addrtype) + { + case CR16_REG_REL: + if (address.disp) + { + if (GET_CODE (address.disp) == UNSPEC) + cr16_print_operand (file, address.disp, 0); + else + output_addr_const (file, address.disp); + } + else + fprintf (file, "0"); + fprintf (file, "(%s)", reg_names[REGNO (address.base)]); + break; + + case CR16_ABSOLUTE: + if (address.disp) + output_addr_const (file, address.disp); + else + fprintf (file, "0"); + break; + + case CR16_INDEX_REGP_REL: + fprintf (file, "[%s]", reg_names[REGNO (address.index)]); + /* Fall through. */ + case CR16_REGP_REL: + if (address.disp) + { + if (GET_CODE (address.disp) == UNSPEC) + cr16_print_operand (file, address.disp, 0); + else + output_addr_const (file, address.disp); + } + else + fprintf (file, "0"); + fprintf (file, "(%s,%s)", reg_names[REGNO (address.base) + 1], + reg_names[REGNO (address.base)]); + break; + default: + debug_rtx (addr); + gcc_unreachable (); + } + /* Add qualifiers to the address expression that was just printed. */ + if (flag_pic < NEAR_PIC && address.code == 0) + { + if (address.data == DM_FAR) + /* Addr contains SYMBOL_REF & far data ptr. */ + fprintf (file, "@l"); + else if (address.data == DM_DEFAULT) + /* Addr contains SYMBOL_REF & medium data ptr. */ + fprintf (file, "@m"); + /* Addr contains SYMBOL_REF & medium data ptr. */ + else if (address.data == DM_NEAR) + /* Addr contains SYMBOL_REF & near data ptr. */ + fprintf (file, "@s"); + } + else if (flag_pic == NEAR_PIC + && (address.code == 0) && (address.data == DM_FAR + || address.data == DM_DEFAULT + || address.data == DM_NEAR)) + { + fprintf (file, "@l"); + } + else if (flag_pic == NEAR_PIC && address.code == 2) + { + fprintf (file, "pic"); + } + else if (flag_pic == NEAR_PIC && address.code == 1) + { + fprintf (file, "@cpic"); + } + + else if (flag_pic == FAR_PIC && address.code == 2) + { + /* REVISIT: cr16 register indirect jump expects a 1-bit right shifted + address ! GOTc tells assembler this symbol is a text-address + This needs to be fixed in such a way that this offset is done + only in the case where an address is being used for indirect jump + or call. 
Determining the potential usage of loadd is of course not + possible always. Eventually, this has to be fixed in the + processor. */ + fprintf (file, "GOT (%s)", reg_names[PIC_OFFSET_TABLE_REGNUM]); + } + else if (flag_pic == FAR_PIC && address.code == 1) + { + fprintf (file, "@cGOT (%s)", reg_names[PIC_OFFSET_TABLE_REGNUM]); + } + + else if (flag_pic == FAR_PIC && + (address.data == DM_FAR || address.data == DM_DEFAULT + || address.data == DM_NEAR)) + { + fprintf (file, "@GOT (%s)", reg_names[PIC_OFFSET_TABLE_REGNUM]); + } +} + +/* Machine description helper functions. */ + +/* Called from cr16.md. The return value depends on the parameter push_or_pop: + When push_or_pop is zero -> string for push instructions of prologue. + When push_or_pop is nonzero -> string for pop/popret/retx in epilogue. + Relies on the assumptions: + 1. RA is the last register to be saved. + 2. The maximal value of the counter is MAX_COUNT. */ +char * +cr16_prepare_push_pop_string (int push_or_pop) +{ + /* j is the number of registers being saved, takes care that there won't be + more than 8 in one push/pop instruction. */ + + /* For the register mask string. */ + static char one_inst_str[50]; + + /* i is the index of current_frame_info.save_regs[], going from 0 until + current_frame_info.last_reg_to_save. */ + int i, start_reg; + int word_cnt; + int print_ra; + char *return_str; + + /* For reversing on the push instructions if there are more than one. */ + char *temp_str; + + return_str = (char *) xmalloc (160); + temp_str = (char *) xmalloc (160); + + /* Initialize. */ + memset (return_str, 0, 3); + + i = 0; + while (i <= current_frame_info.last_reg_to_save) + { + /* Prepare mask for one instruction. */ + one_inst_str[0] = 0; + + /* To count number of words in one instruction. */ + word_cnt = 0; + start_reg = i; + print_ra = 0; + while ((word_cnt < MAX_COUNT) + && (i <= current_frame_info.last_reg_to_save)) + { + /* For each non consecutive save register, + a new instruction shall be generated. */ + if (!current_frame_info.save_regs[i]) + { + /* Move to next reg and break. */ + ++i; + break; + } + + if (i == RETURN_ADDRESS_REGNUM) + print_ra = 1; + else + { + /* Check especially if adding 2 does not cross the MAX_COUNT. */ + if ((word_cnt + ((i < CR16_FIRST_DWORD_REGISTER) ? 1 : 2)) + >= MAX_COUNT) + break; + /* Increase word count by 2 for long registers except RA. */ + word_cnt += ((i < CR16_FIRST_DWORD_REGISTER) ? 1 : 2); + } + ++i; + } + + /* No need to generate any instruction as + no register or RA needs to be saved. */ + if ((word_cnt == 0) && (print_ra == 0)) + continue; + + /* Now prepare the instruction operands. */ + if (word_cnt > 0) + { + sprintf (one_inst_str, "$%d, %s", word_cnt, reg_names[start_reg]); + if (print_ra) + strcat (one_inst_str, ", ra"); + } + else + strcat (one_inst_str, "ra"); + + if (push_or_pop == 1) + { + /* Pop instruction. */ + if (print_ra && !cr16_interrupt_function_p () + && !crtl->calls_eh_return) + /* Print popret if RA is saved and its not a interrupt + function. */ + strcpy (temp_str, "\n\tpopret\t"); + else + strcpy (temp_str, "\n\tpop\t"); + + strcat (temp_str, one_inst_str); + + /* Add the pop instruction list. */ + strcat (return_str, temp_str); + } + else + { + /* Push instruction. */ + strcpy (temp_str, "\n\tpush\t"); + strcat (temp_str, one_inst_str); + + /* We need to reverse the order of the instructions if there + are more than one. (since the pop will not be reversed in + the epilogue. 
*/ + strcat (temp_str, return_str); + strcpy (return_str, temp_str); + } + } + + if (push_or_pop == 1) + { + /* POP. */ + if (cr16_interrupt_function_p ()) + strcat (return_str, "\n\tretx\n"); + else if (crtl->calls_eh_return) + { + /* Add stack adjustment before returning to exception handler + NOTE: EH_RETURN_STACKADJ_RTX must refer to (r5, r4). */ + strcat (return_str, "\n\taddd\t (r5, r4), (sp)\t\n"); + strcat (return_str, "\n\tjump\t (ra)\n"); + + /* But before anything else, undo the adjustment addition done in + cr16_expand_epilogue (). */ + strcpy (temp_str, "\n\tsubd\t (r5, r4), (sp)\t\n"); + strcat (temp_str, return_str); + strcpy (return_str, temp_str); + } + else if (!FUNC_IS_NORETURN_P (current_function_decl) + && !(current_frame_info.save_regs[RETURN_ADDRESS_REGNUM])) + strcat (return_str, "\n\tjump\t (ra)\n"); + } + + /* Skip the newline and the tab in the start of return_str. */ + return_str += 2; + return return_str; +} + + +/* Generate DWARF2 annotation for multi-push instruction. */ +static void +cr16_create_dwarf_for_multi_push (rtx insn) +{ + rtx dwarf, reg, tmp; + int i, j, from, to, word_cnt, dwarf_par_index, inc; + enum machine_mode mode; + int num_regs = 0, offset = 0, split_here = 0, total_push_bytes = 0; + + for (i = 0; i <= current_frame_info.last_reg_to_save; ++i) + { + if (current_frame_info.save_regs[i]) + { + ++num_regs; + if (i < CR16_FIRST_DWORD_REGISTER) + total_push_bytes += 2; + else + total_push_bytes += 4; + } + } + + if (!num_regs) + return; + + dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1)); + dwarf_par_index = num_regs; + + from = current_frame_info.last_reg_to_save + 1; + to = current_frame_info.last_reg_to_save; + word_cnt = 0; + + for (i = current_frame_info.last_reg_to_save; i >= 0;) + { + if (!current_frame_info.save_regs[i] || 0 == i || split_here) + { + /* This block of regs is pushed in one instruction. */ + if (0 == i && current_frame_info.save_regs[i]) + from = 0; + + for (j = to; j >= from; --j) + { + if (j < CR16_FIRST_DWORD_REGISTER) + { + mode = HImode; + inc = 1; + } + else + { + mode = SImode; + inc = 2; + } + reg = gen_rtx_REG (mode, j); + offset += 2 * inc; + tmp = gen_rtx_SET (VOIDmode, + gen_frame_mem (mode, + plus_constant + (Pmode, stack_pointer_rtx, + total_push_bytes - offset)), + reg); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, dwarf_par_index--) = tmp; + } + from = i; + to = --i; + split_here = 0; + word_cnt = 0; + continue; + } + + if (i != RETURN_ADDRESS_REGNUM) + { + inc = (i < CR16_FIRST_DWORD_REGISTER) ? 1 : 2; + if (word_cnt + inc >= MAX_COUNT || FRAME_POINTER_REGNUM == i) + { + split_here = 1; + from = i; + continue; + } + word_cnt += inc; + } + + from = i--; + } + + tmp = gen_rtx_SET (SImode, stack_pointer_rtx, + gen_rtx_PLUS (SImode, stack_pointer_rtx, + GEN_INT (-offset))); + RTX_FRAME_RELATED_P (tmp) = 1; + XVECEXP (dwarf, 0, 0) = tmp; + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); +} + +/* +CompactRISC CR16 Architecture stack layout: + + 0 +--------------------- + | + . + . + | + +==================== Sp (x) = Ap (x+1) + A | Args for functions + | | called by X and Dynamically + | | Dynamic allocations allocated and + | | (alloca, variable deallocated + Stack | length arrays). 
+ grows +-------------------- Fp (x) + down| | Local variables of X + ward| +-------------------- + | | Regs saved for X-1 + | +==================== Sp (x-1) = Ap (x) + | Args for func X + | pushed by X-1 + +-------------------- Fp (x-1) + | + | + V +*/ +void +cr16_expand_prologue (void) +{ + rtx insn; + + cr16_compute_frame (); + cr16_compute_save_regs (); + + /* If there is no need in push and adjustment to sp, return. */ + if ((current_frame_info.total_size + current_frame_info.reg_size) == 0) + return; + + if (current_frame_info.last_reg_to_save != -1) + { + /* If there are registers to push. */ + insn = emit_insn (gen_push_for_prologue + (GEN_INT (current_frame_info.reg_size))); + cr16_create_dwarf_for_multi_push (insn); + RTX_FRAME_RELATED_P (insn) = 1; + } + + + if (current_frame_info.total_size > 0) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-current_frame_info.total_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (frame_pointer_needed) + { + /* Initialize the frame pointer with the value of the stack pointer + pointing now to the locals. */ + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + } +} + +/* Generate insn that updates the stack for local variables and padding + for registers we save. - Generate the appropriate return insn. */ +void +cr16_expand_epilogue (void) +{ + rtx insn; + + /* Nonzero if we need to return and pop only RA. This will generate a + different insn. This differentiate is for the peepholes for call as + last statement in function. */ + int only_popret_RA = (current_frame_info.save_regs[RETURN_ADDRESS_REGNUM] + && (current_frame_info.reg_size + == CR16_UNITS_PER_DWORD)); + + if (frame_pointer_needed) + { + /* Restore the stack pointer with the frame pointers value. */ + insn = emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + } + + if (current_frame_info.total_size > 0) + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (current_frame_info.total_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (crtl->calls_eh_return) + { + /* Add this here so that (r5, r4) is actually loaded with the adjustment + value; otherwise, the load might be optimized away... + NOTE: remember to subtract the adjustment before popping the regs + and add it back before returning. */ + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + EH_RETURN_STACKADJ_RTX)); + } + + if (cr16_interrupt_function_p ()) + { + insn = emit_jump_insn (gen_interrupt_return ()); + RTX_FRAME_RELATED_P (insn) = 1; + } + else if (crtl->calls_eh_return) + { + /* Special case, pop what's necessary, adjust SP and jump to (RA). */ + insn = emit_jump_insn (gen_pop_and_popret_return + (GEN_INT (current_frame_info.reg_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else if (current_frame_info.last_reg_to_save == -1) + /* Nothing to pop. */ + /* Don't output jump for interrupt routine, only retx. */ + emit_jump_insn (gen_jump_return ()); + else if (only_popret_RA) + { + insn = emit_jump_insn (gen_popret_RA_return ()); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + insn = emit_jump_insn (gen_pop_and_popret_return + (GEN_INT (current_frame_info.reg_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } +} + +/* Implements FRAME_POINTER_REQUIRED. 
*/ +static bool +cr16_frame_pointer_required (void) +{ + return (cfun->calls_alloca || crtl->calls_eh_return + || cfun->has_nonlocal_label || crtl->calls_eh_return); +} + +static bool +cr16_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true); +} + + +/* A C compound statement that attempts to replace X with + a valid memory address for an operand of mode MODE. WIN + will be a C statement label elsewhere in the code. + X will always be the result of a call to break_out_memory_refs (), + and OLDX will be the operand that was given to that function to + produce X. + The code generated by this macro should not alter the + substructure of X. If it transforms X into a more legitimate form, + it should assign X (which will always be a C variable) a new value. */ +static rtx +cr16_legitimize_address (rtx x, rtx orig_x ATTRIBUTE_UNUSED, + enum machine_mode mode ATTRIBUTE_UNUSED) +{ + if (flag_pic) + return legitimize_pic_address (orig_x, mode, NULL_RTX); + else + return x; +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P + Nonzero if X is a legitimate constant for an immediate + operand on the target machine. You can assume that X + satisfies CONSTANT_P. In cr16c treat legitimize float + constant as an immediate operand. */ +static bool +cr16_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, + rtx x ATTRIBUTE_UNUSED) +{ + return 1; +} + +void +notice_update_cc (rtx exp) +{ + if (GET_CODE (exp) == SET) + { + /* Jumps do not alter the cc's. */ + if (SET_DEST (exp) == pc_rtx) + return; + + /* Moving register or memory into a register: + it doesn't alter the cc's, but it might invalidate + the RTX's which we remember the cc's came from. + (Note that moving a constant 0 or 1 MAY set the cc's). */ + if (REG_P (SET_DEST (exp)) + && (REG_P (SET_SRC (exp)) || GET_CODE (SET_SRC (exp)) == MEM)) + { + return; + } + + /* Moving register into memory doesn't alter the cc's. + It may invalidate the RTX's which we remember the cc's came from. */ + if (GET_CODE (SET_DEST (exp)) == MEM && REG_P (SET_SRC (exp))) + { + return; + } + } + + CC_STATUS_INIT; + return; +} + +static enum machine_mode +cr16_unwind_word_mode (void) +{ + return SImode; +} + +/* Helper function for md file. This function is used to emit arithmetic + DI instructions. The argument "num" decides which instruction to be + printed. */ +const char * +cr16_emit_add_sub_di (rtx *operands, enum rtx_code code) +{ + rtx lo_op[2] ; + rtx hi0_op[2] ; + rtx hi1_op[2] ; + + lo_op[0] = gen_lowpart (SImode, operands[0]); + hi0_op[0] = simplify_gen_subreg (HImode, operands[0], DImode, 4); + hi1_op[0] = simplify_gen_subreg (HImode, operands[0], DImode, 6); + + lo_op[1] = gen_lowpart (SImode, operands[2]); + hi0_op[1] = simplify_gen_subreg (HImode, operands[2], DImode, 4); + hi1_op[1] = simplify_gen_subreg (HImode, operands[2], DImode, 6); + + switch (code) + { + case PLUS: + { + output_asm_insn ("addd\t%1, %0", lo_op) ; + output_asm_insn ("addcw\t%1, %0", hi0_op) ; + output_asm_insn ("addcw\t%1, %0", hi1_op) ; + break; + } + case MINUS: + { + output_asm_insn ("subd\t%1, %0", lo_op) ; + output_asm_insn ("subcw\t%1, %0", hi0_op) ; + output_asm_insn ("subcw\t%1, %0", hi1_op) ; + break; + } + default: + break; + } + + return ""; +} + + +/* Helper function for md file. This function is used to emit logical + DI instructions. The argument "num" decides which instruction to be + printed. 
*/ +const char * +cr16_emit_logical_di (rtx *operands, enum rtx_code code) +{ + rtx lo_op[2] ; + rtx hi_op[2] ; + + lo_op[0] = gen_lowpart (SImode, operands[0]); + hi_op[0] = simplify_gen_subreg (SImode, operands[0], DImode, 4); + + lo_op[1] = gen_lowpart (SImode, operands[2]); + hi_op[1] = simplify_gen_subreg (SImode, operands[2], DImode, 4); + + switch (code) + { + case AND: + { + output_asm_insn ("andd\t%1, %0", lo_op) ; + output_asm_insn ("andd\t%1, %0", hi_op) ; + return ""; + } + case IOR: + { + output_asm_insn ("ord\t%1, %0", lo_op) ; + output_asm_insn ("ord\t%1, %0", hi_op) ; + return ""; + } + case XOR: + { + output_asm_insn ("xord\t%1, %0", lo_op) ; + output_asm_insn ("xord\t%1, %0", hi_op) ; + return ""; + } + default: + break; + } + + return ""; +} + +/* Initialize 'targetm' variable which contains pointers to functions + and data relating to the target machine. */ + +struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc-4.9/gcc/config/cr16/cr16.h b/gcc-4.9/gcc/config/cr16/cr16.h new file mode 100644 index 000000000..c40f7940a --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16.h @@ -0,0 +1,586 @@ +/* Definitions of target machine for GNU compiler, for CR16. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + Contributed by KPIT Cummins Infosystems Limited. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +#ifndef GCC_CR16_H +#define GCC_CR16_H + +#define OBJECT_FORMAT_ELF + +/* Controlling the driver. */ + +/* The GNU C++ standard library requires that these macros be defined. */ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt1.o%s crti.o%s crtbegin.o%s crtlibid.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#undef MATH_LIBRARY +#define MATH_LIBRARY "" + +#undef LIB_SPEC +#define LIB_SPEC "-( -lc %{msim*:-lsim}%{!msim*:-lnosys} -) \ +%{msim*:%{!T*:-Tsim.ld}} \ +%{!T*:%{!msim*: %{-Telf32cr16.x}}}" + +/* Run-time target specification. */ +#ifndef TARGET_CPU_CPP_BUILTINS +#define TARGET_CPU_CPP_BUILTINS() \ +do \ + { \ + builtin_define ("__CR__"); \ + builtin_define ("__CR16__"); \ + builtin_define ("__CR16C__"); \ + if (TARGET_CR16CP) \ + builtin_define ("__CR16CP__"); \ + else \ + builtin_define ("__CR16CSTD__"); \ + if (CR16_TARGET_DATA_NEAR) \ + builtin_define ("__DATA_NEAR__"); \ + if (CR16_TARGET_DATA_MEDIUM) \ + builtin_define ("__DATA_MEDIUM__"); \ + if (CR16_TARGET_DATA_FAR) \ + builtin_define ("__DATA_FAR__"); \ + if (TARGET_INT32) \ + builtin_define ("__INT32__"); \ + } \ +while (0) +#endif + +/* Force the generation of dwarf .debug_frame sections even if not + compiling -g. This guarantees that we can unwind the stack. */ +#define DWARF2_FRAME_INFO 1 + +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Generate .file/.loc directives, so that the assembler generates the + line table. 
*/ +#define DWARF2_ASM_LINE_DEBUG_INFO 1 + +#define CR16_TARGET_DATA_NEAR cr16_is_data_model (DM_NEAR) +#define CR16_TARGET_DATA_MEDIUM cr16_is_data_model (DM_DEFAULT) +#define CR16_TARGET_DATA_FAR cr16_is_data_model (DM_FAR) + +/* Storage layout. */ +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN 0 + +#define WORDS_BIG_ENDIAN 0 + +#define UNITS_PER_WORD 2 + +/* Units per 32-bit (DWORD). */ +#define CR16_UNITS_PER_DWORD 4 + +#define POINTER_SIZE 32 + +#define PARM_BOUNDARY 16 + +#define STACK_BOUNDARY (MAX (BIGGEST_ALIGNMENT, PARM_BOUNDARY)) + +#define FUNCTION_BOUNDARY BIGGEST_ALIGNMENT + +/* Biggest alignment on CR16C+ is 32-bit as internal bus is AMBA based + where as CR16C is proprietary internal bus architecture. */ +#define BIGGEST_ALIGNMENT ((TARGET_CR16CP) ? 32 : 16) + +#define MAX_FIXED_MODE_SIZE 64 + +/* In CR16 arrays of chars are word-aligned, so strcpy () will be faster. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (((TREE_CODE (TYPE) == ARRAY_TYPE) \ + && (TYPE_MODE (TREE_TYPE (TYPE)) == QImode) \ + && ((ALIGN) < BITS_PER_WORD)) \ + ? (BITS_PER_WORD) : (ALIGN)) + +/* In CR16 strings are word-aligne; strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(CONSTANT, ALIGN) \ + (((TREE_CODE (CONSTANT) == STRING_CST) && ((ALIGN) < BITS_PER_WORD)) \ + ? (BITS_PER_WORD) : (ALIGN)) + +#define STRICT_ALIGNMENT 0 + +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* Layout of source language data types. */ +#define INT_TYPE_SIZE (TARGET_INT32 ? 32 : 16) + +#define SHORT_TYPE_SIZE 16 + +#define LONG_TYPE_SIZE 32 + +#define LONG_LONG_TYPE_SIZE 64 + +#define FLOAT_TYPE_SIZE 32 + +#define DOUBLE_TYPE_SIZE 64 + +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#define SIZE_TYPE "long unsigned int" + +#define PTRDIFF_TYPE "long int" + +#define WCHAR_TYPE "short unsigned int" + +#define WCHAR_TYPE_SIZE 16 + +/* By default, the C++ compiler will use the lowest bit of the pointer + to function to indicate a pointer-to-member-function points to a + virtual member function. However, in CR architecture FUNCTION_BOUNDARY + indicates function addresses are always even, but function pointers can be + odd (after right-shifting them when loading them into a register), and the + default doesn't work. In that case, the lowest bit of the delta + field will be used (the remainder of the field is shifted to the left). */ +#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_delta + +/* Define DEFAULT_PCC_STRUCT_RETURN to 1 if all structure and union return + values must be in memory. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* Register usage. */ + +/* First 32-bit register is R12. */ +#define CR16_FIRST_DWORD_REGISTER 12 + +#define FIRST_PSEUDO_REGISTER 16 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. + On the CR16, only the stack pointer (r15) is such. */ +#define FIXED_REGISTERS \ + { \ + /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10. */ \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + /* r11 r12 r13 ra sp. */ \ + 0, 0, 0, 0, 1 \ + } + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + + On the CR16, calls clobbers r0-r6 (scratch registers), + ra (the return address) and sp (the stack pointer). */ +#define CALL_USED_REGISTERS \ + { \ + /* r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10. 
*/ \ + 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, \ + /* r11 r12 r13 ra sp. */ \ + 0, 0, 0, 1, 1 \ + } + +/* Returns 1 if the register is longer than word size, 0 otherwise. */ +#define LONG_REG_P(REGNO) \ + (HARD_REGNO_NREGS (REGNO, \ + GET_MODE_WIDER_MODE (smallest_mode_for_size \ + (BITS_PER_WORD, MODE_INT))) == 1) + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((REGNO >= CR16_FIRST_DWORD_REGISTER) \ + ? ((GET_MODE_SIZE (MODE) + CR16_UNITS_PER_DWORD - 1) / CR16_UNITS_PER_DWORD)\ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* Nonzero if it is permissible to store a value of mode @var{mode} in hard + register number @var{regno} (or in several registers starting with that + one). On the CR16 architecture, all registers can hold all modes, + except that double precision floats (and double ints) must fall on + even-register boundaries. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) cr16_hard_regno_mode_ok (REGNO, MODE) + +#define NOTICE_UPDATE_CC(EXP, INSN) \ + notice_update_cc ((EXP)) + +/* Interrupt functions can only use registers that have already been + saved by the prologue, even if they would normally be call-clobbered + Check if sizes are same and then check if it is possible to rename. */ +#define HARD_REGNO_RENAME_OK(SRC, DEST) \ + (!cr16_interrupt_function_p () || (df_regs_ever_live_p (DEST))) + +/* Exception handling stuff. */ + +/*To ensure correct dwarf unwinding. */ +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((optimize ("no-gcse","no-dse"))) + +#define gen_rtx_RA gen_rtx_REG (Pmode, RETURN_ADDRESS_REGNUM) + +/* Use (r8,r7) and (r10,r9) to pass exception handling information. */ +#define EH_RETURN_DATA_REGNO(N) (((N) < 2) ? (N*2 + 7) : INVALID_REGNUM) + +#define DWARF2_UNWIND_INFO 1 + +/* (r5,r4) holds a stack adjustment for returning to a handler. */ +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 4) + +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (Pmode, arg_pointer_rtx, -4)) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_RA + +#define DWARF_FRAME_RETURN_COLUMN \ + DWARF_FRAME_REGNUM (RETURN_ADDRESS_REGNUM) + +#define INCOMING_FRAME_SP_OFFSET 0 +#define FRAME_POINTER_CFA_OFFSET(FNDECL) 0 + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. */ +#define RETURN_ADDR_RTX(COUNT, FRAME) \ + (0 == COUNT) ? gen_rtx_PLUS (Pmode, gen_rtx_RA, gen_rtx_RA) \ + : const0_rtx + +#define MODES_TIEABLE_P(MODE1, MODE2) \ + (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) + +enum reg_class +{ + NO_REGS, + SHORT_REGS, + LONG_REGS, + NOSP_REGS, + DOUBLE_BASE_REGS, + GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES \ + { \ + "NO_REGS", \ + "SHORT_REGS", \ + "LONG_REGS", \ + "NOSP_REGS", \ + "DOUBLE_BASE_REGS", \ + "GENERAL_REGS", \ + "ALL_REGS" \ + } + +#define REG_CLASS_CONTENTS \ + { \ + {0x00000000}, /* NO_REGS */ \ + {0x00000FFF}, /* SHORT_REGS : 0 - 11 */ \ + {0x0000F000}, /* LONG_REGS : 12 - 15 */ \ + {0x00007FFF}, /* NOSP_REGS : 0 - 14 */ \ + {0x0000F555}, /* DOUBLE_BASE_REGS : 2,4,6,8,10 */ \ + {0x0000FFFF}, /* GENERAL_REGS : 0 - 15 */ \ + {0x0000FFFF} /* ALL_REGS : 0 - 15 */ \ + } + +#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true + +#define REGNO_REG_CLASS(REGNO) cr16_regno_reg_class (REGNO) + +#define BASE_REG_CLASS GENERAL_REGS + +#define MODE_BASE_REG_CLASS(MODE) \ + (GET_MODE_SIZE (MODE) <= 4 ? 
(BASE_REG_CLASS) : (DOUBLE_BASE_REGS)) + +#define INDEX_REG_CLASS LONG_REGS + +#define CR16_REGNO_OK_FOR_BASE_P(REGNO) \ + (((REGNO) < FIRST_PSEUDO_REGISTER) \ + || (reg_renumber && ((unsigned) reg_renumber[REGNO] \ + < FIRST_PSEUDO_REGISTER))) + +/* Use even-numbered reg for 64-bit accesses. */ +#define REGNO_MODE_OK_FOR_BASE_P(REGNO, MODE) \ + (CR16_REGNO_OK_FOR_BASE_P(REGNO) && \ + ((GET_MODE_SIZE (MODE) > 4 && \ + (REGNO) < CR16_FIRST_DWORD_REGISTER) \ + ? (0 == ((REGNO) & 1)) \ + : 1)) + +/* TODO: For now lets not support index addressing mode. */ +#define REGNO_OK_FOR_INDEX_P(REGNO) \ + (((REGNO >= CR16_FIRST_DWORD_REGISTER) \ + && ((REGNO) < FIRST_PSEUDO_REGISTER)) \ + || (reg_renumber \ + && (((unsigned) reg_renumber[REGNO] >= CR16_FIRST_DWORD_REGISTER) \ + && ((unsigned) reg_renumber[REGNO] < FIRST_PSEUDO_REGISTER))) \ + ) + +#define PREFERRED_RELOAD_CLASS(X, CLASS) CLASS + +/* The maximum number of consecutive registers of class CLASS needed to + hold a value of mode MODE. + On the CompactRISC architecture, the size of MODE in words. + The size of MODE in double words for the class LONG_REGS. + + The following check assumes if the class is not LONG_REGS, then + all (NO_REGS, SHORT_REGS, NOSP_REGS and GENERAL_REGS) other classes are + short. We may have to check if this can cause any degradation in + performance. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + (CLASS == LONG_REGS \ + ? (GET_MODE_SIZE (MODE) + CR16_UNITS_PER_DWORD - 1) / CR16_UNITS_PER_DWORD\ + : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Macros to check the range of integers . These macros were used across + the port, majorly in constraints.md, predicates.md files. */ +#define SIGNED_INT_FITS_N_BITS(imm, N) \ + ((((imm) < ((HOST_WIDE_INT) 1 << ((N) - 1))) \ + && ((imm) >= -((HOST_WIDE_INT) 1 << ((N) - 1)))) ? 1 : 0) + +#define UNSIGNED_INT_FITS_N_BITS(imm, N) \ + (((imm) < ((HOST_WIDE_INT) 1 << (N)) && (imm) >= (HOST_WIDE_INT) 0) ? 1 : 0) + +#define IN_RANGE_P(VALUE, LOW, HIGH) \ + ((((HOST_WIDE_INT)(VALUE)) >= (HOST_WIDE_INT)(LOW)) \ + && (((HOST_WIDE_INT)(VALUE)) <= ((HOST_WIDE_INT)(HIGH)))) + +#define IN_RAN(VALUE, LOW, HIGH) \ + (((((HOST_WIDE_INT)(VALUE)) >= (HOST_WIDE_INT)(LOW)) \ + && (((HOST_WIDE_INT)(VALUE)) <= ((HOST_WIDE_INT)(HIGH)))) ? 1 : 0) + +/* This check is for sbit/cbit instruction. */ +#define OK_FOR_Z(OP) \ + ((GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == CONST_INT) \ + || (GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == REG) \ + || (GET_CODE (OP) == MEM && GET_CODE (XEXP (OP, 0)) == PLUS \ + && GET_CODE (XEXP ((XEXP (OP, 0)), 0)) == REG \ + && GET_CODE (XEXP ((XEXP (OP, 0)), 1)) == CONST_INT)) + +/* Stack layout and calling conventions. */ +#define STACK_GROWS_DOWNWARD + +#define STARTING_FRAME_OFFSET 0 + +#define STACK_POINTER_REGNUM 15 + +#define FRAME_POINTER_REGNUM 13 + +#define ARG_POINTER_REGNUM 12 + +#define STATIC_CHAIN_REGNUM 1 + +#define RETURN_ADDRESS_REGNUM 14 + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +#define ELIMINABLE_REGS \ + { \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ + } + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + do \ + { \ + (OFFSET) = cr16_initial_elimination_offset ((FROM), (TO)); \ + } \ + while (0) + +/* Passing function arguments. 
*/ + +#define ACCUMULATE_OUTGOING_ARGS 0 + +#define PUSH_ARGS 1 + +#define PUSH_ROUNDING(BYTES) (((BYTES) + 1) & ~1) + +#ifndef CUMULATIVE_ARGS +struct cumulative_args +{ + int ints; + int last_parm_in_reg; +}; + +#define CUMULATIVE_ARGS struct cumulative_args +#endif + +/* On the CR16 architecture, Varargs routines should receive their parameters + on the stack. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + cr16_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME)) + +#define FUNCTION_ARG_REGNO_P(REGNO) cr16_function_arg_regno_p (REGNO) + +/* Generating code for profiling - NOT IMPLEMENTED. */ +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(STREAM, LABELNO) \ +{ \ + sorry ("profiler support for CR16"); \ +} + +/* Trampolines for nested functions - NOT SUPPORTED. */ +#define TRAMPOLINE_SIZE 16 + +/* ADDRESSING MODES. */ + +#define CONSTANT_ADDRESS_P(X) \ + (GET_CODE (X) == LABEL_REF \ + || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST \ + || GET_CODE (X) == CONST_INT) + +#define MAX_REGS_PER_ADDRESS 2 + +#define HAVE_POST_INCREMENT 0 +#define HAVE_POST_DECREMENT 0 +#define HAVE_POST_MODIFY_DISP 0 +#define HAVE_POST_MODIFY_REG 0 + +#ifdef REG_OK_STRICT +#define CR16_REG_OK_FOR_BASE_P(X) CR16_REGNO_OK_FOR_BASE_P (REGNO (X)) +#define REG_MODE_OK_FOR_BASE_P(X, MODE) \ + REGNO_MODE_OK_FOR_BASE_P (REGNO(X), MODE) +#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) +#else /* not REG_OK_STRICT. */ +#define CR16_REG_OK_FOR_BASE_P(X) 1 +#define REG_MODE_OK_FOR_BASE_P(X, MODE) 1 +#define REG_OK_FOR_INDEX_P(X) 1 +#endif /* not REG_OK_STRICT. */ + +/* Assume best case (branch predicted). */ +#define BRANCH_COST(speed_p, predictable_p) 2 + +#define SLOW_BYTE_ACCESS 1 + +/* It is as good or better to call a constant function address than to + call an address kept in a register. */ +#define NO_FUNCTION_CSE + +/* Dividing the output into sections. */ + +#define TEXT_SECTION_ASM_OP "\t.section\t.text" + +#define DATA_SECTION_ASM_OP "\t.section\t.data" + +#define BSS_SECTION_ASM_OP "\t.section\t.bss" + +/* Position independent code (PIC). */ +/* NEAR_PIC for -fpic option. */ + +#define NEAR_PIC 1 + +/* FAR_PIC for -fPIC option. */ + +#define FAR_PIC 2 + +#define PIC_OFFSET_TABLE_REGNUM 12 + +#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X) + +/* Assembler format. */ + +/* Character to start a comment. */ +#define ASM_COMMENT_START "#" + +#define GLOBAL_ASM_OP "\t.globl\t" + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ + asm_fprintf (STREAM, "%U%s", (*targetm.strip_name_encoding) (NAME)); + +#define ASM_OUTPUT_SYMBOL_REF(STREAM, SYMBOL) \ + do \ + { \ + const char *rn = XSTR (SYMBOL, 0); \ + assemble_name (STREAM, rn); \ + if (SYMBOL_REF_FUNCTION_P (SYMBOL)) \ + { \ + fprintf ((STREAM), "@c"); \ + } \ + } \ + while (0) + +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP "\t.section\t.init" + +#undef FINI_SECTION_ASM_OP +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* Instruction output. */ + +#define REGISTER_NAMES \ + { \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "r13", "ra", "sp" \ + } + +/* Output of dispatch tables. */ + +/* Revisit. 
No PC relative case as label expressions are not + properly supported in binutils else we could have done this: + #define CASE_VECTOR_PC_RELATIVE (optimize_size ? 1 : 0). */ +#define CASE_VECTOR_PC_RELATIVE 0 + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + ((GET_MODE (BODY) == QImode) \ + ? fprintf ((FILE), "\t.byte (.L%d-.L%d) >> 1\n", \ + VALUE, REL) \ + : fprintf ((FILE), "\t.word (.L%d-.L%d) >> 1\n", \ + VALUE, REL)) + +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ + asm_fprintf ((STREAM), "\t.long\t.L%d@c\n", (VALUE)) + +/* Alignment in assembler file. */ + +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + asm_fprintf ((STREAM), "\t.align\t%d\n", 1 << (POWER)) + +/* Miscellaneous parameters. */ + +#define CASE_VECTOR_MODE Pmode + +#define MOVE_MAX 4 + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define STORE_FLAG_VALUE 1 + +#define Pmode SImode + +#define FUNCTION_MODE QImode + +/* Define this boolean macro(s) to indicate whether or not your architecture + has (un)conditional branches that can span all of memory. It is used in + conjunction with an optimization that partitions hot and cold basic blocks + into separate sections of the executable. + CR16 contains branch instructions that span whole address space. */ +#define HAS_LONG_COND_BRANCH 1 +#define HAS_LONG_UNCOND_BRANCH 1 + +#endif /* End of GCC_CR16_H. */ diff --git a/gcc-4.9/gcc/config/cr16/cr16.md b/gcc-4.9/gcc/config/cr16/cr16.md new file mode 100644 index 000000000..fb2fc9bc5 --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16.md @@ -0,0 +1,1084 @@ +;; GCC machine description for CR16. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by KPIT Cummins Infosystems Limited. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
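[Editor's note] The cr16.h dispatch-table hooks shown just above store each case entry as a label difference shifted right by one, which is consistent with the port's convention that code addresses are even and are handled in halfword units (see the earlier pointer-to-member-function comment). The sketch below is only an illustration of what ASM_OUTPUT_ADDR_DIFF_ELT prints; the emit_case_entry helper and the label numbers are hypothetical and not part of the port.

#include <stdio.h>

/* Hypothetical helper mirroring ASM_OUTPUT_ADDR_DIFF_ELT above:
   emit one dispatch-table entry as (.Lvalue - .Lrel) >> 1, using
   .byte for QImode tables and .word otherwise.  */
static void
emit_case_entry (FILE *file, int qimode_p, int value_label, int rel_label)
{
  if (qimode_p)
    fprintf (file, "\t.byte (.L%d-.L%d) >> 1\n", value_label, rel_label);
  else
    fprintf (file, "\t.word (.L%d-.L%d) >> 1\n", value_label, rel_label);
}

int
main (void)
{
  /* Example: a word-sized table whose entries are offsets from .L10.  */
  emit_case_entry (stdout, 0, 42, 10);
  emit_case_entry (stdout, 0, 43, 10);
  return 0;
}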
+ +;; Register numbers +(define_constants + [(SP_REGNUM 15); Stack pointer + (RA_REGNUM 14); Return address + ] +) + +;; Predicates & Constraints +(include "predicates.md") +(include "constraints.md") + +;; UNSPEC usage +(define_constants + [(UNSPEC_PIC_ADDR 0) + (UNSPEC_PIC_LOAD_ADDR 1) + (UNSPEC_LIBRARY_OFFSET 2) + (UNSPEC_SH_LIB_PUSH_R12 3) + (UNSPEC_SH_LIB_POP_R12 4) + (UNSPEC_RETURN_ADDR 5) + ] +) + +;; Attributes +(define_attr "length" "" (const_int 2)) + +(define_asm_attributes + [(set_attr "length" "2")] +) + +;; Mode Macro Definitions +(define_mode_iterator CR16IM [QI HI SI]) +(define_mode_iterator LONG [SI SF]) +(define_mode_iterator ALLMTD [QI HI SI SF DI DF]) +(define_mode_iterator DOUBLE [DI DF]) +(define_mode_iterator SHORT [QI HI]) +(define_mode_attr tIsa [(QI "b") (HI "w") (SI "d") (SF "d")]) +(define_mode_attr lImmArith [(QI "4") (HI "4") (SI "6") (SF "6")]) +(define_mode_attr lImmArithD [(QI "4") (HI "4") (SI "6") (SF "6") (DI "12") (DF "12")]) +(define_mode_attr iF [(QI "i") (HI "i") (SI "i") (SF "F")]) +(define_mode_attr iFD [(DI "i") (DF "F")]) +(define_mode_attr LL [(QI "L") (HI "L")]) +(define_mode_attr shImmBits [(QI "3") (HI "4") (SI "5")]) + +; In QI mode we push 2 bytes instead of 1 byte. +(define_mode_attr pushCnstr [(QI "X") (HI "<") (SI "<") (SF "<") (DI "<") (DF "<")]) + +; tpush will be used to generate the 'number of registers to push' in the +; push instruction. +(define_mode_attr tpush [(QI "1") (HI "1") (SI "2") (SF "2") (DI "4") (DF "4")]) + +;; Code Macro Definitions +(define_code_attr sIsa [(sign_extend "") (zero_extend "u")]) +(define_code_attr sPat [(sign_extend "s") (zero_extend "u")]) +(define_code_attr szPat [(sign_extend "") (zero_extend "zero_")]) +(define_code_attr szIsa [(sign_extend "x") (zero_extend "z")]) + +(define_code_iterator sz_xtnd [ sign_extend zero_extend]) +(define_code_iterator any_cond [eq ne gt gtu lt ltu ge geu le leu]) +(define_code_iterator plusminus [plus minus]) + +(define_code_attr plusminus_insn [(plus "add") (minus "sub")]) +(define_code_attr plusminus_flag [(plus "PLUS") (minus "MINUS")]) +(define_code_attr comm [(plus "%") (minus "")]) + +(define_code_iterator any_logic [and ior xor]) +(define_code_attr logic [(and "and") (ior "or") (xor "xor")]) +(define_code_attr any_logic_insn [(and "and") (ior "ior") (xor "xor")]) +(define_code_attr any_logic_flag [(and "AND") (ior "IOR") (xor "XOR")]) + +(define_mode_iterator QH [QI HI]) +(define_mode_attr qh [(QI "qi") (HI "hi")]) +(define_mode_attr QHsz [(QI "2,2,2") (HI "2,2,4")]) +(define_mode_attr QHsuffix [(QI "b") (HI "w")]) + + +;; Function Prologue and Epilogue +(define_expand "prologue" + [(const_int 0)] + "" + { + cr16_expand_prologue (); + DONE; + } +) + +(define_insn "push_for_prologue" + [(set (reg:SI SP_REGNUM) + (minus:SI (reg:SI SP_REGNUM) + (match_operand:SI 0 "immediate_operand" "i")))] + "reload_completed" + { + return cr16_prepare_push_pop_string (0); + } + [(set_attr "length" "4")] +) + +(define_expand "epilogue" + [(return)] + "" + { + cr16_expand_epilogue (); + DONE; + } +) + +(define_insn "pop_and_popret_return" + [(set (reg:SI SP_REGNUM) + (plus:SI (reg:SI SP_REGNUM) + (match_operand:SI 0 "immediate_operand" "i"))) + (use (reg:SI RA_REGNUM)) + (return)] + "reload_completed" + { + return cr16_prepare_push_pop_string (1); + } + [(set_attr "length" "4")] +) + +(define_insn "popret_RA_return" + [(use (reg:SI RA_REGNUM)) + (return)] + "reload_completed" + "popret\tra" + [(set_attr "length" "2")] +) + +;; Arithmetic Instruction Patterns + +;; 
Addition-Subtraction "adddi3/subdi3" insns. +(define_insn "di3" + [(set (match_operand:DI 0 "register_operand" "=r") + (plusminus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "register_operand" "r")))] + "" + { + return cr16_emit_add_sub_di (operands, ); + }) + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (plus:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0") + (match_operand:SI 2 "reg_si_int_operand" "r,M,N,O,i")))] + "" + "addd\t%2, %0" + [(set_attr "length" "2,2,4,4,6")] +) + +;; Addition-Subtraction "addhi3/subhi3" insns. +(define_insn "hi3" + [(set (match_operand:HI 0 "register_operand" "=c,c,c") + (plusminus:HI (match_operand:HI 1 "register_operand" "0,0,0") + (match_operand:HI 2 "reg_hi_int_operand" "c,M,N")))] + "" + "w\t%2, %0" + [(set_attr "length" "2,2,4")] +) + +;; Addition-Subtraction "addqi3/subqi3" insns. +(define_insn "qi3" + [(set (match_operand:QI 0 "register_operand" "=c,c") + (plusminus:QI (match_operand:QI 1 "register_operand" "0,0") + (match_operand:QI 2 "reg_qi_int_operand" "c,M")))] + "" + "b\t%2, %0" + [(set_attr "length" "2,2")] +) + +;; Subtract Instruction +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (minus:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "reg_si_int_operand" "r,i")))] + "" + "subd\t%2, %0" + [(set_attr "length" "4,6")] +) + +;; Multiply and Accumulate Instructions "smachisi3/umachisi3" +(define_insn "maddhisi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI + (mult:SI (sz_xtnd:SI (match_operand:HI 1 "register_operand" "r")) + (sz_xtnd:SI (match_operand:HI 2 "register_operand" "r"))) + (match_operand:SI 3 "register_operand" "0")))] + "TARGET_MAC" + "macw\t%1, %2, %0" + [(set_attr "length" "2")] +) + +;; Multiply Instructions +(define_insn "mulhi3" + [(set (match_operand:HI 0 "register_operand" "=c,c,c") + (mult:HI (match_operand:HI 1 "register_operand" "%0,0,0") + (match_operand:HI 2 "reg_or_int_operand" "c,M,N")))] + "" + "mulw\t%2, %0" + [(set_attr "length" "2,2,4")] +) + +(define_insn "mulqihi3" + [(set (match_operand:HI 0 "register_operand" "=c") + (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "%0")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "c"))))] + "" + "mulsb\t%2, %0" + [(set_attr "length" "2")] +) + +;; Bit Set/Clear Instructions +(define_expand "insv" + [(set (zero_extract (match_operand 0 "memory_operand" "") + (match_operand 1 "immediate_operand" "") + (match_operand 2 "immediate_operand" "")) + (match_operand 3 "immediate_operand" ""))] + "TARGET_BIT_OPS" + { + if (INTVAL (operands[1]) != 1) + FAIL; + if (INTVAL (operands[2]) < 0 || INTVAL (operands[2]) > 15) + FAIL; + if (INTVAL (operands[3]) == 1) + { + if (GET_MODE (operands[0]) == QImode) + { + emit_insn (gen_set_bitqi (operands[0], operands[2])); + DONE; + } + else if (GET_MODE (operands[0]) == HImode) + { + emit_insn (gen_set_bithi (operands[0], operands[2])); + DONE; + } + } + if (INTVAL (operands[3]) == 0) + { + if (GET_MODE (operands[0]) == QImode) + { + emit_insn (gen_clr_bitqi (operands[0], operands[2])); + DONE; + } + else if (GET_MODE (operands[0]) == HImode) + { + emit_insn (gen_clr_bithi (operands[0], operands[2])); + DONE; + } + } + } +) + +(define_insn "set_bit" + [(set (zero_extract:SHORT (match_operand:SHORT 0 "memory_operand" "+m") + (const_int 1) + (match_operand 1 "immediate_operand" "i")) + (const_int 1))] + "TARGET_BIT_OPS" + "sbit\t%1,%0" + [(set_attr "length" "2")] +) + 
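[Editor's note] The insv expander and the sbit/cbit patterns in this area handle single-bit updates of QImode/HImode memory operands when TARGET_BIT_OPS (-mbit-ops, per cr16.opt later in this patch) is enabled. The C fragment below is only a sketch of the kind of source-level store these patterns are intended to match; the variable and function names are made up, and whether a given store is actually combined into one sbit/cbit depends on the operand predicates (bit_operand, one_bit_operand, rev_one_bit_operand defined later in predicates.md).

#include <stdint.h>

/* Hypothetical memory-mapped flag word, purely for illustration.  */
volatile uint16_t status_flags;

void
set_ready_bit (void)
{
  /* OR with a single-bit mask: shape matched by the sbit patterns.  */
  status_flags |= (uint16_t) (1u << 3);
}

void
clear_ready_bit (void)
{
  /* AND with an all-ones-but-one mask: shape matched by the cbit patterns.  */
  status_flags &= (uint16_t) ~(1u << 3);
}

/* Same single-bit test the one_bit_operand predicate uses:
   a nonzero value with exactly one bit set satisfies
   (val & (val - 1)) == 0.  */
int
has_single_bit (unsigned int val)
{
  return val != 0 && (val & (val - 1)) == 0;
}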
+(define_insn "clr_bit" + [(set (zero_extract:SHORT (match_operand:SHORT 0 "memory_operand" "+m") + (const_int 1) + (match_operand 1 "immediate_operand" "i")) + (const_int 0))] + "TARGET_BIT_OPS" + "cbit\t%1,%0" + [(set_attr "length" "2")] +) + +(define_insn "set_bit_mem" + [(set (match_operand:SHORT 0 "bit_operand" "=m") + (ior:SHORT (match_dup 0) + (match_operand:SHORT 1 "one_bit_operand" "i")) + )] + "TARGET_BIT_OPS" + "sbit\t$%s1,%0" + [(set_attr "length" "2")] +) + +(define_insn "clear_bit_mem" + [(set (match_operand:SHORT 0 "bit_operand" "=m") + (and:SHORT (match_dup 0) + (match_operand:SHORT 1 "rev_one_bit_operand" "i")) + )] + "TARGET_BIT_OPS" + "cbit\t$%r1,%0" + [(set_attr "length" "2")] +) + +;; Logical Instructions - and/ior/xor "anddi3/iordi3/xordi3" +(define_insn "di3" + [(set (match_operand:DI 0 "register_operand" "=r") + (any_logic:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "register_operand" "r")))] + "" + { + return cr16_emit_logical_di (operands, ); + }) + +; Logical and/ior/xor "andsi3/iorsi3/xorsi3" +(define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (any_logic:SI (match_operand:SI 1 "register_operand" "%0,0,0,0") + (match_operand:SI 2 "reg_si_int_operand" "r,M,N,i")))] + "" + "d\t%2, %0" + [(set_attr "length" "2,2,4,6")] +) + +; Logical and/ior/xor in HImode "andhi3/iorhi3/xorhi3" +; Logical and/ior/xor in QImode "andqi3/iorqi3/xorqi3" +(define_insn "3" + [(set (match_operand:QH 0 "register_operand" "=c,c,c") + (any_logic:QH (match_operand:QH 1 "register_operand" "%0,0,0") + (match_operand:QH 2 "reg_hi_int_operand" "c,M,N")))] + "" + "\t%2, %0" + [(set_attr "length" "")] +) + +;; Sign and Zero Extend Instructions +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sz_xtnd:SI (match_operand:HI 1 "register_operand" "r")))] + "" + "movw\t%1, %0" + [(set_attr "length" "4")] +) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (sz_xtnd:HI (match_operand:QI 1 "register_operand" "r")))] + "" + "movb\t%1, %0" + [(set_attr "length" "4")] +) + +;; One's Complement +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (not:DI (match_operand:DI 1 "register_operand" "0")))] + "" + { + rtx xoperand ; + int reg0 = REGNO (operands[0]); + + xoperand = gen_rtx_REG (SImode, reg0 + 2); + output_asm_insn ("xord\t$-1, %0", operands); + output_asm_insn ("xord\t$-1, %0", &xoperand); + return "" ; + } + [(set_attr "length" "12")] +) + +(define_insn "one_cmpl2" + [(set (match_operand:CR16IM 0 "register_operand" "=r") + (not:CR16IM (match_operand:CR16IM 1 "register_operand" "0")))] + "" + "xor\t$-1, %0" + [(set_attr "length" "2")] +) + +;; Arithmetic Left and Right Shift Instructions +(define_insn "ashlqi3" + [(set (match_operand:QI 0 "register_operand" "=c,c") + (ashift:QI (match_operand:QI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "c,I")))] + "" + "ashub\t%2, %0" + [(set_attr "length" "2,2")] +) + +(define_insn "ashlhi3" + [(set (match_operand:HI 0 "register_operand" "=c,c") + (ashift:HI (match_operand:HI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "c,J")))] + "" + "ashuw\t%2, %0" + [(set_attr "length" "2,2")] +) + +(define_insn "ashlsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "r,K")))] + "" + "ashud\t%2, %0" + [(set_attr "length" "2,2")] +) + +(define_expand "ashr3" + [(set 
(match_operand:CR16IM 0 "register_operand" "") + (ashiftrt:CR16IM (match_operand:CR16IM 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT) + { + /* If the constant is not in range, try placing it in a reg */ + if (!UNSIGNED_INT_FITS_N_BITS(INTVAL (operands[2]),)) + operands[2] = copy_to_mode_reg(QImode, operands[2]); + } + + if (GET_CODE (operands[2]) != CONST_INT) + operands[2] = gen_rtx_NEG (QImode, negate_rtx (QImode, operands[2])); + } +) + +(define_insn "ashrqi3_imm_insn" + [(set (match_operand:QI 0 "register_operand" "=c") + (ashiftrt:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "shift_qi_imm_operand" "i")))] + "" + "ashub\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "ashrhi3_imm_insn" + [(set (match_operand:HI 0 "register_operand" "=c") + (ashiftrt:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "shift_hi_imm_operand" "i")))] + "" + "ashuw\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "ashrsi3_imm_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "shift_si_imm_operand" "i")))] + "" + "ashud\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "ashrqi3_neg_insn" + [(set (match_operand:QI 0 "register_operand" "=c") + (ashiftrt:QI (match_operand:QI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "c"))))] + "" + "ashub\t%2,%0" + [(set_attr "length" "2")] +) + +(define_insn "ashrhi3_neg_insn" + [(set (match_operand:HI 0 "register_operand" "=c") + (ashiftrt:HI (match_operand:HI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "c"))))] + "" + "ashuw\t%2,%0" + [(set_attr "length" "2")] +) + +(define_insn "ashrdi3_neg_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "r"))))] + "" + "ashud\t%2,%0" + [(set_attr "length" "2")] +) + +(define_expand "lshr3" + [(set (match_operand:CR16IM 0 "register_operand" "") + (lshiftrt:CR16IM (match_operand:CR16IM 1 "register_operand" "") + (match_operand:QI 2 "reg_or_int_operand" "")))] + "" + { + if (GET_CODE (operands[2]) == CONST_INT) + { + /* If the constant is not in range, try placing it in a reg */ + if (!UNSIGNED_INT_FITS_N_BITS(INTVAL (operands[2]),)) + operands[2] = copy_to_mode_reg(QImode, operands[2]); + } + + if (GET_CODE (operands[2]) != CONST_INT) + operands[2] = gen_rtx_NEG (QImode, negate_rtx (QImode, operands[2])); + } +) + +(define_insn "lshrqi3_imm_insn" + [(set (match_operand:QI 0 "register_operand" "=c") + (lshiftrt:QI (match_operand:QI 1 "register_operand" "0") + (match_operand:QI 2 "shift_qi_operand" "Q")))] + "" + "lshb\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "lshrhi3_imm_insn" + [(set (match_operand:HI 0 "register_operand" "=c") + (lshiftrt:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "shift_hi_operand" "R")))] + "" + "lshw\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "lshrsi3_imm_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "shift_si_operand" "S")))] + "" + "lshd\t$%n2, %0" + [(set_attr "length" "2")] +) + +(define_insn "lshrqi3_neg_insn" + [(set (match_operand:QI 0 "register_operand" "=c") + (lshiftrt:QI (match_operand:QI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 
"register_operand" "c"))))] + "" + "lshb\t%2,%0" + [(set_attr "length" "2")] +) + +(define_insn "lshrhi3_neg_insn" + [(set (match_operand:HI 0 "register_operand" "=c") + (lshiftrt:HI (match_operand:HI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "c"))))] + "" + "lshw\t%2,%0" + [(set_attr "length" "2")] +) + +(define_insn "lshrsi3_neg_insn" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (neg:QI (match_operand:QI 2 "register_operand" "r"))))] + "" + "lshd\t%2,%0" + [(set_attr "length" "2")] +) + +;; Move Instructions + +;; Move any non-immediate operand 0 to a general operand 1. +;; This applies only before starting the reload process +;; Operand 0 is not a register operand of type mode MODE +;; If Operand 0 is a push operand of type mode MODE +;; then, if Operand 1 is a non-SP register +;; then, Operand 1 = copy_to_mode_reg (mode, Operand 1) +;; endif +;; else +;; if Operand 1 is either register or 4-bit immediate constant +;; then, Operand 1 = copy_to_mode_reg (mode, Operand 1) +;; endif +;; endif +;; +;; What does copy_to_mode_reg (mode, rtx val) do? +;; Copy the value into new temp reg and return the reg where the +;; mode of the new reg is always mode MODE when value is constant +;; +;; Why should copy_to_mode_reg be called? +;; All sorts of move are nor supported by CR16. Therefore, +;; when unsupported move is encountered, the additional instructions +;; will be introduced for the purpose. +;; +;; A new move insn is inserted for Op 1 when one of the following +;; conditions is met. +;; Case 1: Op 0 is push_operand +;; Op 1 is SP register +;; +;; Case 2: Op 0 is not push_operand +;; Op 1 is neither register nor unsigned 4-bit immediate + +(define_expand "mov" + [(set (match_operand:ALLMTD 0 "nonimmediate_operand" "") + (match_operand:ALLMTD 1 "general_operand" ""))] + "" + { + if (!(reload_in_progress || reload_completed)) + { + /* Only if Op0 is a register operand. */ + if (!register_operand (operands[0], mode)) + { + if (push_operand (operands[0], mode)) + { + /* Use copy_to_mode_reg only if the register needs + to be pushed is SP as CR16 does not support pushing SP. */ + if (!nosp_reg_operand (operands[1], mode)) + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + else + { + /* Use copy_to_mode_reg if op1 is not register operand + subject to conditions inside. */ + if (!register_operand (operands[1], mode)) + { + /* CR16 does not support moving immediate to SI or SF + type memory. */ + if (mode == SImode || mode == SFmode || + mode == DImode || mode == DFmode) + operands[1] = copy_to_mode_reg (mode, operands[1]); + else + /* moving imm4 is supported by CR16 instruction. */ + if (!u4bits_operand (operands[1], mode)) + operands[1] = copy_to_mode_reg (mode, operands[1]); + } + } + } + + /* If operand-1 is a symbol, convert it into a BRO or GOT Format. */ + if (flag_pic && ! 
legitimate_pic_operand_p (operands[1])) + { + operands[1] = legitimize_pic_address (operands[1], mode, 0); + } + } + } +) + +; ALLMT : QI,HI,SI,SF +; pushCnstr : Push constraints +; QI : X +; HI,SI,SF,DI,DF : < +; b : All non-sp registers +; tpush : Push count +; QI,HI : 1 +; SI,SF : 2 +; DI,DF : 4 +(define_insn "push_internal" + [(set (match_operand:ALLMTD 0 "push_operand" "=") + (match_operand:ALLMTD 1 "nosp_reg_operand" "b"))] + "" + "push\t$,%p1" + [(set_attr "length" "2")] +) + +; (DI, DF) move +(define_insn "*mov_double" + [(set (match_operand:DOUBLE 0 "nonimmediate_operand" "=r, r, r, m") + (match_operand:DOUBLE 1 "general_operand" "r, , m, r"))] + "register_operand (operands[0], DImode) + || register_operand (operands[0], DFmode) + || register_operand (operands[1], DImode) + || register_operand (operands[1], DFmode)" + { + if (0 == which_alternative) { + rtx xoperands[2] ; + int reg0 = REGNO (operands[0]); + int reg1 = REGNO (operands[1]); + + xoperands[0] = gen_rtx_REG (SImode, reg0 + 2); + xoperands[1] = gen_rtx_REG (SImode, reg1 + 2); + if ((reg1 + 2) != reg0) + { + output_asm_insn ("movd\t%1, %0", operands); + output_asm_insn ("movd\t%1, %0", xoperands); + } + else + { + output_asm_insn ("movd\t%1, %0", xoperands); + output_asm_insn ("movd\t%1, %0", operands); + }} + + else if (1 == which_alternative) { + rtx lo_operands[2] ; + rtx hi_operands[2] ; + + lo_operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + hi_operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 2); + lo_operands[1] = simplify_gen_subreg (SImode, operands[1], + VOIDmode == GET_MODE (operands[1]) + ? DImode : GET_MODE (operands[1]), 0); + hi_operands[1] = simplify_gen_subreg (SImode, operands[1], + VOIDmode == GET_MODE (operands[1]) + ? DImode : GET_MODE (operands[1]), 4); + output_asm_insn ("movd\t%1, %0", lo_operands); + output_asm_insn ("movd\t%1, %0", hi_operands);} + + else if (2 == which_alternative) { + rtx xoperands[2] ; + int reg0 = REGNO (operands[0]), reg1 = -2 ; + rtx addr ; + + if (MEM_P (operands[1])) + addr = XEXP (operands[1], 0); + else + addr = NULL_RTX ; + switch (GET_CODE (addr)) + { + case REG: + case SUBREG: + reg1 = REGNO (addr); + break ; + case PLUS: + switch (GET_CODE (XEXP (addr, 0))) { + case REG: + case SUBREG: + reg1 = REGNO (XEXP (addr, 0)); + break ; + case PLUS: + reg1 = REGNO (XEXP (XEXP (addr, 0), 0)); + break ; + default: + inform (DECL_SOURCE_LOCATION (cfun->decl), "unexpected expression; addr:"); + debug_rtx (addr); + inform (DECL_SOURCE_LOCATION (cfun->decl), "operands[1]:"); + debug_rtx (operands[1]); + inform (DECL_SOURCE_LOCATION (cfun->decl), "generated code might now work\n"); + break ;} + break ; + default: + break ; + } + + xoperands[0] = gen_rtx_REG (SImode, reg0 + 2); + xoperands[1] = offset_address (operands[1], GEN_INT (4), 2); + gcc_assert ((reg0 + 1) != reg1); + if (reg0 != reg1 && (reg1 + 1) != reg0) + { + output_asm_insn ("loadd\t%1, %0", operands); + output_asm_insn ("loadd\t%1, %0", xoperands); + } + else + { + output_asm_insn ("loadd\t%1, %0", xoperands); + output_asm_insn ("loadd\t%1, %0", operands); + }} + else + { + rtx xoperands[2] ; + xoperands[0] = offset_address (operands[0], GEN_INT (4), 2); + xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 2); + output_asm_insn ("stord\t%1, %0", operands); + output_asm_insn ("stord\t%1, %0", xoperands); + } + return "" ; + } + [(set_attr "length" "4, , , ")] +) + +; All long (SI, SF) register move, load and store operations +; The print_operand will take care of printing the register pair +; 
when mode is SI/SF and register is in SHORT_REGS +(define_insn "*mov_long" + [(set (match_operand:LONG 0 "nonimmediate_operand" "=r, r, r, m") + (match_operand:LONG 1 "general_operand" "r, , m, r"))] + "register_operand (operands[0], mode) + || register_operand (operands[1], mode)" + "@ + mov\t%1, %0 + mov\t%1, %0 + load\t%1, %0 + stor\t%1, %0" + [(set_attr "length" "2,,,")] +) + +;; All short (QI, HI) register move, load and store operations +(define_insn "*mov_short" + [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r, r, r, m, m") + (match_operand:SHORT 1 "general_operand" "r, , m, r, "))] + "(register_operand (operands[0], mode)) + || (store_operand (operands[0], mode) + && (register_operand (operands[1], mode) + || u4bits_operand (operands[1], mode)))" + "@ + mov\t%1, %0 + mov\t%1, %0 + load\t%1, %0 + stor\t%1, %0 + stor\t%1, %0" + [(set_attr "length" "2,,,,")] +) + +;; Compare Instructions +; Instruction generated compares the operands in reverse order +; Therefore, while printing the asm, the reverse of the +; compare condition shall be printed. +(define_insn "cbranch4" + [(set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(match_operand:CR16IM 1 "register_operand" "r,r") + (match_operand:CR16IM 2 "nonmemory_operand" "r,n")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (cc0))] + "" + "cmp\t%2, %1\;b%d0\t%l3" + [(set_attr "length" "6,6")] +) + +(define_expand "cmp" + [(parallel [(set (cc0) + (compare (match_operand:CR16IM 0 "register_operand" "") + (match_operand:CR16IM 1 "nonmemory_operand" ""))) + (clobber (match_scratch:HI 2 "=r"))] ) ] + "" + "") + +;; Scond Instructions +(define_expand "cstore4" + [(set (cc0) + (compare (match_operand:CR16IM 2 "register_operand" "") + (match_operand:CR16IM 3 "nonmemory_operand" ""))) + (set (match_operand:HI 0 "register_operand") + (match_operator:HI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" + "" +) + +(define_insn "*cmp_insn" + [(set (cc0) + (compare (match_operand:CR16IM 0 "register_operand" "r,r") + (match_operand:CR16IM 1 "nonmemory_operand" "r,n")))] + "" + "cmp\t%1, %0" + [(set_attr "length" "2,4")] +) + +(define_insn "sCOND_internal" + [(set (match_operand:HI 0 "register_operand" "=r") + (match_operator:HI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" + "s%d1\t%0" + [(set_attr "length" "2")] +) + +;; Jumps and Branches +(define_insn "indirect_jump_return" + [(set (pc) + (reg:SI RA_REGNUM)) + (return)] + "reload_completed" + "jump\t (ra)" + [(set_attr "length" "2")] +) + +(define_insn "jump_return" + [(unspec:SI [(const_int 0)] UNSPEC_RETURN_ADDR) + (return)] + "reload_completed" + "jump\t(ra)" + [(set_attr "length" "2")] +) + +(define_insn "indirect_jump" + [(set (pc) + (match_operand:SI 0 "reg_or_sym_operand" "r,i"))] + "" + "@ + jump\t%0 + br\t%a0" + [(set_attr "length" "2,6")] +) + +(define_insn "interrupt_return" + [(unspec_volatile [(const_int 0)] 0) + (return)] + "" + { + return cr16_prepare_push_pop_string (1); + } + [(set_attr "length" "14")] +) + +(define_insn "jump_to_imm" + [(set (pc) + (match_operand 0 "jump_imm_operand" "i"))] + "" + "br\t%c0" + [(set_attr "length" "6")] +) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "br\t%l0" + [(set_attr "length" "6")] +) + +;; Table Jump +(define_insn "tablejump" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref:SI (match_operand 1 "" "")))] + "!flag_pic" + "jump\t%0" + [(set_attr "length" "2")] +) + +;; Call Instructions +(define_expand 
"call" + [(call (match_operand:QI 0 "memory_operand" "") + (match_operand 1 "" ""))] + "" + { + if (flag_pic && ! legitimate_pic_operand_p (operands[0])) + { + operands[0] = gen_const_mem (QImode, + legitimize_pic_address (XEXP (operands[0], 0), Pmode, 0)); + emit_call_insn (gen_cr16_call (operands[0], operands[1])); + } + else + emit_call_insn (gen_cr16_call (operands[0], operands[1])); + DONE; + } +) + +(define_expand "cr16_call" + [(parallel + [(call (match_operand:QI 0 "memory_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI RA_REGNUM))])] + "" + "" +) + +(define_insn "cr16_call_insn_branch_pic" + [(call (mem:QI (match_operand:SI 0 "call_imm_operand" "i")) + (match_operand 1 "" "")) + (clobber (match_operand:SI 2 "register_operand" "+r"))] + "flag_pic == FAR_PIC" + { + if (GET_CODE (operands[0]) != CONST_INT) + return "loadd\t%g0, %2 \n\tjal %2"; + else + return "jal %2"; + } + [(set_attr "length" "8")] +) + +(define_insn "cr16_call_insn_branch" + [(call (mem:QI (match_operand:SI 0 "call_imm_operand" "i")) + (match_operand 1 "" "")) + (clobber (match_operand:SI 2 "register_operand" "+r"))] + "flag_pic == 0 || flag_pic == NEAR_PIC" + { + /* Print the immediate address for bal + 'b' is used instead of 'a' to avoid compiler calling + the GO_IF_LEGITIMATE_ADDRESS which cannot + perform checks on const_int code addresses as it + assumes all const_int are data addresses. + */ + if (GET_CODE (operands[0]) != CONST_INT) + return "bal (ra), %a0"; + else + operands[4] = GEN_INT ((INTVAL (operands[0]))>>1); + return "movd\t%g4,\t(r1,r0)\n\tjal\t(r1,r0)"; + } + [(set_attr "length" "6")] +) + +(define_insn "cr16_call_insn_jump" + [(call (mem:QI (match_operand:SI 0 "register_operand" "r")) + (match_operand 1 "" "")) + (clobber (match_operand:SI 2 "register_operand" "+r"))] + "" + "jal\t%0" + [(set_attr "length" "2")] +) + +;; Call Value Instructions + +(define_expand "call_value" + [(set (match_operand 0 "general_operand" "") + (call (match_operand:QI 1 "memory_operand" "") + (match_operand 2 "" "")))] + "" + { + if (flag_pic && !legitimate_pic_operand_p (operands[1])) + { + operands[1] = gen_const_mem (QImode, + legitimize_pic_address (XEXP (operands[1], 0), Pmode, 0)); + emit_call_insn (gen_cr16_call_value (operands[0], operands[1], operands[2])); + } + else + emit_call_insn (gen_cr16_call_value (operands[0], operands[1], operands[2])); + DONE; + } +) + +(define_expand "cr16_call_value" + [(parallel + [(set (match_operand 0 "general_operand" "") + (call (match_operand 1 "memory_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI RA_REGNUM))])] + "" + "" +) + +(define_insn "cr16_call_value_insn_branch_pic" + [(set (match_operand 0 "" "=g") + (call (mem:QI (match_operand:SI 1 "call_imm_operand" "i")) + (match_operand 2 "" ""))) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "flag_pic == FAR_PIC" + { + if (GET_CODE (operands[1]) != CONST_INT) + return "loadd\t%g1, %3 \n\tjal %3"; + else + return "jal %3"; + } + [(set_attr "length" "8")] +) + +(define_insn "cr16_call_value_insn_branch" + [(set (match_operand 0 "" "=g") + (call (mem:QI (match_operand:SI 1 "call_imm_operand" "i")) + (match_operand 2 "" ""))) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "flag_pic == 0 || flag_pic == NEAR_PIC" + { + /* Print the immediate address for bal + 'b' is used instead of 'a' to avoid compiler calling + the GO_IF_LEGITIMATE_ADDRESS which cannot + perform checks on const_int code addresses as it + assumes all const_int are data addresses. 
+ */ + if (GET_CODE (operands[1]) != CONST_INT) + return "bal (ra), %a1"; + else + { + operands[4] = GEN_INT ((INTVAL (operands[1]))>>1); + return "movd\t%g4,\t(r1,r0)\n\tjal\t(r1,r0)"; + } + } + [(set_attr "length" "6")] +) + + +(define_insn "cr16_call_value_insn_jump" + [(set (match_operand 0 "" "=g") + (call (mem:QI (match_operand:SI 1 "register_operand" "r")) + (match_operand 2 "" ""))) + (clobber (match_operand:SI 3 "register_operand" "+r"))] + "" + "jal\t%1" + [(set_attr "length" "2")] +) + + +;; Nop +(define_insn "nop" + [(const_int 0)] + "" + "nop\t" +) + +;; PIC +/* When generating pic, we need to load the symbol offset into a register. + So that the optimizer does not confuse this with a normal symbol load + we use an unspec. The offset will be loaded from a constant pool entry, + since that is the only type of relocation we can use. */ + +(define_insn "unspec_bro_addr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "" "")] UNSPEC_PIC_ADDR))] + "" + "movd \t%f1, %0" + [(set_attr "length" "4")] +) + +(define_insn "unspec_got_addr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "" "")] UNSPEC_PIC_LOAD_ADDR))] + "" + "loadd \t%g1, %0" + [(set_attr "length" "6")] +) diff --git a/gcc-4.9/gcc/config/cr16/cr16.opt b/gcc-4.9/gcc/config/cr16/cr16.opt new file mode 100644 index 000000000..e4433cb5d --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/cr16.opt @@ -0,0 +1,51 @@ +; Options for the National Semiconductor CR16 port of the compiler. +; Copyright (C) 2012-2014 Free Software Foundation, Inc. +; Contributed by KPIT Cummins Infosystems Limited. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License as published +; by the Free Software Foundation; either version 3, or (at your +; option) any later version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +msim +Target +-msim Use simulator runtime + +mbit-ops +Target Report Mask(BIT_OPS) +Generate SBIT, CBIT instructions + +mmac +Target Report Mask(MAC) +Support multiply accumulate instructions + +mdebug-addr +Target RejectNegative Var(TARGET_DEBUG_ADDR) Undocumented + +mdata-model= +Target RejectNegative JoinedOrMissing Var(cr16_data_model) +Treat data references as near, far or medium. medium is default + +mcr16c +Target RejectNegative Mask(CR16C) +Generate code for CR16C architecture + +mcr16cplus +Target RejectNegative InverseMask(CR16C,CR16CP) +Generate code for CR16C+ architecture (Default) + +mint32 +Target RejectNegative Mask(INT32) +Treat integers as 32-bit. + diff --git a/gcc-4.9/gcc/config/cr16/predicates.md b/gcc-4.9/gcc/config/cr16/predicates.md new file mode 100644 index 000000000..d998df9da --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/predicates.md @@ -0,0 +1,225 @@ +;; Predicates of machine description for CR16. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; Contributed by KPIT Cummins Infosystems Limited. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Predicates + +;; Predicates for sbit/cbit instructions +;; bit operand used for the generation of bit insn generation +(define_predicate "bit_operand" + (match_code "mem") +{ + return ((GET_CODE (op) == MEM && OK_FOR_Z (op))); +}) + +;; Unsigned 4-bits constant int or double value. +(define_predicate "u4bits_operand" + (match_code "const_int,const_double") +{ + if (GET_CODE (op) == CONST_DOUBLE) + return cr16_const_double_ok (op); + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 4)) ? 1 : 0; +}) + +;; Operand is a constant integer where +;; only one bit is set to 1. +(define_predicate "one_bit_operand" + (match_code "const_int") +{ + unsigned int val; + + val = INTVAL (op); + if (mode == QImode) + val &= 0xff; + else if (mode == HImode) + val &= 0xffff; + else + gcc_unreachable(); + + if (val != 0) + return (val & (val - 1)) == 0; /* true if only one bit is set. */ + else + return 0; +}) + +;; Operand is a constant integer where +;; only one bit is set to 0. +(define_predicate "rev_one_bit_operand" + (match_code "const_int") +{ + unsigned int val; + + val = ~INTVAL (op); /* Invert and use. */ + if (mode == QImode) + val &= 0xff; + else if (mode == HImode) + val &= 0xffff; + else + gcc_unreachable(); + + if (val != 0) + return (val & (val - 1)) == 0; /* true if only one bit is set. */ + else + return 0; +}) + +;; Predicates for shift instructions +;; Immediate operand predicate for count in shift operations. +;; Immediate shall be 3-bits in case operand to be operated on +;; is a qi mode operand. +(define_predicate "shift_qi_imm_operand" + (match_code "const_int") +{ + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 3)) ? 1 : 0; +}) + +;; Immediate shall be 4-bits in case operand to be operated on +;; is a hi mode operand. +(define_predicate "shift_hi_imm_operand" + (match_code "const_int") +{ + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 4)) ? 1 : 0; +}) + +;; Immediate shall be 3-bits in case operand to be operated on +;; is a si mode operand. +(define_predicate "shift_si_imm_operand" + (match_code "const_int") +{ + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 5)) ? 1 : 0; +}) + +;; Predicates for jump/call instructions +;; Jump immediate cannot be more than 24-bits +(define_predicate "jump_imm_operand" + (match_code "const_int") +{ + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 24)) ? 1 : 0; +}) + +;; Call immediate cannot be more than 24-bits +(define_predicate "call_imm_operand" + (match_operand 0 "immediate_operand") +{ + if (GET_CODE (op) != CONST_INT) return 1; + return (UNSIGNED_INT_FITS_N_BITS(INTVAL (op), 24)) ? 
1 : 0; +}) + +;; Operand is register or 4-bit immediate operand +(define_predicate "reg_or_u4bits_operand" + (ior (match_operand 0 "u4bits_operand") + (match_operand 0 "register_operand"))) + +;; Operand is a register or symbol reference +(define_predicate "reg_or_sym_operand" + (ior (match_code "symbol_ref") + (match_operand 0 "register_operand"))) + +;; Operand is a non stack pointer register +(define_predicate "nosp_reg_operand" + (and (match_operand 0 "register_operand") + (match_test "REGNO (op) != SP_REGNUM"))) + +(define_predicate "hard_reg_operand" + (and (match_operand 0 "register_operand") + (match_test "REGNO (op) <= 15"))) + +;; Operand is a memory reference and +;; not a push operand. +(define_predicate "store_operand" + (and (match_operand 0 "memory_operand") + (not (match_operand 0 "push_operand")))) + +;; Helper predicate +(define_predicate "reg_or_int_operand" + (ior (match_code "const_int") + (match_operand 0 "register_operand"))) + +;; +;; +;; Atithmetic/logical predicates + +;; QI Helper +(define_predicate "arith_qi_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), 0, 15) && ((INTVAL (op) != 9) + || (INTVAL (op) != 11))) ? 1 : 0 ; +}) + +;;QI Reg, subreg(reg) or const_int. +(define_predicate "reg_qi_int_operand" + (ior (match_operand 0 "arith_qi_operand") + (match_operand 0 "register_operand"))) + +;; HI Helper +(define_predicate "arith_hi_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), -32768, 32768) ) ? 1 : 0 ; +}) + +;;HI Reg, subreg(reg) or const_int. +(define_predicate "reg_hi_int_operand" + (ior (match_operand 0 "arith_hi_operand") + (match_operand 0 "register_operand"))) + +;;SI Reg, subreg(reg) or const_int. +(define_predicate "reg_si_int_operand" + (ior (match_operand 0 "const_int_operand") + (match_operand 0 "register_operand"))) + +;; +;; Shift predicates + +;; QI Helper +(define_predicate "shift_qi_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), 0, 7) ) ? 1 : 0; +}) + +;;QI Reg, subreg(reg) or const_int. +(define_predicate "shift_reg_qi_int_operand" + (ior (match_operand 0 "shift_qi_operand") + (match_operand 0 "register_operand"))) + +;; HI Helper +(define_predicate "shift_hi_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), 0, 15) ) ? 1 : 0 ; +}) + +;;HI Reg, subreg(reg) or const_int. +(define_predicate "shift_reg_hi_int_operand" + (ior (match_operand 0 "shift_hi_operand") + (match_operand 0 "register_operand"))) + +;; SI Helper +(define_predicate "shift_si_operand" + (match_code "const_int") +{ + return (IN_RAN(INTVAL (op), 0, 31) ) ? 1 : 0; +}) + +;;SI Reg, subreg(reg) or const_int. +(define_predicate "shift_reg_si_int_operand" + (ior (match_operand 0 "shift_si_operand") + (match_operand 0 "register_operand"))) diff --git a/gcc-4.9/gcc/config/cr16/t-cr16 b/gcc-4.9/gcc/config/cr16/t-cr16 new file mode 100644 index 000000000..835841fc9 --- /dev/null +++ b/gcc-4.9/gcc/config/cr16/t-cr16 @@ -0,0 +1,25 @@ +# CR16 Target Makefile +# Copyright (C) 2012-2014 Free Software Foundation, Inc. +# Contributed by KPIT Cummins Infosystems Limited. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published +# by the Free Software Foundation; either version 3, or (at your +# option) any later version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public +# License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = fPIC mint32 +MULTILIB_DIRNAMES = far-pic int32 +MULTILIB_MATCHES = +MULTILIB_EXTRA_OPTS = mcr16cplus mdata-model=far + diff --git a/gcc-4.9/gcc/config/cris/constraints.md b/gcc-4.9/gcc/config/cris/constraints.md new file mode 100644 index 000000000..651fbedb0 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/constraints.md @@ -0,0 +1,164 @@ +;; Constraint definitions for CRIS. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints. +(define_register_constraint "a" "ACR_REGS" + "@internal") + +(define_register_constraint "b" "GENNONACR_REGS" + "@internal") + +(define_register_constraint "h" "MOF_REGS" + "@internal") + +(define_register_constraint "x" "SPECIAL_REGS" + "@internal") + +(define_register_constraint "c" "CC0_REGS" + "@internal") + +;; Integer constraints. +(define_constraint "I" + "MOVEQ, CMPQ, ANDQ, ORQ." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32, 31)"))) + +(define_constraint "J" + "ADDQ, SUBQ." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 63)"))) + +(define_constraint "Kc" + "ASRQ, BTSTQ, LSRQ, LSLQ." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 31)"))) + +(define_constraint "Kp" + "A power of two." + (and (match_code "const_int") + (match_test "exact_log2 (ival) >= 0"))) + +(define_constraint "L" + "A 16-bit signed number." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 32767)"))) + +(define_constraint "M" + "The constant 0 for CLEAR." + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "N" + "A negative ADDQ or SUBQ." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -63, -1)"))) + +(define_constraint "O" + "Quickened ints, QI and HI." + (and (match_code "const_int") + (ior (match_test "IN_RANGE (ival, (65535 - 31), 65535)") + (match_test "IN_RANGE (ival, (255 - 31), 255)")))) + +(define_constraint "P" + "A 16-bit number signed *or* unsigned." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 65535)"))) + +;; Floating-point constant constraints. +(define_constraint "G" + "The floating point zero constant" + (and (match_code "const_double") + (match_test "GET_MODE_CLASS (mode) == MODE_FLOAT") + (match_test "op == CONST0_RTX (mode)"))) + +;; Memory constraints. + +;; Just an indirect register (happens to also be "all" slottable +;; memory addressing modes not covered by other constraints, i.e. '>'). +(define_memory_constraint "Q" + "@internal" + (and (match_code "mem") + (match_test "cris_base_p (XEXP (op, 0), reload_in_progress + || reload_completed)"))) + +;; Extra constraints. +(define_constraint "R" + "An operand to BDAP or BIAP." + ;; A BIAP; r.S? 
+ (ior (match_test "cris_biap_index_p (op, reload_in_progress + || reload_completed)") + ;; A [reg] or (int) [reg], maybe with post-increment. + (match_test "cris_bdap_index_p (op, reload_in_progress + || reload_completed)") + (match_test "cris_constant_index_p (op)"))) + +(define_constraint "T" + "Memory three-address operand." + ;; All are indirect-memory: + (and (match_code "mem") + ;; Double indirect: [[reg]] or [[reg+]]? + (ior (and (match_code "mem" "0") + (match_test "cris_base_or_autoincr_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)")) + ;; Just an explicit indirect reference: [const]? + (match_test "CONSTANT_P (XEXP (op, 0))") + ;; Something that is indexed; [...+...]? + (and (match_code "plus" "0") + ;; A BDAP constant: [reg+(8|16|32)bit offset]? + (ior (and (match_test "cris_base_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)") + (match_test "cris_constant_index_p (XEXP (XEXP (op, 0), 1))")) + ;; A BDAP register: [reg+[reg(+)].S]? + (and (match_test "cris_base_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)") + (match_test "cris_bdap_index_p (XEXP (XEXP (op, 0), 1), + reload_in_progress + || reload_completed)")) + ;; Same, but with swapped arguments (no canonical + ;; ordering between e.g. REG and MEM as of LAST_UPDATED + ;; "Thu May 12 03:59:11 UTC 2005"). + (and (match_test "cris_base_p (XEXP (XEXP (op, 0), 1), + reload_in_progress + || reload_completed)") + (match_test "cris_bdap_index_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)")) + ;; A BIAP: [reg+reg.S] (MULT comes first). + (and (match_test "cris_base_p (XEXP (XEXP (op, 0), 1), + reload_in_progress + || reload_completed)") + (match_test "cris_biap_index_p (XEXP (XEXP (op, 0), 0), + reload_in_progress + || reload_completed)"))))))) + +(define_constraint "S" + "PIC-constructs for symbols." + (and (match_test "flag_pic") + (match_code "const") + (match_test "cris_valid_pic_const (op, false)"))) + +(define_constraint "U" + "@internal" + (and (match_test "flag_pic") + (match_test "CONSTANT_P (op)") + (match_operand 0 "cris_nonmemory_operand_or_callable_symbol"))) + diff --git a/gcc-4.9/gcc/config/cris/cris-protos.h b/gcc-4.9/gcc/config/cris/cris-protos.h new file mode 100644 index 000000000..0fdcafe52 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris-protos.h @@ -0,0 +1,67 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Axis Communications. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Prototypes for the CRIS port. 
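A standalone C sketch of the value ranges behind some of the CRIS integer constraints defined in cris/constraints.md above (I, J, Kc, L and P; the Kp, M, N and O letters are omitted). The in_range and classify helpers are made-up names for illustration and only echo the IN_RANGE tests from the constraint definitions.

#include <stdio.h>

/* Sketch of which CRIS quick-immediate constraints a constant satisfies.  */
static int
in_range (long v, long lo, long hi)
{
  return v >= lo && v <= hi;
}

static void
classify (long ival)
{
  printf ("%6ld:%s%s%s%s%s\n", ival,
          in_range (ival, -32, 31)       ? " I (moveq/cmpq/andq/orq)"        : "",
          in_range (ival, 0, 63)         ? " J (addq/subq)"                  : "",
          in_range (ival, 0, 31)         ? " Kc (asrq/btstq/lsrq/lslq)"      : "",
          in_range (ival, -32768, 32767) ? " L (16-bit signed)"              : "",
          in_range (ival, -32768, 65535) ? " P (16-bit signed or unsigned)"  : "");
}

int
main (void)
{
  classify (17);      /* Fits I, J, Kc, L and P.  */
  classify (-40);     /* Fits L and P only.  */
  classify (40000);   /* Fits P only (unsigned 16-bit).  */
  return 0;
}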
*/ + +extern bool cris_simple_epilogue (void); +#ifdef RTX_CODE +extern const char *cris_op_str (rtx); +extern void cris_notice_update_cc (rtx, rtx); +extern bool cris_reload_address_legitimized (rtx, enum machine_mode, int, int, int); +extern int cris_side_effect_mode_ok (enum rtx_code, rtx *, int, int, + int, int, int); +extern bool cris_cc0_user_requires_cmp (rtx); +extern rtx cris_return_addr_rtx (int, rtx); +extern rtx cris_split_movdx (rtx *); +extern int cris_legitimate_pic_operand (rtx); +extern enum cris_pic_symbol_type cris_pic_symbol_type_of (const_rtx); +extern bool cris_valid_pic_const (const_rtx, bool); +extern bool cris_constant_index_p (const_rtx); +extern bool cris_base_p (const_rtx, bool); +extern bool cris_base_or_autoincr_p (const_rtx, bool); +extern bool cris_bdap_index_p (const_rtx, bool); +extern bool cris_biap_index_p (const_rtx, bool); +extern bool cris_legitimate_address_p (enum machine_mode, rtx, bool); +extern bool cris_store_multiple_op_p (rtx); +extern bool cris_movem_load_rest_p (rtx, int); +extern void cris_asm_output_symbol_ref (FILE *, rtx); +extern int cris_cfun_uses_pic_table (void); +extern void cris_asm_output_case_end (FILE *, int, rtx); +extern rtx cris_gen_movem_load (rtx, rtx, int); +extern rtx cris_emit_movem_store (rtx, rtx, int, bool); +extern void cris_expand_pic_call_address (rtx *); +extern void cris_order_for_addsi3 (rtx *, int); +extern void cris_emit_trap_for_misalignment (rtx); +#endif /* RTX_CODE */ +extern void cris_asm_output_label_ref (FILE *, char *); +extern void cris_asm_output_ident (const char *); +extern void cris_expand_prologue (void); +extern void cris_expand_epilogue (void); +extern void cris_expand_return (bool); +extern bool cris_return_address_on_stack_for_return (void); +extern bool cris_return_address_on_stack (void); +extern void cris_pragma_expand_mul (struct cpp_reader *); + +/* Need one that returns an int; usable in expressions. */ +extern int cris_fatal (char *); + +extern int cris_initial_elimination_offset (int, int); + +extern void cris_init_expanders (void); diff --git a/gcc-4.9/gcc/config/cris/cris.c b/gcc-4.9/gcc/config/cris/cris.c new file mode 100644 index 000000000..209f127a6 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris.c @@ -0,0 +1,4359 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "varasm.h" +#include "stor-layout.h" +#include "calls.h" +#include "stmt.h" +#include "expr.h" +#include "except.h" +#include "function.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "reload.h" +#include "tm_p.h" +#include "debug.h" +#include "output.h" +#include "tm-constrs.h" +#include "target.h" +#include "target-def.h" +#include "ggc.h" +#include "optabs.h" +#include "df.h" +#include "opts.h" +#include "cgraph.h" + +/* Usable when we have an amount to add or subtract, and want the + optimal size of the insn. */ +#define ADDITIVE_SIZE_MODIFIER(size) \ + ((size) <= 63 ? "q" : (size) <= 255 ? "u.b" : (size) <= 65535 ? "u.w" : ".d") + +#define LOSE_AND_RETURN(msgid, x) \ + do \ + { \ + cris_operand_lossage (msgid, x); \ + return; \ + } while (0) + +enum cris_retinsn_type + { CRIS_RETINSN_UNKNOWN = 0, CRIS_RETINSN_RET, CRIS_RETINSN_JUMP }; + +/* Per-function machine data. */ +struct GTY(()) machine_function + { + int needs_return_address_on_stack; + + /* This is the number of registers we save in the prologue due to + stdarg. */ + int stdarg_regs; + + enum cris_retinsn_type return_type; + }; + +/* This little fix suppresses the 'u' or 's' when '%e' in assembly + pattern. */ +static char cris_output_insn_is_bound = 0; + +/* In code for output macros, this is how we know whether e.g. constant + goes in code or in a static initializer. */ +static int in_code = 0; + +/* Fix for reg_overlap_mentioned_p. */ +static int cris_reg_overlap_mentioned_p (rtx, rtx); + +static enum machine_mode cris_promote_function_mode (const_tree, enum machine_mode, + int *, const_tree, int); + +static unsigned int cris_atomic_align_for_mode (enum machine_mode); + +static void cris_print_base (rtx, FILE *); + +static void cris_print_index (rtx, FILE *); + +static void cris_output_addr_const (FILE *, rtx); + +static struct machine_function * cris_init_machine_status (void); + +static rtx cris_struct_value_rtx (tree, int); + +static void cris_setup_incoming_varargs (cumulative_args_t, enum machine_mode, + tree type, int *, int); + +static int cris_initial_frame_pointer_offset (void); + +static void cris_operand_lossage (const char *, rtx); + +static int cris_reg_saved_in_regsave_area (unsigned int, bool); + +static void cris_print_operand (FILE *, rtx, int); + +static void cris_print_operand_address (FILE *, rtx); + +static bool cris_print_operand_punct_valid_p (unsigned char code); + +static bool cris_output_addr_const_extra (FILE *, rtx); + +static void cris_conditional_register_usage (void); + +static void cris_asm_output_mi_thunk + (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); + +static void cris_file_start (void); +static void cris_init_libfuncs (void); + +static reg_class_t cris_preferred_reload_class (rtx, reg_class_t); + +static int cris_register_move_cost (enum machine_mode, reg_class_t, reg_class_t); +static int cris_memory_move_cost (enum machine_mode, reg_class_t, bool); +static bool cris_rtx_costs (rtx, int, int, int, int *, bool); +static int cris_address_cost (rtx, enum machine_mode, addr_space_t, bool); +static bool cris_pass_by_reference (cumulative_args_t, enum machine_mode, + const_tree, bool); +static int cris_arg_partial_bytes (cumulative_args_t, enum machine_mode, + tree, bool); +static rtx 
cris_function_arg (cumulative_args_t, enum machine_mode, + const_tree, bool); +static rtx cris_function_incoming_arg (cumulative_args_t, + enum machine_mode, const_tree, bool); +static void cris_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static tree cris_md_asm_clobbers (tree, tree, tree); + +static void cris_option_override (void); + +static bool cris_frame_pointer_required (void); + +static void cris_asm_trampoline_template (FILE *); +static void cris_trampoline_init (rtx, tree, rtx); + +static rtx cris_function_value(const_tree, const_tree, bool); +static rtx cris_libcall_value (enum machine_mode, const_rtx); +static bool cris_function_value_regno_p (const unsigned int); +static void cris_file_end (void); + +/* This is the parsed result of the "-max-stack-stackframe=" option. If + it (still) is zero, then there was no such option given. */ +int cris_max_stackframe = 0; + +/* This is the parsed result of the "-march=" option, if given. */ +int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION; + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.dword\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +/* We need to define these, since the 2byte, 4byte, 8byte op:s are only + available in ELF. These "normal" pseudos do not have any alignment + constraints or side-effects. */ +#undef TARGET_ASM_UNALIGNED_HI_OP +#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP + +#undef TARGET_ASM_UNALIGNED_SI_OP +#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP + +#undef TARGET_ASM_UNALIGNED_DI_OP +#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND cris_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS cris_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P cris_print_operand_punct_valid_p +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA cris_output_addr_const_extra + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE cris_conditional_register_usage + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK cris_asm_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START cris_file_start +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END cris_file_end + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS cris_init_libfuncs + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define TARGET_LEGITIMATE_ADDRESS_P cris_legitimate_address_p + +#undef TARGET_PREFERRED_RELOAD_CLASS +#define TARGET_PREFERRED_RELOAD_CLASS cris_preferred_reload_class + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST cris_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST cris_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS cris_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST cris_address_cost + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE cris_promote_function_mode + +#undef TARGET_ATOMIC_ALIGN_FOR_MODE +#define TARGET_ATOMIC_ALIGN_FOR_MODE cris_atomic_align_for_mode + +#undef TARGET_STRUCT_VALUE_RTX +#define TARGET_STRUCT_VALUE_RTX 
cris_struct_value_rtx +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS cris_setup_incoming_varargs +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE cris_pass_by_reference +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES cris_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG cris_function_arg +#undef TARGET_FUNCTION_INCOMING_ARG +#define TARGET_FUNCTION_INCOMING_ARG cris_function_incoming_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE cris_function_arg_advance +#undef TARGET_MD_ASM_CLOBBERS +#define TARGET_MD_ASM_CLOBBERS cris_md_asm_clobbers +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED cris_frame_pointer_required + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE cris_option_override + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE cris_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT cris_trampoline_init + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE cris_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE cris_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P cris_function_value_regno_p + +struct gcc_target targetm = TARGET_INITIALIZER; + +/* Helper for cris_load_multiple_op and cris_ret_movem_op. */ + +bool +cris_movem_load_rest_p (rtx op, int offs) +{ + unsigned int reg_count = XVECLEN (op, 0) - offs; + rtx src_addr; + int i; + rtx elt; + int setno; + int regno_dir = 1; + unsigned int regno = 0; + + /* Perform a quick check so we don't blow up below. FIXME: Adjust for + other than (MEM reg). */ + if (reg_count <= 1 + || GET_CODE (XVECEXP (op, 0, offs)) != SET + || !REG_P (SET_DEST (XVECEXP (op, 0, offs))) + || !MEM_P (SET_SRC (XVECEXP (op, 0, offs)))) + return false; + + /* Check a possible post-inc indicator. */ + if (GET_CODE (SET_SRC (XVECEXP (op, 0, offs + 1))) == PLUS) + { + rtx reg = XEXP (SET_SRC (XVECEXP (op, 0, offs + 1)), 0); + rtx inc = XEXP (SET_SRC (XVECEXP (op, 0, offs + 1)), 1); + + reg_count--; + + if (reg_count == 1 + || !REG_P (reg) + || !REG_P (SET_DEST (XVECEXP (op, 0, offs + 1))) + || REGNO (reg) != REGNO (SET_DEST (XVECEXP (op, 0, offs + 1))) + || !CONST_INT_P (inc) + || INTVAL (inc) != (HOST_WIDE_INT) reg_count * 4) + return false; + i = offs + 2; + } + else + i = offs + 1; + + if (!TARGET_V32) + { + regno_dir = -1; + regno = reg_count - 1; + } + + elt = XVECEXP (op, 0, offs); + src_addr = XEXP (SET_SRC (elt), 0); + + if (GET_CODE (elt) != SET + || !REG_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) != regno + || !MEM_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || !memory_address_p (SImode, src_addr)) + return false; + + for (setno = 1; i < XVECLEN (op, 0); setno++, i++) + { + rtx elt = XVECEXP (op, 0, i); + regno += regno_dir; + + if (GET_CODE (elt) != SET + || !REG_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || REGNO (SET_DEST (elt)) != regno + || !MEM_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS + || ! rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr) + || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1)) + || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != setno * 4) + return false; + } + + return true; +} + +/* Worker function for predicate for the parallel contents in a movem + to-memory. 
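A standalone C sketch of the shape invariant that cris_movem_load_rest_p above enforces on a movem-style parallel: consecutive registers (descending, as in the pre-V32 ordering) loaded from consecutive SImode words, with any post-increment equal to 4 bytes per register. Plain arrays stand in for the SETs of the parallel; the RTL details (modes, rtx_equal_p on the base address, the post-increment SET itself) are omitted, and movem_shape_ok is a made-up name.

#include <stdio.h>

/* Sketch: REGNOS[i] and OFFSETS[i] describe the i-th load of the parallel;
   INC_AMOUNT is the post-increment, or 0 if there is none.  */
static int
movem_shape_ok (const int *regnos, const int *offsets, int n, int inc_amount)
{
  int i;

  for (i = 1; i < n; i++)
    /* Register numbers descend by one (pre-V32 order) and each source
       word is 4 bytes further on.  */
    if (regnos[i] != regnos[i - 1] - 1 || offsets[i] != offsets[i - 1] + 4)
      return 0;

  return inc_amount == 0 || inc_amount == n * 4;
}

int
main (void)
{
  int regs[] = { 3, 2, 1, 0 };
  int offs[] = { 0, 4, 8, 12 };

  printf ("%d\n", movem_shape_ok (regs, offs, 4, 16));  /* 1: valid shape.  */
  printf ("%d\n", movem_shape_ok (regs, offs, 4, 8));   /* 0: wrong increment.  */
  return 0;
}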
*/ + +bool +cris_store_multiple_op_p (rtx op) +{ + int reg_count = XVECLEN (op, 0); + rtx dest; + rtx dest_addr; + rtx dest_base; + int i; + rtx elt; + int setno; + int regno_dir = 1; + int regno = 0; + int offset = 0; + + /* Perform a quick check so we don't blow up below. FIXME: Adjust for + other than (MEM reg) and (MEM (PLUS reg const)). */ + if (reg_count <= 1) + return false; + + elt = XVECEXP (op, 0, 0); + + if (GET_CODE (elt) != SET) + return false; + + dest = SET_DEST (elt); + + if (!REG_P (SET_SRC (elt)) || !MEM_P (dest)) + return false; + + dest_addr = XEXP (dest, 0); + + /* Check a possible post-inc indicator. */ + if (GET_CODE (SET_SRC (XVECEXP (op, 0, 1))) == PLUS) + { + rtx reg = XEXP (SET_SRC (XVECEXP (op, 0, 1)), 0); + rtx inc = XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1); + + reg_count--; + + if (reg_count == 1 + || !REG_P (reg) + || !REG_P (SET_DEST (XVECEXP (op, 0, 1))) + || REGNO (reg) != REGNO (SET_DEST (XVECEXP (op, 0, 1))) + || !CONST_INT_P (inc) + /* Support increment by number of registers, and by the offset + of the destination, if it has the form (MEM (PLUS reg + offset)). */ + || !((REG_P (dest_addr) + && REGNO (dest_addr) == REGNO (reg) + && INTVAL (inc) == (HOST_WIDE_INT) reg_count * 4) + || (GET_CODE (dest_addr) == PLUS + && REG_P (XEXP (dest_addr, 0)) + && REGNO (XEXP (dest_addr, 0)) == REGNO (reg) + && CONST_INT_P (XEXP (dest_addr, 1)) + && INTVAL (XEXP (dest_addr, 1)) == INTVAL (inc)))) + return false; + + i = 2; + } + else + i = 1; + + if (!TARGET_V32) + { + regno_dir = -1; + regno = reg_count - 1; + } + + if (GET_CODE (elt) != SET + || !REG_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) != (unsigned int) regno + || !MEM_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode) + return false; + + if (REG_P (dest_addr)) + { + dest_base = dest_addr; + offset = 0; + } + else if (GET_CODE (dest_addr) == PLUS + && REG_P (XEXP (dest_addr, 0)) + && CONST_INT_P (XEXP (dest_addr, 1))) + { + dest_base = XEXP (dest_addr, 0); + offset = INTVAL (XEXP (dest_addr, 1)); + } + else + return false; + + for (setno = 1; i < XVECLEN (op, 0); setno++, i++) + { + rtx elt = XVECEXP (op, 0, i); + regno += regno_dir; + + if (GET_CODE (elt) != SET + || !REG_P (SET_SRC (elt)) + || GET_MODE (SET_SRC (elt)) != SImode + || REGNO (SET_SRC (elt)) != (unsigned int) regno + || !MEM_P (SET_DEST (elt)) + || GET_MODE (SET_DEST (elt)) != SImode + || GET_CODE (XEXP (SET_DEST (elt), 0)) != PLUS + || ! rtx_equal_p (XEXP (XEXP (SET_DEST (elt), 0), 0), dest_base) + || !CONST_INT_P (XEXP (XEXP (SET_DEST (elt), 0), 1)) + || INTVAL (XEXP (XEXP (SET_DEST (elt), 0), 1)) != setno * 4 + offset) + return false; + } + + return true; +} + +/* The TARGET_CONDITIONAL_REGISTER_USAGE worker. */ + +static void +cris_conditional_register_usage (void) +{ + /* FIXME: This isn't nice. We should be able to use that register for + something else if the PIC table isn't needed. */ + if (flag_pic) + fixed_regs[PIC_OFFSET_TABLE_REGNUM] + = call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + + /* Allow use of ACR (PC in pre-V32) and tweak order. */ + if (TARGET_V32) + { + static const int reg_alloc_order_v32[] = REG_ALLOC_ORDER_V32; + unsigned int i; + + fixed_regs[CRIS_ACR_REGNUM] = 0; + + for (i = 0; + i < sizeof (reg_alloc_order_v32)/sizeof (reg_alloc_order_v32[0]); + i++) + reg_alloc_order[i] = reg_alloc_order_v32[i]; + } + + if (TARGET_HAS_MUL_INSNS) + fixed_regs[CRIS_MOF_REGNUM] = 0; + + /* On early versions, we must use the 16-bit condition-code register, + which has another name. 
*/ + if (cris_cpu_version < 8) + reg_names[CRIS_CC0_REGNUM] = "ccr"; +} + +/* Return crtl->uses_pic_offset_table. For use in cris.md, + since some generated files do not include function.h. */ + +int +cris_cfun_uses_pic_table (void) +{ + return crtl->uses_pic_offset_table; +} + +/* Given an rtx, return the text string corresponding to the CODE of X. + Intended for use in the assembly language output section of a + define_insn. */ + +const char * +cris_op_str (rtx x) +{ + cris_output_insn_is_bound = 0; + switch (GET_CODE (x)) + { + case PLUS: + return "add"; + break; + + case MINUS: + return "sub"; + break; + + case MULT: + /* This function is for retrieving a part of an instruction name for + an operator, for immediate output. If that ever happens for + MULT, we need to apply TARGET_MUL_BUG in the caller. Make sure + we notice. */ + internal_error ("MULT case in cris_op_str"); + break; + + case DIV: + return "div"; + break; + + case AND: + return "and"; + break; + + case IOR: + return "or"; + break; + + case XOR: + return "xor"; + break; + + case NOT: + return "not"; + break; + + case ASHIFT: + return "lsl"; + break; + + case LSHIFTRT: + return "lsr"; + break; + + case ASHIFTRT: + return "asr"; + break; + + case UMIN: + /* Used to control the sign/zero-extend character for the 'E' modifier. + BOUND has none. */ + cris_output_insn_is_bound = 1; + return "bound"; + break; + + default: + return "Unknown operator"; + break; + } +} + +/* Emit an error message when we're in an asm, and a fatal error for + "normal" insns. Formatted output isn't easily implemented, since we + use output_operand_lossage to output the actual message and handle the + categorization of the error. */ + +static void +cris_operand_lossage (const char *msgid, rtx op) +{ + debug_rtx (op); + output_operand_lossage ("%s", msgid); +} + +/* Print an index part of an address to file. */ + +static void +cris_print_index (rtx index, FILE *file) +{ + /* Make the index "additive" unless we'll output a negative number, in + which case the sign character is free (as in free beer). */ + if (!CONST_INT_P (index) || INTVAL (index) >= 0) + putc ('+', file); + + if (REG_P (index)) + fprintf (file, "$%s.b", reg_names[REGNO (index)]); + else if (CONSTANT_P (index)) + cris_output_addr_const (file, index); + else if (GET_CODE (index) == MULT) + { + fprintf (file, "$%s.", + reg_names[REGNO (XEXP (index, 0))]); + + putc (INTVAL (XEXP (index, 1)) == 2 ? 'w' : 'd', file); + } + else if (GET_CODE (index) == SIGN_EXTEND && MEM_P (XEXP (index, 0))) + { + rtx inner = XEXP (index, 0); + rtx inner_inner = XEXP (inner, 0); + + if (GET_CODE (inner_inner) == POST_INC) + { + fprintf (file, "[$%s+].", + reg_names[REGNO (XEXP (inner_inner, 0))]); + putc (GET_MODE (inner) == HImode ? 'w' : 'b', file); + } + else + { + fprintf (file, "[$%s].", reg_names[REGNO (inner_inner)]); + + putc (GET_MODE (inner) == HImode ? 'w' : 'b', file); + } + } + else if (MEM_P (index)) + { + rtx inner = XEXP (index, 0); + if (GET_CODE (inner) == POST_INC) + fprintf (file, "[$%s+].d", reg_names[REGNO (XEXP (inner, 0))]); + else + fprintf (file, "[$%s].d", reg_names[REGNO (inner)]); + } + else + cris_operand_lossage ("unexpected index-type in cris_print_index", + index); +} + +/* Print a base rtx of an address to file. 
*/ + +static void +cris_print_base (rtx base, FILE *file) +{ + if (REG_P (base)) + fprintf (file, "$%s", reg_names[REGNO (base)]); + else if (GET_CODE (base) == POST_INC) + { + gcc_assert (REGNO (XEXP (base, 0)) != CRIS_ACR_REGNUM); + fprintf (file, "$%s+", reg_names[REGNO (XEXP (base, 0))]); + } + else + cris_operand_lossage ("unexpected base-type in cris_print_base", + base); +} + +/* Usable as a guard in expressions. */ + +int +cris_fatal (char *arg) +{ + internal_error (arg); + + /* We'll never get here; this is just to appease compilers. */ + return 0; +} + +/* Return nonzero if REGNO is an ordinary register that *needs* to be + saved together with other registers, possibly by a MOVEM instruction, + or is saved for target-independent reasons. There may be + target-dependent reasons to save the register anyway; this is just a + wrapper for a complicated conditional. */ + +static int +cris_reg_saved_in_regsave_area (unsigned int regno, bool got_really_used) +{ + return + (((df_regs_ever_live_p (regno) + && !call_used_regs[regno]) + || (regno == PIC_OFFSET_TABLE_REGNUM + && (got_really_used + /* It is saved anyway, if there would be a gap. */ + || (flag_pic + && df_regs_ever_live_p (regno + 1) + && !call_used_regs[regno + 1])))) + && (regno != FRAME_POINTER_REGNUM || !frame_pointer_needed) + && regno != CRIS_SRP_REGNUM) + || (crtl->calls_eh_return + && (regno == EH_RETURN_DATA_REGNO (0) + || regno == EH_RETURN_DATA_REGNO (1) + || regno == EH_RETURN_DATA_REGNO (2) + || regno == EH_RETURN_DATA_REGNO (3))); +} + +/* The PRINT_OPERAND worker. */ + +static void +cris_print_operand (FILE *file, rtx x, int code) +{ + rtx operand = x; + + /* Size-strings corresponding to MULT expressions. */ + static const char *const mults[] = { "BAD:0", ".b", ".w", "BAD:3", ".d" }; + + /* New code entries should just be added to the switch below. If + handling is finished, just return. If handling was just a + modification of the operand, the modified operand should be put in + "operand", and then do a break to let default handling + (zero-modifier) output the operand. */ + + switch (code) + { + case 'b': + /* Print the unsigned supplied integer as if it were signed + and < 0, i.e print 255 or 65535 as -1, 254, 65534 as -2, etc. */ + if (!satisfies_constraint_O (x)) + LOSE_AND_RETURN ("invalid operand for 'b' modifier", x); + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + INTVAL (x)| (INTVAL (x) <= 255 ? ~255 : ~65535)); + return; + + case 'x': + /* Print assembler code for operator. */ + fprintf (file, "%s", cris_op_str (operand)); + return; + + case 'o': + { + /* A movem modifier working on a parallel; output the register + name. */ + int regno; + + if (GET_CODE (x) != PARALLEL) + LOSE_AND_RETURN ("invalid operand for 'o' modifier", x); + + /* The second item can be (set reg (plus reg const)) to denote a + postincrement. */ + regno + = (GET_CODE (SET_SRC (XVECEXP (x, 0, 1))) == PLUS + ? XVECLEN (x, 0) - 2 + : XVECLEN (x, 0) - 1); + + fprintf (file, "$%s", reg_names [regno]); + } + return; + + case 'O': + { + /* A similar movem modifier; output the memory operand. */ + rtx addr; + + if (GET_CODE (x) != PARALLEL) + LOSE_AND_RETURN ("invalid operand for 'O' modifier", x); + + /* The lowest mem operand is in the first item, but perhaps it + needs to be output as postincremented. */ + addr = MEM_P (SET_SRC (XVECEXP (x, 0, 0))) + ? XEXP (SET_SRC (XVECEXP (x, 0, 0)), 0) + : XEXP (SET_DEST (XVECEXP (x, 0, 0)), 0); + + /* The second item can be a (set reg (plus reg const)) to denote + a modification. 
*/ + if (GET_CODE (SET_SRC (XVECEXP (x, 0, 1))) == PLUS) + { + /* It's a post-increment, if the address is a naked (reg). */ + if (REG_P (addr)) + addr = gen_rtx_POST_INC (SImode, addr); + else + { + /* Otherwise, it's a side-effect; RN=RN+M. */ + fprintf (file, "[$%s=$%s%s%d]", + reg_names [REGNO (SET_DEST (XVECEXP (x, 0, 1)))], + reg_names [REGNO (XEXP (addr, 0))], + INTVAL (XEXP (addr, 1)) < 0 ? "" : "+", + (int) INTVAL (XEXP (addr, 1))); + return; + } + } + output_address (addr); + } + return; + + case 'p': + /* Adjust a power of two to its log2. */ + if (!CONST_INT_P (x) || exact_log2 (INTVAL (x)) < 0 ) + LOSE_AND_RETURN ("invalid operand for 'p' modifier", x); + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + + case 's': + /* For an integer, print 'b' or 'w' if <= 255 or <= 65535 + respectively. This modifier also terminates the inhibiting + effects of the 'x' modifier. */ + cris_output_insn_is_bound = 0; + if (GET_MODE (x) == VOIDmode && CONST_INT_P (x)) + { + if (INTVAL (x) >= 0) + { + if (INTVAL (x) <= 255) + putc ('b', file); + else if (INTVAL (x) <= 65535) + putc ('w', file); + else + putc ('d', file); + } + else + putc ('d', file); + return; + } + + /* For a non-integer, print the size of the operand. */ + putc ((GET_MODE (x) == SImode || GET_MODE (x) == SFmode) + ? 'd' : GET_MODE (x) == HImode ? 'w' + : GET_MODE (x) == QImode ? 'b' + /* If none of the above, emit an erroneous size letter. */ + : 'X', + file); + return; + + case 'z': + /* Const_int: print b for -127 <= x <= 255, + w for -32768 <= x <= 65535, else die. */ + if (!CONST_INT_P (x) + || INTVAL (x) < -32768 || INTVAL (x) > 65535) + LOSE_AND_RETURN ("invalid operand for 'z' modifier", x); + putc (INTVAL (x) >= -128 && INTVAL (x) <= 255 ? 'b' : 'w', file); + return; + + case 'Z': + /* If this is a GOT-symbol, print the size-letter corresponding to + -fpic/-fPIC. For everything else, print "d". */ + putc ((flag_pic == 1 + && GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == UNSPEC + && XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_GOTREAD) + ? 'w' : 'd', file); + return; + + case '#': + /* Output a 'nop' if there's nothing for the delay slot. + This method stolen from the sparc files. */ + if (dbr_sequence_length () == 0) + fputs ("\n\tnop", file); + return; + + case '!': + /* Output directive for alignment padded with "nop" insns. + Optimizing for size, it's plain 4-byte alignment, otherwise we + align the section to a cache-line (32 bytes) and skip at max 2 + bytes, i.e. we skip if it's the last insn on a cache-line. The + latter is faster by a small amount (for two test-programs 99.6% + and 99.9%) and larger by a small amount (ditto 100.1% and + 100.2%). This is supposed to be the simplest yet performance- + wise least intrusive way to make sure the immediately following + (supposed) muls/mulu insn isn't located at the end of a + cache-line. */ + if (TARGET_MUL_BUG) + fputs (optimize_size + ? ".p2alignw 2,0x050f\n\t" + : ".p2alignw 5,0x050f,2\n\t", file); + return; + + case ':': + /* The PIC register. */ + if (! flag_pic) + internal_error ("invalid use of ':' modifier"); + fprintf (file, "$%s", reg_names [PIC_OFFSET_TABLE_REGNUM]); + return; + + case 'H': + /* Print high (most significant) part of something. */ + switch (GET_CODE (operand)) + { + case CONST_INT: + /* If we're having 64-bit HOST_WIDE_INTs, the whole (DImode) + value is kept here, and so may be other than 0 or -1. 
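A standalone C sketch of the size-letter choice made by the integer branch of the 's' modifier in cris_print_operand above: non-negative constants that fit in a byte or a word get 'b' or 'w', anything else gets 'd'. (The 'z' modifier differs in that it also accepts small negative values and rejects anything outside the word range.) cris_size_letter_for_int is a made-up name for illustration only.

#include <stdio.h>

/* Pick the CRIS operand-size suffix for an integer constant, mirroring
   the 's' modifier's integer branch.  */
static char
cris_size_letter_for_int (long val)
{
  if (val >= 0 && val <= 255)
    return 'b';
  if (val >= 0 && val <= 65535)
    return 'w';
  return 'd';
}

int
main (void)
{
  printf ("%c %c %c\n",
          cris_size_letter_for_int (200),    /* b */
          cris_size_letter_for_int (5000),   /* w */
          cris_size_letter_for_int (-1));    /* d */
  return 0;
}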
*/ + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + INTVAL (operand_subword (operand, 1, 0, DImode))); + return; + + case CONST_DOUBLE: + /* High part of a long long constant. */ + if (GET_MODE (operand) == VOIDmode) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_HIGH (x)); + return; + } + else + LOSE_AND_RETURN ("invalid operand for 'H' modifier", x); + + case REG: + /* Print reg + 1. Check that there's not an attempt to print + high-parts of registers like stack-pointer or higher, except + for SRP (where the "high part" is MOF). */ + if (REGNO (operand) > STACK_POINTER_REGNUM - 2 + && (REGNO (operand) != CRIS_SRP_REGNUM + || CRIS_SRP_REGNUM + 1 != CRIS_MOF_REGNUM + || fixed_regs[CRIS_MOF_REGNUM] != 0)) + LOSE_AND_RETURN ("bad register", operand); + fprintf (file, "$%s", reg_names[REGNO (operand) + 1]); + return; + + case MEM: + /* Adjust memory address to high part. */ + { + rtx adj_mem = operand; + int size + = GET_MODE_BITSIZE (GET_MODE (operand)) / BITS_PER_UNIT; + + /* Adjust so we can use two SImode in DImode. + Calling adj_offsettable_operand will make sure it is an + offsettable address. Don't do this for a postincrement + though; it should remain as it was. */ + if (GET_CODE (XEXP (adj_mem, 0)) != POST_INC) + adj_mem + = adjust_address (adj_mem, GET_MODE (adj_mem), size / 2); + + output_address (XEXP (adj_mem, 0)); + return; + } + + default: + LOSE_AND_RETURN ("invalid operand for 'H' modifier", x); + } + + case 'L': + /* Strip the MEM expression. */ + operand = XEXP (operand, 0); + break; + + case 'e': + /* Like 'E', but ignore state set by 'x'. FIXME: Use code + iterators and attributes in cris.md to avoid the need for %x + and %E (and %e) and state passed between those modifiers. */ + cris_output_insn_is_bound = 0; + /* FALL THROUGH. */ + case 'E': + /* Print 's' if operand is SIGN_EXTEND or 'u' if ZERO_EXTEND unless + cris_output_insn_is_bound is nonzero. */ + if (GET_CODE (operand) != SIGN_EXTEND + && GET_CODE (operand) != ZERO_EXTEND + && !CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'e' modifier", x); + + if (cris_output_insn_is_bound) + { + cris_output_insn_is_bound = 0; + return; + } + + putc (GET_CODE (operand) == SIGN_EXTEND + || (CONST_INT_P (operand) && INTVAL (operand) < 0) + ? 's' : 'u', file); + return; + + case 'm': + /* Print the size letter of the inner element. We can do it by + calling ourselves with the 's' modifier. */ + if (GET_CODE (operand) != SIGN_EXTEND && GET_CODE (operand) != ZERO_EXTEND) + LOSE_AND_RETURN ("invalid operand for 'm' modifier", x); + cris_print_operand (file, XEXP (operand, 0), 's'); + return; + + case 'M': + /* Print the least significant part of operand. */ + if (GET_CODE (operand) == CONST_DOUBLE) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x)); + return; + } + else if (HOST_BITS_PER_WIDE_INT > 32 && CONST_INT_P (operand)) + { + fprintf (file, HOST_WIDE_INT_PRINT_HEX, + INTVAL (x) & ((unsigned int) 0x7fffffff * 2 + 1)); + return; + } + /* Otherwise the least significant part equals the normal part, + so handle it normally. */ + break; + + case 'A': + /* When emitting an add for the high part of a DImode constant, we + want to use addq for 0 and adds.w for -1. */ + if (!CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'A' modifier", x); + fprintf (file, INTVAL (operand) < 0 ? 
"adds.w" : "addq"); + return; + + case 'P': + /* For const_int operands, print the additive mnemonic and the + modified operand (byte-sized operands don't save anything): + N=MIN_INT..-65536: add.d N + -65535..-64: subu.w -N + -63..-1: subq -N + 0..63: addq N + 64..65535: addu.w N + 65536..MAX_INT: add.d N. + (Emitted mnemonics are capitalized to simplify testing.) + For anything else (N.B: only register is valid), print "add.d". */ + if (REG_P (operand)) + { + fprintf (file, "Add.d "); + + /* Deal with printing the operand by dropping through to the + normal path. */ + break; + } + else + { + int val; + gcc_assert (CONST_INT_P (operand)); + + val = INTVAL (operand); + if (!IN_RANGE (val, -65535, 65535)) + fprintf (file, "Add.d %d", val); + else if (val <= -64) + fprintf (file, "Subu.w %d", -val); + else if (val <= -1) + fprintf (file, "Subq %d", -val); + else if (val <= 63) + fprintf (file, "Addq %d", val); + else if (val <= 65535) + fprintf (file, "Addu.w %d", val); + return; + } + break; + + case 'q': + /* If the operand is an integer -31..31, print "q" else ".d". */ + if (CONST_INT_P (operand) && IN_RANGE (INTVAL (operand), -31, 31)) + fprintf (file, "q"); + else + fprintf (file, ".d"); + return; + + case 'd': + /* If this is a GOT symbol, force it to be emitted as :GOT and + :GOTPLT regardless of -fpic (i.e. not as :GOT16, :GOTPLT16). + Avoid making this too much of a special case. */ + if (flag_pic == 1 && CONSTANT_P (operand)) + { + int flag_pic_save = flag_pic; + + flag_pic = 2; + cris_output_addr_const (file, operand); + flag_pic = flag_pic_save; + return; + } + break; + + case 'D': + /* When emitting an sub for the high part of a DImode constant, we + want to use subq for 0 and subs.w for -1. */ + if (!CONST_INT_P (operand)) + LOSE_AND_RETURN ("invalid operand for 'D' modifier", x); + fprintf (file, INTVAL (operand) < 0 ? "subs.w" : "subq"); + return; + + case 'S': + /* Print the operand as the index-part of an address. + Easiest way out is to use cris_print_index. */ + cris_print_index (operand, file); + return; + + case 'T': + /* Print the size letter for an operand to a MULT, which must be a + const_int with a suitable value. */ + if (!CONST_INT_P (operand) || INTVAL (operand) > 4) + LOSE_AND_RETURN ("invalid operand for 'T' modifier", x); + fprintf (file, "%s", mults[INTVAL (operand)]); + return; + + case 'u': + /* Print "u.w" if a GOT symbol and flag_pic == 1, else ".d". */ + if (flag_pic == 1 + && GET_CODE (operand) == CONST + && GET_CODE (XEXP (operand, 0)) == UNSPEC + && XINT (XEXP (operand, 0), 1) == CRIS_UNSPEC_GOTREAD) + fprintf (file, "u.w"); + else + fprintf (file, ".d"); + return; + + case 0: + /* No code, print as usual. */ + break; + + default: + LOSE_AND_RETURN ("invalid operand modifier letter", x); + } + + /* Print an operand as without a modifier letter. */ + switch (GET_CODE (operand)) + { + case REG: + if (REGNO (operand) > 15 + && REGNO (operand) != CRIS_MOF_REGNUM + && REGNO (operand) != CRIS_SRP_REGNUM + && REGNO (operand) != CRIS_CC0_REGNUM) + internal_error ("internal error: bad register: %d", REGNO (operand)); + fprintf (file, "$%s", reg_names[REGNO (operand)]); + return; + + case MEM: + output_address (XEXP (operand, 0)); + return; + + case CONST_DOUBLE: + if (GET_MODE (operand) == VOIDmode) + /* A long long constant. */ + output_addr_const (file, operand); + else + { + /* Only single precision is allowed as plain operands the + moment. FIXME: REAL_VALUE_FROM_CONST_DOUBLE isn't + documented. 
*/ + REAL_VALUE_TYPE r; + long l; + + /* FIXME: Perhaps check overflow of the "single". */ + REAL_VALUE_FROM_CONST_DOUBLE (r, operand); + REAL_VALUE_TO_TARGET_SINGLE (r, l); + + fprintf (file, "0x%lx", l); + } + return; + + case UNSPEC: + /* Fall through. */ + case CONST: + cris_output_addr_const (file, operand); + return; + + case MULT: + case ASHIFT: + { + /* For a (MULT (reg X) const_int) we output "rX.S". */ + int i = CONST_INT_P (XEXP (operand, 1)) + ? INTVAL (XEXP (operand, 1)) : INTVAL (XEXP (operand, 0)); + rtx reg = CONST_INT_P (XEXP (operand, 1)) + ? XEXP (operand, 0) : XEXP (operand, 1); + + if (!REG_P (reg) + || (!CONST_INT_P (XEXP (operand, 0)) + && !CONST_INT_P (XEXP (operand, 1)))) + LOSE_AND_RETURN ("unexpected multiplicative operand", x); + + cris_print_base (reg, file); + fprintf (file, ".%c", + i == 0 || (i == 1 && GET_CODE (operand) == MULT) ? 'b' + : i == 4 ? 'd' + : (i == 2 && GET_CODE (operand) == MULT) || i == 1 ? 'w' + : 'd'); + return; + } + + default: + /* No need to handle all strange variants, let output_addr_const + do it for us. */ + if (CONSTANT_P (operand)) + { + cris_output_addr_const (file, operand); + return; + } + + LOSE_AND_RETURN ("unexpected operand", x); + } +} + +static bool +cris_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '#' || code == '!' || code == ':'); +} + +/* The PRINT_OPERAND_ADDRESS worker. */ + +static void +cris_print_operand_address (FILE *file, rtx x) +{ + /* All these were inside MEM:s so output indirection characters. */ + putc ('[', file); + + if (CONSTANT_ADDRESS_P (x)) + cris_output_addr_const (file, x); + else if (cris_base_or_autoincr_p (x, true)) + cris_print_base (x, file); + else if (GET_CODE (x) == PLUS) + { + rtx x1, x2; + + x1 = XEXP (x, 0); + x2 = XEXP (x, 1); + if (cris_base_p (x1, true)) + { + cris_print_base (x1, file); + cris_print_index (x2, file); + } + else if (cris_base_p (x2, true)) + { + cris_print_base (x2, file); + cris_print_index (x1, file); + } + else + LOSE_AND_RETURN ("unrecognized address", x); + } + else if (MEM_P (x)) + { + /* A DIP. Output more indirection characters. */ + putc ('[', file); + cris_print_base (XEXP (x, 0), file); + putc (']', file); + } + else + LOSE_AND_RETURN ("unrecognized address", x); + + putc (']', file); +} + +/* The RETURN_ADDR_RTX worker. + We mark that the return address is used, either by EH or + __builtin_return_address, for use by the function prologue and + epilogue. FIXME: This isn't optimal; we just use the mark in the + prologue and epilogue to say that the return address is to be stored + in the stack frame. We could return SRP for leaf-functions and use the + initial-value machinery. */ + +rtx +cris_return_addr_rtx (int count, rtx frameaddr ATTRIBUTE_UNUSED) +{ + cfun->machine->needs_return_address_on_stack = 1; + + /* The return-address is stored just above the saved frame-pointer (if + present). Apparently we can't eliminate from the frame-pointer in + that direction, so use the incoming args (maybe pretended) pointer. */ + return count == 0 + ? gen_rtx_MEM (Pmode, plus_constant (Pmode, virtual_incoming_args_rtx, -4)) + : NULL_RTX; +} + +/* Accessor used in cris.md:return because cfun->machine isn't available + there. */ + +bool +cris_return_address_on_stack (void) +{ + return df_regs_ever_live_p (CRIS_SRP_REGNUM) + || cfun->machine->needs_return_address_on_stack; +} + +/* Accessor used in cris.md:return because cfun->machine isn't available + there. 
*/ + +bool +cris_return_address_on_stack_for_return (void) +{ + return cfun->machine->return_type == CRIS_RETINSN_RET ? false + : cris_return_address_on_stack (); +} + +/* This used to be the INITIAL_FRAME_POINTER_OFFSET worker; now only + handles FP -> SP elimination offset. */ + +static int +cris_initial_frame_pointer_offset (void) +{ + int regno; + + /* Initial offset is 0 if we don't have a frame pointer. */ + int offs = 0; + bool got_really_used = false; + + if (crtl->uses_pic_offset_table) + { + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), + NULL_RTX); + pop_topmost_sequence (); + } + + /* And 4 for each register pushed. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + offs += 4; + + /* And then, last, we add the locals allocated. */ + offs += get_frame_size (); + + /* And more; the accumulated args size. */ + offs += crtl->outgoing_args_size; + + /* Then round it off, in case we use aligned stack. */ + if (TARGET_STACK_ALIGN) + offs = TARGET_ALIGN_BY_32 ? (offs + 3) & ~3 : (offs + 1) & ~1; + + return offs; +} + +/* The INITIAL_ELIMINATION_OFFSET worker. + Calculate the difference between imaginary registers such as frame + pointer and the stack pointer. Used to eliminate the frame pointer + and imaginary arg pointer. */ + +int +cris_initial_elimination_offset (int fromreg, int toreg) +{ + int fp_sp_offset + = cris_initial_frame_pointer_offset (); + + /* We should be able to use regs_ever_live and related prologue + information here, or alpha should not as well. */ + bool return_address_on_stack = cris_return_address_on_stack (); + + /* Here we act as if the frame-pointer were needed. */ + int ap_fp_offset = 4 + (return_address_on_stack ? 4 : 0); + + if (fromreg == ARG_POINTER_REGNUM + && toreg == FRAME_POINTER_REGNUM) + return ap_fp_offset; + + /* Between the frame pointer and the stack are only "normal" stack + variables and saved registers. */ + if (fromreg == FRAME_POINTER_REGNUM + && toreg == STACK_POINTER_REGNUM) + return fp_sp_offset; + + /* We need to balance out the frame pointer here. */ + if (fromreg == ARG_POINTER_REGNUM + && toreg == STACK_POINTER_REGNUM) + return ap_fp_offset + fp_sp_offset - 4; + + gcc_unreachable (); +} + +/* Nonzero if X is a hard reg that can be used as an index. */ +static inline bool +reg_ok_for_base_p (const_rtx x, bool strict) +{ + return ((! strict && ! HARD_REGISTER_P (x)) + || REGNO_OK_FOR_BASE_P (REGNO (x))); +} + +/* Nonzero if X is a hard reg that can be used as an index. */ +static inline bool +reg_ok_for_index_p (const_rtx x, bool strict) +{ + return reg_ok_for_base_p (x, strict); +} + +/* No symbol can be used as an index (or more correct, as a base) together + with a register with PIC; the PIC register must be there. */ + +bool +cris_constant_index_p (const_rtx x) +{ + return (CONSTANT_P (x) && (!flag_pic || cris_valid_pic_const (x, true))); +} + +/* True if X is a valid base register. */ + +bool +cris_base_p (const_rtx x, bool strict) +{ + return (REG_P (x) && reg_ok_for_base_p (x, strict)); +} + +/* True if X is a valid index register. */ + +static inline bool +cris_index_p (const_rtx x, bool strict) +{ + return (REG_P (x) && reg_ok_for_index_p (x, strict)); +} + +/* True if X is a valid base register with or without autoincrement. 
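A standalone C sketch of the arithmetic in cris_initial_elimination_offset above: AP->FP is 4 for the saved frame pointer plus 4 more when the return address lives on the stack, FP->SP is the saved-register/locals/outgoing-args total, and AP->SP follows from the other two minus the 4 bytes of the saved frame pointer itself. The example frame sizes (three saved registers, 20 bytes of locals, 8 bytes of outgoing args) and the ap_to_sp helper are made up; stack-alignment rounding is left out.

#include <stdio.h>

/* Combine the FP->SP offset with the AP->FP offset to get AP->SP.  */
static int
ap_to_sp (int fp_sp_offset, int return_address_on_stack)
{
  int ap_fp_offset = 4 + (return_address_on_stack ? 4 : 0);
  return ap_fp_offset + fp_sp_offset - 4;
}

int
main (void)
{
  /* Hypothetical frame: 3 saved registers (12 bytes), 20 bytes of locals,
     8 bytes of outgoing args, so FP->SP = 40.  */
  printf ("AP->SP = %d (no return-address slot)\n", ap_to_sp (40, 0));  /* 40 */
  printf ("AP->SP = %d (SRP on stack)\n", ap_to_sp (40, 1));            /* 44 */
  return 0;
}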
*/ + +bool +cris_base_or_autoincr_p (const_rtx x, bool strict) +{ + return (cris_base_p (x, strict) + || (GET_CODE (x) == POST_INC + && cris_base_p (XEXP (x, 0), strict) + && REGNO (XEXP (x, 0)) != CRIS_ACR_REGNUM)); +} + +/* True if X is a valid (register) index for BDAP, i.e. [Rs].S or [Rs+].S. */ + +bool +cris_bdap_index_p (const_rtx x, bool strict) +{ + return ((MEM_P (x) + && GET_MODE (x) == SImode + && cris_base_or_autoincr_p (XEXP (x, 0), strict)) + || (GET_CODE (x) == SIGN_EXTEND + && MEM_P (XEXP (x, 0)) + && (GET_MODE (XEXP (x, 0)) == HImode + || GET_MODE (XEXP (x, 0)) == QImode) + && cris_base_or_autoincr_p (XEXP (XEXP (x, 0), 0), strict))); +} + +/* True if X is a valid (register) index for BIAP, i.e. Rd.m. */ + +bool +cris_biap_index_p (const_rtx x, bool strict) +{ + return (cris_index_p (x, strict) + || (GET_CODE (x) == MULT + && cris_index_p (XEXP (x, 0), strict) + && cris_scale_int_operand (XEXP (x, 1), VOIDmode))); +} + +/* Worker function for TARGET_LEGITIMATE_ADDRESS_P. + + A PIC operand looks like a normal symbol here. At output we dress it + in "[rPIC+symbol:GOT]" (global symbol) or "rPIC+symbol:GOTOFF" (local + symbol) so we exclude all addressing modes where we can't replace a + plain "symbol" with that. A global PIC symbol does not fit anywhere + here (but is thankfully a general_operand in itself). A local PIC + symbol is valid for the plain "symbol + offset" case. */ + +bool +cris_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ + const_rtx x1, x2; + + if (cris_base_or_autoincr_p (x, strict)) + return true; + else if (TARGET_V32) + /* Nothing else is valid then. */ + return false; + else if (cris_constant_index_p (x)) + return true; + /* Indexed? */ + else if (GET_CODE (x) == PLUS) + { + x1 = XEXP (x, 0); + x2 = XEXP (x, 1); + /* BDAP o, Rd. */ + if ((cris_base_p (x1, strict) && cris_constant_index_p (x2)) + || (cris_base_p (x2, strict) && cris_constant_index_p (x1)) + /* BDAP Rs[+], Rd. */ + || (GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && ((cris_base_p (x1, strict) + && cris_bdap_index_p (x2, strict)) + || (cris_base_p (x2, strict) + && cris_bdap_index_p (x1, strict)) + /* BIAP.m Rs, Rd */ + || (cris_base_p (x1, strict) + && cris_biap_index_p (x2, strict)) + || (cris_base_p (x2, strict) + && cris_biap_index_p (x1, strict))))) + return true; + } + else if (MEM_P (x)) + { + /* DIP (Rs). Reject [[reg+]] and [[reg]] for DImode (long long). */ + if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && cris_base_or_autoincr_p (XEXP (x, 0), strict)) + return true; + } + + return false; +} + +/* Worker function for LEGITIMIZE_RELOAD_ADDRESS. 
*/ + +bool +cris_reload_address_legitimized (rtx x, + enum machine_mode mode ATTRIBUTE_UNUSED, + int opnum ATTRIBUTE_UNUSED, + int itype, + int ind_levels ATTRIBUTE_UNUSED) +{ + enum reload_type type = (enum reload_type) itype; + rtx op0, op1; + rtx *op1p; + + if (GET_CODE (x) != PLUS) + return false; + + if (TARGET_V32) + return false; + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + op1p = &XEXP (x, 1); + + if (!REG_P (op1)) + return false; + + if (GET_CODE (op0) == SIGN_EXTEND && MEM_P (XEXP (op0, 0))) + { + rtx op00 = XEXP (op0, 0); + rtx op000 = XEXP (op00, 0); + rtx *op000p = &XEXP (op00, 0); + + if ((GET_MODE (op00) == HImode || GET_MODE (op00) == QImode) + && (REG_P (op000) + || (GET_CODE (op000) == POST_INC && REG_P (XEXP (op000, 0))))) + { + bool something_reloaded = false; + + if (GET_CODE (op000) == POST_INC + && REG_P (XEXP (op000, 0)) + && REGNO (XEXP (op000, 0)) > CRIS_LAST_GENERAL_REGISTER) + /* No, this gets too complicated and is too rare to care + about trying to improve on the general code Here. + As the return-value is an all-or-nothing indicator, we + punt on the other register too. */ + return false; + + if ((REG_P (op000) + && REGNO (op000) > CRIS_LAST_GENERAL_REGISTER)) + { + /* The address of the inner mem is a pseudo or wrong + reg: reload that. */ + push_reload (op000, NULL_RTX, op000p, NULL, GENERAL_REGS, + GET_MODE (x), VOIDmode, 0, 0, opnum, type); + something_reloaded = true; + } + + if (REGNO (op1) > CRIS_LAST_GENERAL_REGISTER) + { + /* Base register is a pseudo or wrong reg: reload it. */ + push_reload (op1, NULL_RTX, op1p, NULL, GENERAL_REGS, + GET_MODE (x), VOIDmode, 0, 0, + opnum, type); + something_reloaded = true; + } + + gcc_assert (something_reloaded); + + return true; + } + } + + return false; +} + + +/* Worker function for TARGET_PREFERRED_RELOAD_CLASS. + + It seems like gcc (2.7.2 and 2.9x of 2000-03-22) may send "NO_REGS" as + the class for a constant (testcase: __Mul in arit.c). To avoid forcing + out a constant into the constant pool, we will trap this case and + return something a bit more sane. FIXME: Check if this is a bug. + Beware that we must not "override" classes that can be specified as + constraint letters, or else asm operands using them will fail when + they need to be reloaded. FIXME: Investigate whether that constitutes + a bug. */ + +static reg_class_t +cris_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) +{ + if (rclass != ACR_REGS + && rclass != MOF_REGS + && rclass != MOF_SRP_REGS + && rclass != SRP_REGS + && rclass != CC0_REGS + && rclass != SPECIAL_REGS) + return GENERAL_REGS; + + return rclass; +} + +/* Worker function for TARGET_REGISTER_MOVE_COST. */ + +static int +cris_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t from, reg_class_t to) +{ + /* Can't move to and from a SPECIAL_REGS register, so we have to say + their move cost within that class is higher. How about 7? That's 3 + for a move to a GENERAL_REGS register, 3 for the move from the + GENERAL_REGS register, and 1 for the increased register pressure. + Also, it's higher than the memory move cost, as it should. + We also do this for ALL_REGS, since we don't want that class to be + preferred (even to memory) at all where GENERAL_REGS doesn't fit. + Whenever it's about to be used, it's for SPECIAL_REGS. If we don't + present a higher cost for ALL_REGS than memory, a SPECIAL_REGS may be + used when a GENERAL_REGS should be used, even if there are call-saved + GENERAL_REGS left to allocate. 
This is because the fall-back when + the most preferred register class isn't available, isn't the next + (or next good) wider register class, but the *most widest* register + class. FIXME: pre-IRA comment, perhaps obsolete now. */ + + if ((reg_classes_intersect_p (from, SPECIAL_REGS) + && reg_classes_intersect_p (to, SPECIAL_REGS)) + || from == ALL_REGS || to == ALL_REGS) + return 7; + + /* Make moves to/from SPECIAL_REGS slightly more expensive, as we + generally prefer GENERAL_REGS. */ + if (reg_classes_intersect_p (from, SPECIAL_REGS) + || reg_classes_intersect_p (to, SPECIAL_REGS)) + return 3; + + return 2; +} + +/* Worker function for TARGET_MEMORY_MOVE_COST. + + This isn't strictly correct for v0..3 in buswidth-8bit mode, but should + suffice. */ + +static int +cris_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + if (mode == QImode + || mode == HImode) + return 4; + else + return 6; +} + +/* Worker for cris_notice_update_cc; handles the "normal" cases. + FIXME: this code is historical; its functionality should be + refactored to look at insn attributes and moved to + cris_notice_update_cc. Except, we better lose cc0 entirely. */ + +static void +cris_normal_notice_update_cc (rtx exp, rtx insn) +{ + /* "Normal" means, for: + (set (cc0) (...)): + CC is (...). + + (set (reg) (...)): + CC is (reg) and (...) - unless (...) is 0 or reg is a special + register or (v32 and (...) is -32..-1), then CC does not change. + CC_NO_OVERFLOW unless (...) is reg or mem. + + (set (mem) (...)): + CC does not change. + + (set (pc) (...)): + CC does not change. + + (parallel + (set (reg1) (mem (bdap/biap))) + (set (reg2) (bdap/biap))): + CC is (reg1) and (mem (reg2)) + + (parallel + (set (mem (bdap/biap)) (reg1)) [or 0] + (set (reg2) (bdap/biap))): + CC does not change. + + (where reg and mem includes strict_low_parts variants thereof) + + For all others, assume CC is clobbered. + Note that we do not have to care about setting CC_NO_OVERFLOW, + since the overflow flag is set to 0 (i.e. right) for + instructions where it does not have any sane sense, but where + other flags have meanings. (This includes shifts; the carry is + not set by them). + + Note that there are other parallel constructs we could match, + but we don't do that yet. */ + + if (GET_CODE (exp) == SET) + { + /* FIXME: Check when this happens. It looks like we should + actually do a CC_STATUS_INIT here to be safe. */ + if (SET_DEST (exp) == pc_rtx) + return; + + /* Record CC0 changes, so we do not have to output multiple + test insns. */ + if (SET_DEST (exp) == cc0_rtx) + { + CC_STATUS_INIT; + + if (GET_CODE (SET_SRC (exp)) == COMPARE + && XEXP (SET_SRC (exp), 1) == const0_rtx) + cc_status.value1 = XEXP (SET_SRC (exp), 0); + else + cc_status.value1 = SET_SRC (exp); + + /* Handle flags for the special btstq on one bit. */ + if (GET_CODE (cc_status.value1) == ZERO_EXTRACT + && XEXP (cc_status.value1, 1) == const1_rtx) + { + if (CONST_INT_P (XEXP (cc_status.value1, 0))) + /* Using cmpq. */ + cc_status.flags = CC_INVERTED; + else + /* A one-bit btstq. */ + cc_status.flags = CC_Z_IN_NOT_N; + } + + else if (GET_CODE (SET_SRC (exp)) == COMPARE) + { + if (!REG_P (XEXP (SET_SRC (exp), 0)) + && XEXP (SET_SRC (exp), 1) != const0_rtx) + /* For some reason gcc will not canonicalize compare + operations, reversing the sign by itself if + operands are in wrong order. */ + /* (But NOT inverted; eq is still eq.) */ + cc_status.flags = CC_REVERSED; + + /* This seems to be overlooked by gcc. 
FIXME: Check again. + FIXME: Is it really safe? */ + cc_status.value2 + = gen_rtx_MINUS (GET_MODE (SET_SRC (exp)), + XEXP (SET_SRC (exp), 0), + XEXP (SET_SRC (exp), 1)); + } + return; + } + else if (REG_P (SET_DEST (exp)) + || (GET_CODE (SET_DEST (exp)) == STRICT_LOW_PART + && REG_P (XEXP (SET_DEST (exp), 0)))) + { + /* A register is set; normally CC is set to show that no + test insn is needed. Catch the exceptions. */ + + /* If not to cc0, then no "set"s in non-natural mode give + ok cc0... */ + if (GET_MODE_SIZE (GET_MODE (SET_DEST (exp))) > UNITS_PER_WORD + || GET_MODE_CLASS (GET_MODE (SET_DEST (exp))) == MODE_FLOAT) + { + /* ... except add:s and sub:s in DImode. */ + if (GET_MODE (SET_DEST (exp)) == DImode + && (GET_CODE (SET_SRC (exp)) == PLUS + || GET_CODE (SET_SRC (exp)) == MINUS)) + { + CC_STATUS_INIT; + cc_status.value1 = SET_DEST (exp); + cc_status.value2 = SET_SRC (exp); + + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + cc_status.value2 = 0; + + /* Add and sub may set V, which gets us + unoptimizable results in "gt" and "le" condition + codes. */ + cc_status.flags |= CC_NO_OVERFLOW; + + return; + } + } + else if (SET_SRC (exp) == const0_rtx + || (REG_P (SET_SRC (exp)) + && (REGNO (SET_SRC (exp)) + > CRIS_LAST_GENERAL_REGISTER)) + || (TARGET_V32 + && REG_P (SET_DEST (exp)) + && satisfies_constraint_I (SET_SRC (exp)))) + { + /* There's no CC0 change for this case. Just check + for overlap. */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + else + { + CC_STATUS_INIT; + cc_status.value1 = SET_DEST (exp); + cc_status.value2 = SET_SRC (exp); + + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + cc_status.value2 = 0; + + /* Some operations may set V, which gets us + unoptimizable results in "gt" and "le" condition + codes. */ + if (GET_CODE (SET_SRC (exp)) == PLUS + || GET_CODE (SET_SRC (exp)) == MINUS + || GET_CODE (SET_SRC (exp)) == NEG) + cc_status.flags |= CC_NO_OVERFLOW; + + /* For V32, nothing with a register destination sets + C and V usefully. */ + if (TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + + return; + } + } + else if (MEM_P (SET_DEST (exp)) + || (GET_CODE (SET_DEST (exp)) == STRICT_LOW_PART + && MEM_P (XEXP (SET_DEST (exp), 0)))) + { + /* When SET to MEM, then CC is not changed (except for + overlap). */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + } + else if (GET_CODE (exp) == PARALLEL) + { + if (GET_CODE (XVECEXP (exp, 0, 0)) == SET + && GET_CODE (XVECEXP (exp, 0, 1)) == SET + && REG_P (XEXP (XVECEXP (exp, 0, 1), 0))) + { + if (REG_P (XEXP (XVECEXP (exp, 0, 0), 0)) + && MEM_P (XEXP (XVECEXP (exp, 0, 0), 1))) + { + CC_STATUS_INIT; + + /* For "move.S [rx=ry+o],rz", say CC reflects + value1=rz and value2=[rx] */ + cc_status.value1 = XEXP (XVECEXP (exp, 0, 0), 0); + cc_status.value2 + = replace_equiv_address (XEXP (XVECEXP (exp, 0, 0), 1), + XEXP (XVECEXP (exp, 0, 1), 0)); + + /* Huh? A side-effect cannot change the destination + register. */ + if (cris_reg_overlap_mentioned_p (cc_status.value1, + cc_status.value2)) + internal_error ("internal error: sideeffect-insn affecting main effect"); + + /* For V32, moves to registers don't set C and V. 
*/ + if (TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + return; + } + else if ((REG_P (XEXP (XVECEXP (exp, 0, 0), 1)) + || XEXP (XVECEXP (exp, 0, 0), 1) == const0_rtx) + && MEM_P (XEXP (XVECEXP (exp, 0, 0), 0))) + { + /* For "move.S rz,[rx=ry+o]" and "clear.S [rx=ry+o]", + say flags are not changed, except for overlap. */ + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + + return; + } + } + } + + /* If we got here, the case wasn't covered by the code above. */ + CC_STATUS_INIT; +} + +/* This function looks into the pattern to see how this insn affects + condition codes. + + Used when to eliminate test insns before a condition-code user, + such as a "scc" insn or a conditional branch. This includes + checking if the entities that cc was updated by, are changed by the + operation. + + Currently a jumble of the old peek-inside-the-insn and the newer + check-cc-attribute methods. */ + +void +cris_notice_update_cc (rtx exp, rtx insn) +{ + enum attr_cc attrval = get_attr_cc (insn); + + /* Check if user specified "-mcc-init" as a bug-workaround. Remember + to still set CC_REVERSED as below, since that's required by some + compare insn alternatives. (FIXME: GCC should do this virtual + operand swap by itself.) A test-case that may otherwise fail is + gcc.c-torture/execute/20000217-1.c -O0 and -O1. */ + if (TARGET_CCINIT) + { + CC_STATUS_INIT; + + if (attrval == CC_REV) + cc_status.flags = CC_REVERSED; + return; + } + + /* Slowly, we're converting to using attributes to control the setting + of condition-code status. */ + switch (attrval) + { + case CC_NONE: + /* Even if it is "none", a setting may clobber a previous + cc-value, so check. */ + if (GET_CODE (exp) == SET) + { + if (cc_status.value1 + && modified_in_p (cc_status.value1, insn)) + cc_status.value1 = 0; + + if (cc_status.value2 + && modified_in_p (cc_status.value2, insn)) + cc_status.value2 = 0; + } + return; + + case CC_CLOBBER: + CC_STATUS_INIT; + return; + + case CC_REV: + case CC_NOOV32: + case CC_NORMAL: + cris_normal_notice_update_cc (exp, insn); + + /* The "test" insn doesn't clear (carry and) overflow on V32. We + can change bge => bpl and blt => bmi by passing on to the cc0 + user that V should not be considered; bgt and ble are taken + care of by other methods (see {tst,cmp}{si,hi,qi}). */ + if (attrval == CC_NOOV32 && TARGET_V32) + cc_status.flags |= CC_NO_OVERFLOW; + return; + + default: + internal_error ("unknown cc_attr value"); + } + + CC_STATUS_INIT; +} + +/* Return != 0 if the return sequence for the current function is short, + like "ret" or "jump [sp+]". Prior to reloading, we can't tell if + registers must be saved, so return 0 then. */ + +bool +cris_simple_epilogue (void) +{ + unsigned int regno; + unsigned int reglimit = STACK_POINTER_REGNUM; + bool got_really_used = false; + + if (! reload_completed + || frame_pointer_needed + || get_frame_size () != 0 + || crtl->args.pretend_args_size + || crtl->args.size + || crtl->outgoing_args_size + || crtl->calls_eh_return + + /* If we're not supposed to emit prologue and epilogue, we must + not emit return-type instructions. */ + || !TARGET_PROLOGUE_EPILOGUE) + return false; + + /* Can't return from stacked return address with v32. 
*/ + if (TARGET_V32 && cris_return_address_on_stack ()) + return false; + + if (crtl->uses_pic_offset_table) + { + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* No simple epilogue if there are saved registers. */ + for (regno = 0; regno < reglimit; regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + return false; + + return true; +} + +/* Emit checking that MEM is aligned for an access in MODE, failing + that, executing a "break 8" (or call to abort, if "break 8" is + disabled). */ + +void +cris_emit_trap_for_misalignment (rtx mem) +{ + rtx addr, reg, ok_label, andop, jmp; + int natural_alignment; + gcc_assert (MEM_P (mem)); + + natural_alignment = GET_MODE_SIZE (GET_MODE (mem)); + addr = XEXP (mem, 0); + reg = force_reg (Pmode, addr); + ok_label = gen_label_rtx (); + + /* This will yield a btstq without a separate register used, usually - + with the exception for PRE hoisting the "and" but not the branch + around the trap: see testsuite/gcc.target/cris/sync-3s.c. */ + andop = gen_rtx_AND (Pmode, reg, GEN_INT (natural_alignment - 1)); + emit_cmp_and_jump_insns (force_reg (SImode, andop), const0_rtx, EQ, + NULL_RTX, Pmode, 1, ok_label); + jmp = get_last_insn (); + gcc_assert (JUMP_P (jmp)); + + predict_insn_def (jmp, PRED_NORETURN, TAKEN); + expand_builtin_trap (); + emit_label (ok_label); +} + +/* Expand a return insn (just one insn) marked as using SRP or stack + slot depending on parameter ON_STACK. */ + +void +cris_expand_return (bool on_stack) +{ + /* FIXME: emit a parallel with a USE for SRP or the stack-slot, to + tell "ret" from "jump [sp+]". Some, but not all, other parts of + GCC expect just (return) to do the right thing when optimizing, so + we do that until they're fixed. Currently, all return insns in a + function must be the same (not really a limiting factor) so we need + to check that it doesn't change half-way through. */ + emit_jump_insn (ret_rtx); + + CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_RET || !on_stack); + CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_JUMP || on_stack); + + cfun->machine->return_type + = on_stack ? CRIS_RETINSN_JUMP : CRIS_RETINSN_RET; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +cris_rtx_costs (rtx x, int code, int outer_code, int opno, int *total, + bool speed) +{ + switch (code) + { + case CONST_INT: + { + HOST_WIDE_INT val = INTVAL (x); + if (val == 0) + *total = 0; + else if (val < 32 && val >= -32) + *total = 1; + /* Eight or 16 bits are a word and cycle more expensive. */ + else if (val <= 32767 && val >= -32768) + *total = 2; + /* A 32-bit constant (or very seldom, unsigned 16 bits) costs + another word. FIXME: This isn't linear to 16 bits. */ + else + *total = 4; + return true; + } + + case LABEL_REF: + *total = 6; + return true; + + case CONST: + case SYMBOL_REF: + *total = 6; + return true; + + case CONST_DOUBLE: + if (x != CONST0_RTX (GET_MODE (x) == VOIDmode ? DImode : GET_MODE (x))) + *total = 12; + else + /* Make 0.0 cheap, else test-insns will not be used. */ + *total = 0; + return true; + + case MULT: + /* If we have one arm of an ADDI, make sure it gets the cost of + one insn, i.e. 
zero cost for this operand, and just the cost + of the PLUS, as the insn is created by combine from a PLUS + and an ASHIFT, and the MULT cost below would make the + combined value be larger than the separate insns. The insn + validity is checked elsewhere by combine. + + FIXME: this case is a stop-gap for 4.3 and 4.4, this whole + function should be rewritten. */ + if (outer_code == PLUS && cris_biap_index_p (x, false)) + { + *total = 0; + return true; + } + + /* Identify values that are not powers of two. Powers of 2 are + taken care of already and those values should not be changed. */ + if (!CONST_INT_P (XEXP (x, 1)) + || exact_log2 (INTVAL (XEXP (x, 1))) < 0) + { + /* If we have a multiply insn, then the cost is between + 1 and 2 "fast" instructions. */ + if (TARGET_HAS_MUL_INSNS) + { + *total = COSTS_N_INSNS (1) + COSTS_N_INSNS (1) / 2; + return true; + } + + /* Estimate as 4 + 4 * #of bits. */ + *total = COSTS_N_INSNS (132); + return true; + } + return false; + + case UDIV: + case MOD: + case UMOD: + case DIV: + if (!CONST_INT_P (XEXP (x, 1)) + || exact_log2 (INTVAL (XEXP (x, 1))) < 0) + { + /* Estimate this as 4 + 8 * #of bits. */ + *total = COSTS_N_INSNS (260); + return true; + } + return false; + + case AND: + if (CONST_INT_P (XEXP (x, 1)) + /* Two constants may actually happen before optimization. */ + && !CONST_INT_P (XEXP (x, 0)) + && !satisfies_constraint_I (XEXP (x, 1))) + { + *total + = (rtx_cost (XEXP (x, 0), (enum rtx_code) outer_code, + opno, speed) + 2 + + 2 * GET_MODE_NUNITS (GET_MODE (XEXP (x, 0)))); + return true; + } + return false; + + case ZERO_EXTRACT: + if (outer_code != COMPARE) + return false; + /* fall through */ + + case ZERO_EXTEND: case SIGN_EXTEND: + *total = rtx_cost (XEXP (x, 0), (enum rtx_code) outer_code, opno, speed); + return true; + + default: + return false; + } +} + +/* The ADDRESS_COST worker. */ + +static int +cris_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + /* The metric to use for the cost-macros is unclear. + The metric used here is (the number of cycles needed) / 2, + where we consider equal a cycle for a word of code and a cycle to + read memory. FIXME: Adding "+ 1" to all values would avoid + returning 0, as tree-ssa-loop-ivopts.c as of r128272 "normalizes" + 0 to 1, thereby giving equal costs to [rN + rM] and [rN]. + Unfortunately(?) such a hack would expose other pessimizations, + at least with g++.dg/tree-ssa/ivopts-1.C, adding insns to the + loop there, without apparent reason. */ + + /* The cheapest addressing modes get 0, since nothing extra is needed. */ + if (cris_base_or_autoincr_p (x, false)) + return 0; + + /* An indirect mem must be a DIP. This means two bytes extra for code, + and 4 bytes extra for memory read, i.e. (2 + 4) / 2. */ + if (MEM_P (x)) + return (2 + 4) / 2; + + /* Assume (2 + 4) / 2 for a single constant; a dword, since it needs + an extra DIP prefix and 4 bytes of constant in most cases. */ + if (CONSTANT_P (x)) + return (2 + 4) / 2; + + /* Handle BIAP and BDAP prefixes. */ + if (GET_CODE (x) == PLUS) + { + rtx tem1 = XEXP (x, 0); + rtx tem2 = XEXP (x, 1); + + /* Local extended canonicalization rule: the first operand must + be REG, unless it's an operation (MULT). */ + if (!REG_P (tem1) && GET_CODE (tem1) != MULT) + tem1 = tem2, tem2 = XEXP (x, 0); + + /* We'll "assume" we have canonical RTX now. */ + gcc_assert (REG_P (tem1) || GET_CODE (tem1) == MULT); + + /* A BIAP is 2 extra bytes for the prefix insn, nothing more.
We + recognize the typical MULT which is always in tem1 because of + insn canonicalization. */ + if ((GET_CODE (tem1) == MULT && cris_biap_index_p (tem1, false)) + || REG_P (tem2)) + return 2 / 2; + + /* A BDAP (quick) is 2 extra bytes. Any constant operand to the + PLUS is always found in tem2. */ + if (CONST_INT_P (tem2) && INTVAL (tem2) < 128 && INTVAL (tem2) >= -128) + return 2 / 2; + + /* A BDAP -32768 .. 32767 is like BDAP quick, but with 2 extra + bytes. */ + if (satisfies_constraint_L (tem2)) + return (2 + 2) / 2; + + /* A BDAP with some other constant is 2 bytes extra. */ + if (CONSTANT_P (tem2)) + return (2 + 2 + 2) / 2; + + /* BDAP with something indirect should have a higher cost than + BIAP with register. FIXME: Should it cost like a MEM or more? */ + return (2 + 2 + 2) / 2; + } + + /* What else? Return a high cost. It matters only for valid + addressing modes. */ + return 10; +} + +/* Check various objections to the side-effect. Used in the test-part + of an anonymous insn describing an insn with a possible side-effect. + Returns nonzero if the implied side-effect is ok. + + code : PLUS or MULT + ops : An array of rtx:es. lreg, rreg, rval, + The variables multop and other_op are indexes into this, + or -1 if they are not applicable. + lreg : The register that gets assigned in the side-effect. + rreg : One register in the side-effect expression + rval : The other register, or an int. + multop : An integer to multiply rval with. + other_op : One of the entities of the main effect, + whose mode we must consider. */ + +int +cris_side_effect_mode_ok (enum rtx_code code, rtx *ops, + int lreg, int rreg, int rval, + int multop, int other_op) +{ + /* Find what value to multiply with, for rx =ry + rz * n. */ + int mult = multop < 0 ? 1 : INTVAL (ops[multop]); + + rtx reg_rtx = ops[rreg]; + rtx val_rtx = ops[rval]; + + /* The operands may be swapped. Canonicalize them in reg_rtx and + val_rtx, where reg_rtx always is a reg (for this constraint to + match). */ + if (! cris_base_p (reg_rtx, reload_in_progress || reload_completed)) + reg_rtx = val_rtx, val_rtx = ops[rreg]; + + /* Don't forget to check that reg_rtx really is a reg. If it isn't, + we have no business. */ + if (! cris_base_p (reg_rtx, reload_in_progress || reload_completed)) + return 0; + + /* Don't do this when -mno-split. */ + if (!TARGET_SIDE_EFFECT_PREFIXES) + return 0; + + /* The mult expression may be hidden in lreg. FIXME: Add more + commentary about that. */ + if (GET_CODE (val_rtx) == MULT) + { + mult = INTVAL (XEXP (val_rtx, 1)); + val_rtx = XEXP (val_rtx, 0); + code = MULT; + } + + /* First check the "other operand". */ + if (other_op >= 0) + { + if (GET_MODE_SIZE (GET_MODE (ops[other_op])) > UNITS_PER_WORD) + return 0; + + /* Check if the lvalue register is the same as the "other + operand". If so, the result is undefined and we shouldn't do + this. FIXME: Check again. */ + if ((cris_base_p (ops[lreg], reload_in_progress || reload_completed) + && cris_base_p (ops[other_op], + reload_in_progress || reload_completed) + && REGNO (ops[lreg]) == REGNO (ops[other_op])) + || rtx_equal_p (ops[other_op], ops[lreg])) + return 0; + } + + /* Do not accept frame_pointer_rtx as any operand. */ + if (ops[lreg] == frame_pointer_rtx || ops[rreg] == frame_pointer_rtx + || ops[rval] == frame_pointer_rtx + || (other_op >= 0 && ops[other_op] == frame_pointer_rtx)) + return 0; + + if (code == PLUS + && ! 
cris_base_p (val_rtx, reload_in_progress || reload_completed)) + { + + /* Do not allow rx = rx + n if a normal add or sub with same size + would do. */ + if (rtx_equal_p (ops[lreg], reg_rtx) + && CONST_INT_P (val_rtx) + && (INTVAL (val_rtx) <= 63 && INTVAL (val_rtx) >= -63)) + return 0; + + /* Check allowed cases, like [r(+)?].[bwd] and const. */ + if (CONSTANT_P (val_rtx)) + return 1; + + if (MEM_P (val_rtx) + && cris_base_or_autoincr_p (XEXP (val_rtx, 0), + reload_in_progress || reload_completed)) + return 1; + + if (GET_CODE (val_rtx) == SIGN_EXTEND + && MEM_P (XEXP (val_rtx, 0)) + && cris_base_or_autoincr_p (XEXP (XEXP (val_rtx, 0), 0), + reload_in_progress || reload_completed)) + return 1; + + /* If we got here, it's not a valid addressing mode. */ + return 0; + } + else if (code == MULT + || (code == PLUS + && cris_base_p (val_rtx, + reload_in_progress || reload_completed))) + { + /* Do not allow rx = rx + ry.S, since it doesn't give better code. */ + if (rtx_equal_p (ops[lreg], reg_rtx) + || (mult == 1 && rtx_equal_p (ops[lreg], val_rtx))) + return 0; + + /* Do not allow bad multiply-values. */ + if (mult != 1 && mult != 2 && mult != 4) + return 0; + + /* Only allow r + ... */ + if (! cris_base_p (reg_rtx, reload_in_progress || reload_completed)) + return 0; + + /* If we got here, all seems ok. + (All checks need to be done above). */ + return 1; + } + + /* If we get here, the caller got its initial tests wrong. */ + internal_error ("internal error: cris_side_effect_mode_ok with bad operands"); +} + +/* Whether next_cc0_user of insn is LE or GT or requires a real compare + insn for other reasons. */ + +bool +cris_cc0_user_requires_cmp (rtx insn) +{ + rtx cc0_user = NULL; + rtx body; + rtx set; + + gcc_assert (insn != NULL); + + if (!TARGET_V32) + return false; + + cc0_user = next_cc0_user (insn); + if (cc0_user == NULL) + return false; + + body = PATTERN (cc0_user); + set = single_set (cc0_user); + + /* Users can be sCC and bCC. */ + if (JUMP_P (cc0_user) + && GET_CODE (body) == SET + && SET_DEST (body) == pc_rtx + && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE + && XEXP (XEXP (SET_SRC (body), 0), 0) == cc0_rtx) + { + return + GET_CODE (XEXP (SET_SRC (body), 0)) == GT + || GET_CODE (XEXP (SET_SRC (body), 0)) == LE; + } + else if (set) + { + return + GET_CODE (SET_SRC (body)) == GT + || GET_CODE (SET_SRC (body)) == LE; + } + + gcc_unreachable (); +} + +/* The function reg_overlap_mentioned_p in CVS (still as of 2001-05-16) + does not handle the case where the IN operand is strict_low_part; it + does handle it for X. Test-case in Axis-20010516. This function takes + care of that for THIS port. FIXME: strict_low_part is going away + anyway. */ + +static int +cris_reg_overlap_mentioned_p (rtx x, rtx in) +{ + /* The function reg_overlap_mentioned now handles when X is + strict_low_part, but not when IN is a STRICT_LOW_PART. */ + if (GET_CODE (in) == STRICT_LOW_PART) + in = XEXP (in, 0); + + return reg_overlap_mentioned_p (x, in); +} + +/* Return TRUE iff X is a CONST valid for e.g. indexing. + ANY_OPERAND is 0 if X is in a CALL_P insn or movsi, 1 + elsewhere. */ + +bool +cris_valid_pic_const (const_rtx x, bool any_operand) +{ + gcc_assert (flag_pic); + + switch (GET_CODE (x)) + { + case CONST_INT: + case CONST_DOUBLE: + return true; + default: + ; + } + + if (GET_CODE (x) != CONST) + return false; + + x = XEXP (x, 0); + + /* Handle (const (plus (unspec .. UNSPEC_GOTREL) (const_int ...))). 
*/ + if (GET_CODE (x) == PLUS + && GET_CODE (XEXP (x, 0)) == UNSPEC + && (XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_GOTREL + || XINT (XEXP (x, 0), 1) == CRIS_UNSPEC_PCREL) + && CONST_INT_P (XEXP (x, 1))) + x = XEXP (x, 0); + + if (GET_CODE (x) == UNSPEC) + switch (XINT (x, 1)) + { + /* A PCREL operand is only valid for call and movsi. */ + case CRIS_UNSPEC_PLT_PCREL: + case CRIS_UNSPEC_PCREL: + return !any_operand; + + case CRIS_UNSPEC_PLT_GOTREL: + case CRIS_UNSPEC_PLTGOTREAD: + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_GOTREL: + return true; + default: + gcc_unreachable (); + } + + return cris_pic_symbol_type_of (x) == cris_no_symbol; +} + +/* Helper function to find the right PIC-type symbol to generate, + given the original (non-PIC) representation. */ + +enum cris_pic_symbol_type +cris_pic_symbol_type_of (const_rtx x) +{ + switch (GET_CODE (x)) + { + case SYMBOL_REF: + return SYMBOL_REF_LOCAL_P (x) + ? cris_rel_symbol : cris_got_symbol; + + case LABEL_REF: + return cris_rel_symbol; + + case CONST: + return cris_pic_symbol_type_of (XEXP (x, 0)); + + case PLUS: + case MINUS: + { + enum cris_pic_symbol_type t1 = cris_pic_symbol_type_of (XEXP (x, 0)); + enum cris_pic_symbol_type t2 = cris_pic_symbol_type_of (XEXP (x, 1)); + + gcc_assert (t1 == cris_no_symbol || t2 == cris_no_symbol); + + if (t1 == cris_got_symbol || t2 == cris_got_symbol) + return cris_got_symbol_needing_fixup; + + return t1 != cris_no_symbol ? t1 : t2; + } + + case CONST_INT: + case CONST_DOUBLE: + return cris_no_symbol; + + case UNSPEC: + /* Likely an offsettability-test attempting to add a constant to + a GOTREAD symbol, which can't be handled. */ + return cris_invalid_pic_symbol; + + default: + fatal_insn ("unrecognized supposed constant", x); + } + + gcc_unreachable (); +} + +/* The LEGITIMATE_PIC_OPERAND_P worker. */ + +int +cris_legitimate_pic_operand (rtx x) +{ + /* Symbols are not valid PIC operands as-is; just constants. */ + return cris_valid_pic_const (x, true); +} + +/* Queue an .ident string in the queue of top-level asm statements. + If the front-end is done, we must be being called from toplev.c. + In that case, do nothing. */ +void +cris_asm_output_ident (const char *string) +{ + if (cgraph_state != CGRAPH_STATE_PARSING) + return; + + default_asm_output_ident_directive (string); +} + +/* The ASM_OUTPUT_CASE_END worker. */ + +void +cris_asm_output_case_end (FILE *stream, int num, rtx table) +{ + /* Step back, over the label for the table, to the actual casejump and + assert that we find only what's expected. */ + rtx whole_jump_insn = prev_nonnote_nondebug_insn (table); + gcc_assert (whole_jump_insn != NULL_RTX && LABEL_P (whole_jump_insn)); + whole_jump_insn = prev_nonnote_nondebug_insn (whole_jump_insn); + gcc_assert (whole_jump_insn != NULL_RTX + && (JUMP_P (whole_jump_insn) + || (TARGET_V32 && INSN_P (whole_jump_insn) + && GET_CODE (PATTERN (whole_jump_insn)) == SEQUENCE))); + /* Get the pattern of the casejump, so we can extract the default label. */ + whole_jump_insn = PATTERN (whole_jump_insn); + + if (TARGET_V32) + { + /* This can be a SEQUENCE, meaning the delay-slot of the jump is + filled. We also output the offset word a little differently. */ + rtx parallel_jump + = (GET_CODE (whole_jump_insn) == SEQUENCE + ? PATTERN (XVECEXP (whole_jump_insn, 0, 0)) : whole_jump_insn); + + asm_fprintf (stream, + "\t.word %LL%d-.%s\n", + CODE_LABEL_NUMBER (XEXP (XEXP (XEXP (XVECEXP + (parallel_jump, 0, 0), + 1), 2), 0)), + (TARGET_PDEBUG ? 
"; default" : "")); + return; + } + + asm_fprintf (stream, + "\t.word %LL%d-%LL%d%s\n", + CODE_LABEL_NUMBER (XEXP + (XEXP + (XEXP (XVECEXP (whole_jump_insn, 0, 0), 1), + 2), 0)), + num, + (TARGET_PDEBUG ? "; default" : "")); +} + +/* The TARGET_OPTION_OVERRIDE worker. + As is the norm, this also parses -mfoo=bar type parameters. */ + +static void +cris_option_override (void) +{ + if (cris_max_stackframe_str) + { + cris_max_stackframe = atoi (cris_max_stackframe_str); + + /* Do some sanity checking. */ + if (cris_max_stackframe < 0 || cris_max_stackframe > 0x20000000) + internal_error ("-max-stackframe=%d is not usable, not between 0 and %d", + cris_max_stackframe, 0x20000000); + } + + /* Let "-metrax4" and "-metrax100" change the cpu version. */ + if (TARGET_SVINTO && cris_cpu_version < CRIS_CPU_SVINTO) + cris_cpu_version = CRIS_CPU_SVINTO; + else if (TARGET_ETRAX4_ADD && cris_cpu_version < CRIS_CPU_ETRAX4) + cris_cpu_version = CRIS_CPU_ETRAX4; + + /* Parse -march=... and its synonym, the deprecated -mcpu=... */ + if (cris_cpu_str) + { + cris_cpu_version + = (*cris_cpu_str == 'v' ? atoi (cris_cpu_str + 1) : -1); + + if (strcmp ("etrax4", cris_cpu_str) == 0) + cris_cpu_version = 3; + + if (strcmp ("svinto", cris_cpu_str) == 0 + || strcmp ("etrax100", cris_cpu_str) == 0) + cris_cpu_version = 8; + + if (strcmp ("ng", cris_cpu_str) == 0 + || strcmp ("etrax100lx", cris_cpu_str) == 0) + cris_cpu_version = 10; + + if (cris_cpu_version < 0 || cris_cpu_version > 32) + error ("unknown CRIS version specification in -march= or -mcpu= : %s", + cris_cpu_str); + + /* Set the target flags. */ + if (cris_cpu_version >= CRIS_CPU_ETRAX4) + target_flags |= MASK_ETRAX4_ADD; + + /* If this is Svinto or higher, align for 32 bit accesses. */ + if (cris_cpu_version >= CRIS_CPU_SVINTO) + target_flags + |= (MASK_SVINTO | MASK_ALIGN_BY_32 + | MASK_STACK_ALIGN | MASK_CONST_ALIGN + | MASK_DATA_ALIGN); + + /* Note that we do not add new flags when it can be completely + described with a macro that uses -mcpu=X. So + TARGET_HAS_MUL_INSNS is (cris_cpu_version >= CRIS_CPU_NG). */ + } + + if (cris_tune_str) + { + int cris_tune + = (*cris_tune_str == 'v' ? atoi (cris_tune_str + 1) : -1); + + if (strcmp ("etrax4", cris_tune_str) == 0) + cris_tune = 3; + + if (strcmp ("svinto", cris_tune_str) == 0 + || strcmp ("etrax100", cris_tune_str) == 0) + cris_tune = 8; + + if (strcmp ("ng", cris_tune_str) == 0 + || strcmp ("etrax100lx", cris_tune_str) == 0) + cris_tune = 10; + + if (cris_tune < 0 || cris_tune > 32) + error ("unknown CRIS cpu version specification in -mtune= : %s", + cris_tune_str); + + if (cris_tune >= CRIS_CPU_SVINTO) + /* We have currently nothing more to tune than alignment for + memory accesses. */ + target_flags + |= (MASK_STACK_ALIGN | MASK_CONST_ALIGN + | MASK_DATA_ALIGN | MASK_ALIGN_BY_32); + } + + if (cris_cpu_version >= CRIS_CPU_V32) + target_flags &= ~(MASK_SIDE_EFFECT_PREFIXES|MASK_MUL_BUG); + + if (flag_pic) + { + /* Use error rather than warning, so invalid use is easily + detectable. Still change to the values we expect, to avoid + further errors. */ + if (! TARGET_LINUX) + { + error ("-fPIC and -fpic are not supported in this configuration"); + flag_pic = 0; + } + + /* Turn off function CSE. We need to have the addresses reach the + call expanders to get PLT-marked, as they could otherwise be + compared against zero directly or indirectly. After visiting the + call expanders they will then be cse:ed, as the call expanders + force_reg the addresses, effectively forcing flag_no_function_cse + to 0. 
*/ + flag_no_function_cse = 1; + } + + /* Set the per-function-data initializer. */ + init_machine_status = cris_init_machine_status; +} + +/* The TARGET_ASM_OUTPUT_MI_THUNK worker. */ + +static void +cris_asm_output_mi_thunk (FILE *stream, + tree thunkdecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, + tree funcdecl) +{ + /* Make sure unwind info is emitted for the thunk if needed. */ + final_start_function (emit_barrier (), stream, 1); + + if (delta > 0) + fprintf (stream, "\tadd%s " HOST_WIDE_INT_PRINT_DEC ",$%s\n", + ADDITIVE_SIZE_MODIFIER (delta), delta, + reg_names[CRIS_FIRST_ARG_REG]); + else if (delta < 0) + fprintf (stream, "\tsub%s " HOST_WIDE_INT_PRINT_DEC ",$%s\n", + ADDITIVE_SIZE_MODIFIER (-delta), -delta, + reg_names[CRIS_FIRST_ARG_REG]); + + if (flag_pic) + { + const char *name = XSTR (XEXP (DECL_RTL (funcdecl), 0), 0); + + name = (* targetm.strip_name_encoding) (name); + + if (TARGET_V32) + { + fprintf (stream, "\tba "); + assemble_name (stream, name); + fprintf (stream, "%s\n", CRIS_PLT_PCOFFSET_SUFFIX); + } + else + { + fprintf (stream, "add.d "); + assemble_name (stream, name); + fprintf (stream, "%s,$pc\n", CRIS_PLT_PCOFFSET_SUFFIX); + } + } + else + { + fprintf (stream, "jump "); + assemble_name (stream, XSTR (XEXP (DECL_RTL (funcdecl), 0), 0)); + fprintf (stream, "\n"); + + if (TARGET_V32) + fprintf (stream, "\tnop\n"); + } + + final_end_function (); +} + +/* Boilerplate emitted at start of file. + + NO_APP *only at file start* means faster assembly. It also means + comments are not allowed. In some cases comments will be output + for debugging purposes. Make sure they are allowed then. */ +static void +cris_file_start (void) +{ + /* These expressions can vary at run time, so we cannot put + them into TARGET_INITIALIZER. */ + targetm.asm_file_start_app_off = !(TARGET_PDEBUG || flag_print_asm_name); + + default_file_start (); +} + +/* Output that goes at the end of the file, similarly. */ + +static void +cris_file_end (void) +{ + /* For CRIS, the default is to assume *no* executable stack, so output + an executable-stack-note only when needed. */ + if (TARGET_LINUX && trampolines_created) + file_end_indicate_exec_stack (); +} + +/* Rename the function calls for integer multiply and divide. */ +static void +cris_init_libfuncs (void) +{ + set_optab_libfunc (smul_optab, SImode, "__Mul"); + set_optab_libfunc (sdiv_optab, SImode, "__Div"); + set_optab_libfunc (udiv_optab, SImode, "__Udiv"); + set_optab_libfunc (smod_optab, SImode, "__Mod"); + set_optab_libfunc (umod_optab, SImode, "__Umod"); + + /* Atomic data being unaligned is unfortunately a reality. + Deal with it. */ + if (TARGET_ATOMICS_MAY_CALL_LIBFUNCS) + { + set_optab_libfunc (sync_compare_and_swap_optab, SImode, + "__cris_atcmpxchgr32"); + set_optab_libfunc (sync_compare_and_swap_optab, HImode, + "__cris_atcmpxchgr16"); + } +} + +/* The INIT_EXPANDERS worker sets the per-function-data initializer and + mark functions. */ + +void +cris_init_expanders (void) +{ + /* Nothing here at the moment. */ +} + +/* Zero initialization is OK for all current fields. */ + +static struct machine_function * +cris_init_machine_status (void) +{ + return ggc_alloc_cleared_machine_function (); +} + +/* Split a 2 word move (DI or presumably DF) into component parts. + Originally a copy of gen_split_move_double in m32r.c. 
*/ + +rtx +cris_split_movdx (rtx *operands) +{ + enum machine_mode mode = GET_MODE (operands[0]); + rtx dest = operands[0]; + rtx src = operands[1]; + rtx val; + + /* We used to have to handle (SUBREG (MEM)) here, but that should no + longer happen; after reload there are no SUBREGs any more, and we're + only called after reload. */ + CRIS_ASSERT (GET_CODE (dest) != SUBREG && GET_CODE (src) != SUBREG); + + start_sequence (); + if (REG_P (dest)) + { + int dregno = REGNO (dest); + + /* Reg-to-reg copy. */ + if (REG_P (src)) + { + int sregno = REGNO (src); + + int reverse = (dregno == sregno + 1); + + /* We normally copy the low-numbered register first. However, if + the first register operand 0 is the same as the second register of + operand 1, we must copy in the opposite order. */ + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + operand_subword (src, reverse, TRUE, mode))); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, !reverse, TRUE, mode), + operand_subword (src, !reverse, TRUE, mode))); + } + /* Constant-to-reg copy. */ + else if (CONST_INT_P (src) || GET_CODE (src) == CONST_DOUBLE) + { + rtx words[2]; + split_double (src, &words[0], &words[1]); + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), + words[0])); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), + words[1])); + } + /* Mem-to-reg copy. */ + else if (MEM_P (src)) + { + /* If the high-address word is used in the address, we must load it + last. Otherwise, load it first. */ + rtx addr = XEXP (src, 0); + int reverse + = (refers_to_regno_p (dregno, dregno + 1, addr, NULL) != 0); + + /* The original code implies that we can't do + move.x [rN+],rM move.x [rN],rM+1 + when rN is dead, because of REG_NOTES damage. That is + consistent with what I've seen, so don't try it. + + We have two different cases here; if the addr is POST_INC, + just pass it through, otherwise add constants. */ + + if (GET_CODE (addr) == POST_INC) + { + rtx mem; + rtx insn; + + /* Whenever we emit insns with post-incremented + addresses ourselves, we must add a post-inc note + manually. */ + mem = change_address (src, SImode, addr); + insn + = gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), mem); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + + mem = copy_rtx (mem); + insn + = gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), mem); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + } + else + { + /* Make sure we don't get any other addresses with + embedded postincrements. They should be stopped in + GO_IF_LEGITIMATE_ADDRESS, but we're here for your + safety. */ + if (side_effects_p (addr)) + fatal_insn ("unexpected side-effects in address", addr); + + emit_insn (gen_rtx_SET + (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + change_address + (src, SImode, + plus_constant (Pmode, addr, + reverse * UNITS_PER_WORD)))); + emit_insn (gen_rtx_SET + (VOIDmode, + operand_subword (dest, ! reverse, TRUE, mode), + change_address + (src, SImode, + plus_constant (Pmode, addr, + (! reverse) * + UNITS_PER_WORD)))); + } + } + else + internal_error ("unknown src"); + } + /* Reg-to-mem copy or clear mem. 
*/ + else if (MEM_P (dest) + && (REG_P (src) + || src == const0_rtx + || src == CONST0_RTX (DFmode))) + { + rtx addr = XEXP (dest, 0); + + if (GET_CODE (addr) == POST_INC) + { + rtx mem; + rtx insn; + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + mem = change_address (dest, SImode, addr); + insn + = gen_rtx_SET (VOIDmode, + mem, operand_subword (src, 0, TRUE, mode)); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + + mem = copy_rtx (mem); + insn + = gen_rtx_SET (VOIDmode, + mem, + operand_subword (src, 1, TRUE, mode)); + insn = emit_insn (insn); + if (GET_CODE (XEXP (mem, 0)) == POST_INC) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, XEXP (XEXP (mem, 0), 0), + REG_NOTES (insn)); + } + else + { + /* Make sure we don't get any other addresses with embedded + postincrements. They should be stopped in + GO_IF_LEGITIMATE_ADDRESS, but we're here for your safety. */ + if (side_effects_p (addr)) + fatal_insn ("unexpected side-effects in address", addr); + + emit_insn (gen_rtx_SET + (VOIDmode, + change_address (dest, SImode, addr), + operand_subword (src, 0, TRUE, mode))); + + emit_insn (gen_rtx_SET + (VOIDmode, + change_address (dest, SImode, + plus_constant (Pmode, addr, + UNITS_PER_WORD)), + operand_subword (src, 1, TRUE, mode))); + } + } + + else + internal_error ("unknown dest"); + + val = get_insns (); + end_sequence (); + return val; +} + +/* The expander for the prologue pattern name. */ + +void +cris_expand_prologue (void) +{ + int regno; + int size = get_frame_size (); + /* Shorten the used name for readability. */ + int cfoa_size = crtl->outgoing_args_size; + int last_movem_reg = -1; + int framesize = 0; + rtx mem, insn; + int return_address_on_stack = cris_return_address_on_stack (); + int got_really_used = false; + int n_movem_regs = 0; + int pretend = crtl->args.pretend_args_size; + + /* Don't do anything if no prologues or epilogues are wanted. */ + if (!TARGET_PROLOGUE_EPILOGUE) + return; + + CRIS_ASSERT (size >= 0); + + if (crtl->uses_pic_offset_table) + { + /* A reference may have been optimized out (like the abort () in + fde_split in unwind-dw2-fde.c, at least 3.2.1) so check that + it's still used. */ + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* Align the size to what's best for the CPU model. */ + if (TARGET_STACK_ALIGN) + size = TARGET_ALIGN_BY_32 ? (size + 3) & ~3 : (size + 1) & ~1; + + if (pretend) + { + /* See also cris_setup_incoming_varargs where + cfun->machine->stdarg_regs is set. There are other setters of + crtl->args.pretend_args_size than stdarg handling, like + for an argument passed with parts in R13 and stack. We must + not store R13 into the pretend-area for that case, as GCC does + that itself. "Our" store would be marked as redundant and GCC + will attempt to remove it, which will then be flagged as an + internal error; trying to remove a frame-related insn. 
*/ + int stdarg_regs = cfun->machine->stdarg_regs; + + framesize += pretend; + + for (regno = CRIS_FIRST_ARG_REG + CRIS_MAX_ARGS_IN_REGS - 1; + stdarg_regs > 0; + regno--, pretend -= 4, stdarg_regs--) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -4))); + /* FIXME: When dwarf2 frame output and unless asynchronous + exceptions, make dwarf2 bundle together all stack + adjustments like it does for registers between stack + adjustments. */ + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_varargs_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, regno)); + + /* Note the absence of RTX_FRAME_RELATED_P on the above insn: + the value isn't restored, so we don't want to tell dwarf2 + that it's been stored to stack, else EH handling info would + get confused. */ + } + + /* For other setters of crtl->args.pretend_args_size, we + just adjust the stack by leaving the remaining size in + "pretend", handled below. */ + } + + /* Save SRP if not a leaf function. */ + if (return_address_on_stack) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 - pretend))); + pretend = 0; + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM)); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += 4; + } + + /* Set up the frame pointer, if needed. */ + if (frame_pointer_needed) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -4 - pretend))); + pretend = 0; + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + framesize += 4; + } + + /* Between frame-pointer and saved registers lie the area for local + variables. If we get here with "pretended" size remaining, count + it into the general stack size. */ + size += pretend; + + /* Get a contiguous sequence of registers, starting with R0, that need + to be saved. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + { + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + n_movem_regs++; + + /* Check if movem may be used for registers so far. */ + if (regno == last_movem_reg + 1) + /* Yes, update next expected register. */ + last_movem_reg = regno; + else + { + /* We cannot use movem for all registers. We have to flush + any movem:ed registers we got so far. */ + if (last_movem_reg != -1) + { + int n_saved + = (n_movem_regs == 1) ? 1 : last_movem_reg + 1; + + /* It is a win to use a side-effect assignment for + 64 <= size <= 128. But side-effect on movem was + not usable for CRIS v0..3. Also only do it if + side-effects insns are allowed. 
*/ + if ((last_movem_reg + 1) * 4 + size >= 64 + && (last_movem_reg + 1) * 4 + size <= 128 + && (cris_cpu_version >= CRIS_CPU_SVINTO || n_saved == 1) + && TARGET_SIDE_EFFECT_PREFIXES) + { + mem + = gen_rtx_MEM (SImode, + plus_constant (Pmode, stack_pointer_rtx, + -(n_saved * 4 + size))); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn + = cris_emit_movem_store (mem, GEN_INT (n_saved), + -(n_saved * 4 + size), + true); + } + else + { + insn + = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -(n_saved * 4 + size))); + insn = emit_insn (insn); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), + 0, true); + } + + framesize += n_saved * 4 + size; + last_movem_reg = -1; + size = 0; + } + + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -4 - size))); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (mem, gen_rtx_raw_REG (SImode, regno)); + RTX_FRAME_RELATED_P (insn) = 1; + + framesize += 4 + size; + size = 0; + } + } + } + + /* Check after, if we could movem all registers. This is the normal case. */ + if (last_movem_reg != -1) + { + int n_saved + = (n_movem_regs == 1) ? 1 : last_movem_reg + 1; + + /* Side-effect on movem was not usable for CRIS v0..3. Also only + do it if side-effects insns are allowed. */ + if ((last_movem_reg + 1) * 4 + size >= 64 + && (last_movem_reg + 1) * 4 + size <= 128 + && (cris_cpu_version >= CRIS_CPU_SVINTO || n_saved == 1) + && TARGET_SIDE_EFFECT_PREFIXES) + { + mem + = gen_rtx_MEM (SImode, + plus_constant (Pmode, stack_pointer_rtx, + -(n_saved * 4 + size))); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), + -(n_saved * 4 + size), true); + } + else + { + insn + = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -(n_saved * 4 + size))); + insn = emit_insn (insn); + RTX_FRAME_RELATED_P (insn) = 1; + + mem = gen_rtx_MEM (SImode, stack_pointer_rtx); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = cris_emit_movem_store (mem, GEN_INT (n_saved), 0, true); + } + + framesize += n_saved * 4 + size; + /* We have to put outgoing argument space after regs. */ + if (cfoa_size) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -cfoa_size))); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += cfoa_size; + } + } + else if ((size + cfoa_size) > 0) + { + insn = emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, + stack_pointer_rtx, + -(cfoa_size + size)))); + RTX_FRAME_RELATED_P (insn) = 1; + framesize += size + cfoa_size; + } + + /* Set up the PIC register, if it is used. */ + if (got_really_used) + { + rtx got + = gen_rtx_UNSPEC (SImode, gen_rtvec (1, const0_rtx), CRIS_UNSPEC_GOT); + emit_move_insn (pic_offset_table_rtx, got); + + /* FIXME: This is a cover-up for flow2 messing up; it doesn't + follow exceptional paths and tries to delete the GOT load as + unused, if it isn't used on the non-exceptional paths. Other + ports have similar or other cover-ups, or plain bugs marking + the GOT register load as maybe-dead. 
To see this, remove the + line below and try libsupc++/vec.cc or a trivial + "static void y (); void x () {try {y ();} catch (...) {}}". */ + emit_use (pic_offset_table_rtx); + } + + if (cris_max_stackframe && framesize > cris_max_stackframe) + warning (0, "stackframe too big: %d bytes", framesize); +} + +/* The expander for the epilogue pattern. */ + +void +cris_expand_epilogue (void) +{ + int regno; + int size = get_frame_size (); + int last_movem_reg = -1; + int argspace_offset = crtl->outgoing_args_size; + int pretend = crtl->args.pretend_args_size; + rtx mem; + bool return_address_on_stack = cris_return_address_on_stack (); + /* A reference may have been optimized out + (like the abort () in fde_split in unwind-dw2-fde.c, at least 3.2.1) + so check that it's still used. */ + int got_really_used = false; + int n_movem_regs = 0; + + if (!TARGET_PROLOGUE_EPILOGUE) + return; + + if (crtl->uses_pic_offset_table) + { + /* A reference may have been optimized out (like the abort () in + fde_split in unwind-dw2-fde.c, at least 3.2.1) so check that + it's still used. */ + push_topmost_sequence (); + got_really_used + = reg_used_between_p (pic_offset_table_rtx, get_insns (), NULL_RTX); + pop_topmost_sequence (); + } + + /* Align byte count of stack frame. */ + if (TARGET_STACK_ALIGN) + size = TARGET_ALIGN_BY_32 ? (size + 3) & ~3 : (size + 1) & ~1; + + /* Check how many saved regs we can movem. They start at r0 and must + be contiguous. */ + for (regno = 0; + regno < FIRST_PSEUDO_REGISTER; + regno++) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + n_movem_regs++; + + if (regno == last_movem_reg + 1) + last_movem_reg = regno; + else + break; + } + + /* If there was only one register that really needed to be saved + through movem, don't use movem. */ + if (n_movem_regs == 1) + last_movem_reg = -1; + + /* Now emit "normal" move insns for all regs higher than the movem + regs. */ + for (regno = FIRST_PSEUDO_REGISTER - 1; + regno > last_movem_reg; + regno--) + if (cris_reg_saved_in_regsave_area (regno, got_really_used)) + { + rtx insn; + + if (argspace_offset) + { + /* There is an area for outgoing parameters located before + the saved registers. We have to adjust for that. */ + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + argspace_offset))); + /* Make sure we only do this once. */ + argspace_offset = 0; + } + + mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (gen_rtx_raw_REG (SImode, regno), mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + /* If we have any movem-restore, do it now. */ + if (last_movem_reg != -1) + { + rtx insn; + + if (argspace_offset) + { + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + argspace_offset))); + argspace_offset = 0; + } + + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn + = emit_insn (cris_gen_movem_load (mem, + GEN_INT (last_movem_reg + 1), 0)); + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. 
*/ + if (side_effects_p (PATTERN (insn))) + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + /* If we don't clobber all of the allocated stack area (we've already + deallocated saved registers), GCC might want to schedule loads from + the stack to *after* the stack-pointer restore, which introduces an + interrupt race condition. This happened for the initial-value + SRP-restore for g++.dg/eh/registers1.C (noticed by inspection of + other failure for that test). It also happened for the stack slot + for the return value in (one version of) + linux/fs/dcache.c:__d_lookup, at least with "-O2 + -fno-omit-frame-pointer". */ + + /* Restore frame pointer if necessary. */ + if (frame_pointer_needed) + { + rtx insn; + + emit_insn (gen_cris_frame_deallocated_barrier ()); + + emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); + mem = gen_rtx_MEM (SImode, gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (frame_pointer_rtx, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + else if ((size + argspace_offset) != 0) + { + emit_insn (gen_cris_frame_deallocated_barrier ()); + + /* If there was no frame-pointer to restore sp from, we must + explicitly deallocate local variables. */ + + /* Handle space for outgoing parameters that hasn't been handled + yet. */ + size += argspace_offset; + + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, size))); + } + + /* If this function has no pushed register parameters + (stdargs/varargs), and if it is not a leaf function, then we have + the return address on the stack. */ + if (return_address_on_stack && pretend == 0) + { + if (TARGET_V32 || crtl->calls_eh_return) + { + rtx mem; + rtx insn; + rtx srpreg = gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM); + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (srpreg, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_raw_REG (SImode, + CRIS_STACKADJ_REG))); + cris_expand_return (false); + } + else + cris_expand_return (true); + + return; + } + + /* If we pushed some register parameters, then adjust the stack for + them. */ + if (pretend != 0) + { + /* If SRP is stored on the way, we need to restore it first. */ + if (return_address_on_stack) + { + rtx mem; + rtx srpreg = gen_rtx_raw_REG (SImode, CRIS_SRP_REGNUM); + rtx insn; + + mem = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (mem, get_frame_alias_set ()); + insn = emit_move_insn (srpreg, mem); + + /* Whenever we emit insns with post-incremented addresses + ourselves, we must add a post-inc note manually. */ + REG_NOTES (insn) + = alloc_EXPR_LIST (REG_INC, stack_pointer_rtx, REG_NOTES (insn)); + } + + emit_insn (gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + pretend))); + } + + /* Perform the "physical" unwinding that the EH machinery calculated. 
*/ + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + gen_rtx_raw_REG (SImode, + CRIS_STACKADJ_REG))); + cris_expand_return (false); +} + +/* Worker function for generating movem from mem for load_multiple. */ + +rtx +cris_gen_movem_load (rtx src, rtx nregs_rtx, int nprefix) +{ + int nregs = INTVAL (nregs_rtx); + rtvec vec; + int eltno = 1; + int i; + rtx srcreg = XEXP (src, 0); + unsigned int regno = nregs - 1; + int regno_inc = -1; + + if (TARGET_V32) + { + regno = 0; + regno_inc = 1; + } + + if (GET_CODE (srcreg) == POST_INC) + srcreg = XEXP (srcreg, 0); + + CRIS_ASSERT (REG_P (srcreg)); + + /* Don't use movem for just one insn. The insns are equivalent except + for the pipeline hazard (on v32); movem does not forward the loaded + registers so there's a three cycles penalty for their use. */ + if (nregs == 1) + return gen_movsi (gen_rtx_REG (SImode, 0), src); + + vec = rtvec_alloc (nprefix + nregs + + (GET_CODE (XEXP (src, 0)) == POST_INC)); + + if (GET_CODE (XEXP (src, 0)) == POST_INC) + { + RTVEC_ELT (vec, nprefix + 1) + = gen_rtx_SET (VOIDmode, srcreg, + plus_constant (Pmode, srcreg, nregs * 4)); + eltno++; + } + + src = replace_equiv_address (src, srcreg); + RTVEC_ELT (vec, nprefix) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno), src); + regno += regno_inc; + + for (i = 1; i < nregs; i++, eltno++) + { + RTVEC_ELT (vec, nprefix + eltno) + = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regno), + adjust_address_nv (src, SImode, i * 4)); + regno += regno_inc; + } + + return gen_rtx_PARALLEL (VOIDmode, vec); +} + +/* Worker function for generating movem to mem. If FRAME_RELATED, notes + are added that the dwarf2 machinery understands. */ + +rtx +cris_emit_movem_store (rtx dest, rtx nregs_rtx, int increment, + bool frame_related) +{ + int nregs = INTVAL (nregs_rtx); + rtvec vec; + int eltno = 1; + int i; + rtx insn; + rtx destreg = XEXP (dest, 0); + unsigned int regno = nregs - 1; + int regno_inc = -1; + + if (TARGET_V32) + { + regno = 0; + regno_inc = 1; + } + + if (GET_CODE (destreg) == POST_INC) + increment += nregs * 4; + + if (GET_CODE (destreg) == POST_INC || GET_CODE (destreg) == PLUS) + destreg = XEXP (destreg, 0); + + CRIS_ASSERT (REG_P (destreg)); + + /* Don't use movem for just one insn. The insns are equivalent except + for the pipeline hazard (on v32); movem does not forward the loaded + registers so there's a three cycles penalty for use. */ + if (nregs == 1) + { + rtx mov = gen_rtx_SET (VOIDmode, dest, gen_rtx_REG (SImode, 0)); + + if (increment == 0) + { + insn = emit_insn (mov); + if (frame_related) + RTX_FRAME_RELATED_P (insn) = 1; + return insn; + } + + /* If there was a request for a side-effect, create the ordinary + parallel. */ + vec = rtvec_alloc (2); + + RTVEC_ELT (vec, 0) = mov; + RTVEC_ELT (vec, 1) = gen_rtx_SET (VOIDmode, destreg, + plus_constant (Pmode, destreg, + increment)); + if (frame_related) + { + RTX_FRAME_RELATED_P (mov) = 1; + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 1)) = 1; + } + } + else + { + vec = rtvec_alloc (nregs + (increment != 0 ? 1 : 0)); + RTVEC_ELT (vec, 0) + = gen_rtx_SET (VOIDmode, + replace_equiv_address (dest, + plus_constant (Pmode, destreg, + increment)), + gen_rtx_REG (SImode, regno)); + regno += regno_inc; + + /* The dwarf2 info wants this mark on each component in a parallel + that's part of the prologue (though it's optional on the first + component). 
*/ + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 0)) = 1; + + if (increment != 0) + { + RTVEC_ELT (vec, 1) + = gen_rtx_SET (VOIDmode, destreg, + plus_constant (Pmode, destreg, + increment != 0 + ? increment : nregs * 4)); + eltno++; + + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, 1)) = 1; + + /* Don't call adjust_address_nv on a post-incremented address if + we can help it. */ + if (GET_CODE (XEXP (dest, 0)) == POST_INC) + dest = replace_equiv_address (dest, destreg); + } + + for (i = 1; i < nregs; i++, eltno++) + { + RTVEC_ELT (vec, eltno) + = gen_rtx_SET (VOIDmode, adjust_address_nv (dest, SImode, i * 4), + gen_rtx_REG (SImode, regno)); + if (frame_related) + RTX_FRAME_RELATED_P (RTVEC_ELT (vec, eltno)) = 1; + regno += regno_inc; + } + } + + insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, vec)); + + /* Because dwarf2out.c handles the insns in a parallel as a sequence, + we need to keep the stack adjustment separate, after the + MEM-setters. Else the stack-adjustment in the second component of + the parallel would be mishandled; the offsets for the SETs that + follow it would be wrong. We prepare for this by adding a + REG_FRAME_RELATED_EXPR with the MEM-setting parts in a SEQUENCE + followed by the increment. Note that we have FRAME_RELATED_P on + all the SETs, including the original stack adjustment SET in the + parallel. */ + if (frame_related) + { + if (increment != 0) + { + rtx seq = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (nregs + 1)); + XVECEXP (seq, 0, 0) = copy_rtx (XVECEXP (PATTERN (insn), 0, 0)); + for (i = 1; i < nregs; i++) + XVECEXP (seq, 0, i) + = copy_rtx (XVECEXP (PATTERN (insn), 0, i + 1)); + XVECEXP (seq, 0, nregs) = copy_rtx (XVECEXP (PATTERN (insn), 0, 1)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, seq); + } + + RTX_FRAME_RELATED_P (insn) = 1; + } + + return insn; +} + +/* Worker function for expanding the address for PIC function calls. */ + +void +cris_expand_pic_call_address (rtx *opp) +{ + rtx op = *opp; + + gcc_assert (MEM_P (op)); + op = XEXP (op, 0); + + /* It might be that code can be generated that jumps to 0 (or to a + specific address). Don't die on that. (There is a + testcase.) */ + if (CONSTANT_ADDRESS_P (op) && !CONST_INT_P (op)) + { + enum cris_pic_symbol_type t = cris_pic_symbol_type_of (op); + + CRIS_ASSERT (can_create_pseudo_p ()); + + /* For local symbols (non-PLT), just get the plain symbol + reference into a register. For symbols that can be PLT, make + them PLT. */ + if (t == cris_rel_symbol) + { + /* For v32, we're fine as-is; just PICify the symbol. Forcing + into a register caused performance regression for 3.2.1, + observable in __floatdidf and elsewhere in libgcc. */ + if (TARGET_V32) + { + rtx sym = GET_CODE (op) != CONST ? op : get_related_value (op); + HOST_WIDE_INT offs = get_integer_term (op); + + /* We can't get calls to sym+N, N integer, can we? */ + gcc_assert (offs == 0); + + op = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_PCREL)); + } + else + op = force_reg (Pmode, op); + } + else if (t == cris_got_symbol) + { + if (TARGET_AVOID_GOTPLT) + { + /* Change a "jsr sym" into (allocate register rM, rO) + "move.d (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_GOTREL)),rM" + "add.d rPIC,rM,rO", "jsr rO" for pre-v32 and + "jsr (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_PCREL))" + for v32. */ + rtx tem, rm, ro; + gcc_assert (can_create_pseudo_p ()); + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op), + TARGET_V32 + ? 
CRIS_UNSPEC_PLT_PCREL + : CRIS_UNSPEC_PLT_GOTREL); + tem = gen_rtx_CONST (Pmode, tem); + if (TARGET_V32) + op = tem; + else + { + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, tem); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, + pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + op = ro; + } + } + else + { + /* Change a "jsr sym" into (allocate register rM, rO) + "move.d (const (unspec [sym] CRIS_UNSPEC_PLTGOTREAD)),rM" + "add.d rPIC,rM,rO" "jsr [rO]" with the memory access + marked as not trapping and not aliasing. No "move.d + [rO],rP" as that would invite to re-use of a value + that should not be reused. FIXME: Need a peephole2 + for cases when this is cse:d from the call, to change + back to just get the PLT entry address, so we don't + resolve the same symbol over and over (the memory + access of the PLTGOT isn't constant). */ + rtx tem, mem, rm, ro; + + gcc_assert (can_create_pseudo_p ()); + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op), + CRIS_UNSPEC_PLTGOTREAD); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, + pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + mem = gen_rtx_MEM (Pmode, ro); + + /* This MEM doesn't alias anything. Whether it aliases + other same symbols is unimportant. */ + set_mem_alias_set (mem, new_alias_set ()); + MEM_NOTRAP_P (mem) = 1; + op = mem; + } + } + else + /* Can't possibly get a GOT-needing-fixup for a function-call, + right? */ + fatal_insn ("unidentifiable call op", op); + + *opp = replace_equiv_address (*opp, op); + } +} + +/* Make sure operands are in the right order for an addsi3 insn as + generated by a define_split. Nothing but REG_P as the first + operand is recognized by addsi3 after reload. OPERANDS contains + the operands, with the first at OPERANDS[N] and the second at + OPERANDS[N+1]. */ + +void +cris_order_for_addsi3 (rtx *operands, int n) +{ + if (!REG_P (operands[n])) + { + rtx tem = operands[n]; + operands[n] = operands[n + 1]; + operands[n + 1] = tem; + } +} + +/* Use from within code, from e.g. PRINT_OPERAND and + PRINT_OPERAND_ADDRESS. Macros used in output_addr_const need to emit + different things depending on whether code operand or constant is + emitted. */ + +static void +cris_output_addr_const (FILE *file, rtx x) +{ + in_code++; + output_addr_const (file, x); + in_code--; +} + +/* Worker function for ASM_OUTPUT_SYMBOL_REF. */ + +void +cris_asm_output_symbol_ref (FILE *file, rtx x) +{ + gcc_assert (GET_CODE (x) == SYMBOL_REF); + + if (flag_pic && in_code > 0) + { + const char *origstr = XSTR (x, 0); + const char *str; + str = (* targetm.strip_name_encoding) (origstr); + assemble_name (file, str); + + /* Sanity check. */ + if (!TARGET_V32 && !crtl->uses_pic_offset_table) + output_operand_lossage ("PIC register isn't set up"); + } + else + assemble_name (file, XSTR (x, 0)); +} + +/* Worker function for ASM_OUTPUT_LABEL_REF. */ + +void +cris_asm_output_label_ref (FILE *file, char *buf) +{ + if (flag_pic && in_code > 0) + { + assemble_name (file, buf); + + /* Sanity check. */ + if (!TARGET_V32 && !crtl->uses_pic_offset_table) + internal_error ("emitting PIC operand, but PIC register " + "isn%'t set up"); + } + else + assemble_name (file, buf); +} + +/* Worker function for TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
*/ + +static bool +cris_output_addr_const_extra (FILE *file, rtx xconst) +{ + switch (GET_CODE (xconst)) + { + rtx x; + + case UNSPEC: + x = XVECEXP (xconst, 0, 0); + CRIS_ASSERT (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == LABEL_REF + || GET_CODE (x) == CONST); + output_addr_const (file, x); + switch (XINT (xconst, 1)) + { + case CRIS_UNSPEC_PCREL: + /* We only get this with -fpic/PIC to tell it apart from an + invalid symbol. We can't tell here, but it should only + be the operand of a call or movsi. */ + gcc_assert (TARGET_V32 && flag_pic); + break; + + case CRIS_UNSPEC_PLT_PCREL: + gcc_assert (TARGET_V32); + fprintf (file, ":PLT"); + break; + + case CRIS_UNSPEC_PLT_GOTREL: + gcc_assert (!TARGET_V32); + fprintf (file, ":PLTG"); + break; + + case CRIS_UNSPEC_GOTREL: + gcc_assert (!TARGET_V32); + fprintf (file, ":GOTOFF"); + break; + + case CRIS_UNSPEC_GOTREAD: + if (flag_pic == 1) + fprintf (file, ":GOT16"); + else + fprintf (file, ":GOT"); + break; + + case CRIS_UNSPEC_PLTGOTREAD: + if (flag_pic == 1) + fprintf (file, CRIS_GOTPLT_SUFFIX "16"); + else + fprintf (file, CRIS_GOTPLT_SUFFIX); + break; + + default: + gcc_unreachable (); + } + return true; + + default: + return false; + } +} + +/* Worker function for TARGET_STRUCT_VALUE_RTX. */ + +static rtx +cris_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED, + int incoming ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (Pmode, CRIS_STRUCT_VALUE_REGNUM); +} + +/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ + +static void +cris_setup_incoming_varargs (cumulative_args_t ca_v, + enum machine_mode mode ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED, + int *pretend_arg_size, + int second_time) +{ + CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); + + if (ca->regs < CRIS_MAX_ARGS_IN_REGS) + { + int stdarg_regs = CRIS_MAX_ARGS_IN_REGS - ca->regs; + cfun->machine->stdarg_regs = stdarg_regs; + *pretend_arg_size = stdarg_regs * 4; + } + + if (TARGET_PDEBUG) + fprintf (asm_out_file, + "\n; VA:: ANSI: %d args before, anon @ #%d, %dtime\n", + ca->regs, *pretend_arg_size, second_time); +} + +/* Return true if TYPE must be passed by invisible reference. + For cris, we pass <= 8 bytes by value, others by reference. */ + +static bool +cris_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + return (targetm.calls.must_pass_in_stack (mode, type) + || CRIS_FUNCTION_ARG_SIZE (mode, type) > 8); +} + +/* A combination of defining TARGET_PROMOTE_FUNCTION_MODE, promoting arguments + and *not* defining TARGET_PROMOTE_PROTOTYPES or PROMOTE_MODE gives the + best code size and speed for gcc, ipps and products in gcc-2.7.2. */ + +enum machine_mode +cris_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, + enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return) +{ + /* Defining PROMOTE_FUNCTION_RETURN in gcc-2.7.2 uncovered bug 981110 (even + when modifying TARGET_FUNCTION_VALUE to return the promoted mode). + Maybe pointless as of now, but let's keep the old behavior. */ + if (for_return == 1) + return mode; + return CRIS_PROMOTED_MODE (mode, *punsignedp, type); +} + +/* Atomic types require alignment to be at least their "natural" size. */ + +static unsigned int +cris_atomic_align_for_mode (enum machine_mode mode) +{ + return GET_MODE_BITSIZE (mode); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. 
*/ + +static rtx +cris_function_value(const_tree type, + const_tree func ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (TYPE_MODE (type), CRIS_FIRST_ARG_REG); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. */ + +static rtx +cris_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG); +} + +/* Let's assume all functions return in r[CRIS_FIRST_ARG_REG] for the + time being. */ + +static bool +cris_function_value_regno_p (const unsigned int regno) +{ + return (regno == CRIS_FIRST_ARG_REG); +} + +static int +cris_arg_partial_bytes (cumulative_args_t ca, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + if (get_cumulative_args (ca)->regs == CRIS_MAX_ARGS_IN_REGS - 1 + && !targetm.calls.must_pass_in_stack (mode, type) + && CRIS_FUNCTION_ARG_SIZE (mode, type) > 4 + && CRIS_FUNCTION_ARG_SIZE (mode, type) <= 8) + return UNITS_PER_WORD; + else + return 0; +} + +static rtx +cris_function_arg_1 (cumulative_args_t ca_v, + enum machine_mode mode ATTRIBUTE_UNUSED, + const_tree type ATTRIBUTE_UNUSED, + bool named, bool incoming) +{ + const CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); + + if ((!incoming || named) && ca->regs < CRIS_MAX_ARGS_IN_REGS) + return gen_rtx_REG (mode, CRIS_FIRST_ARG_REG + ca->regs); + else + return NULL_RTX; +} + +/* Worker function for TARGET_FUNCTION_ARG. + The void_type_node is sent as a "closing" call. */ + +static rtx +cris_function_arg (cumulative_args_t ca, enum machine_mode mode, + const_tree type, bool named) +{ + return cris_function_arg_1 (ca, mode, type, named, false); +} + +/* Worker function for TARGET_FUNCTION_INCOMING_ARG. + + The differences between this and the previous, is that this one checks + that an argument is named, since incoming stdarg/varargs arguments are + pushed onto the stack, and we don't have to check against the "closing" + void_type_node TYPE parameter. */ + +static rtx +cris_function_incoming_arg (cumulative_args_t ca, enum machine_mode mode, + const_tree type, bool named) +{ + return cris_function_arg_1 (ca, mode, type, named, true); +} + +/* Worker function for TARGET_FUNCTION_ARG_ADVANCE. */ + +static void +cris_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); + + ca->regs += (3 + CRIS_FUNCTION_ARG_SIZE (mode, type)) / 4; +} + +/* Worker function for TARGET_MD_ASM_CLOBBERS. */ + +static tree +cris_md_asm_clobbers (tree outputs, tree inputs, tree in_clobbers) +{ + HARD_REG_SET mof_set; + tree clobbers; + tree t; + + CLEAR_HARD_REG_SET (mof_set); + SET_HARD_REG_BIT (mof_set, CRIS_MOF_REGNUM); + + /* For the time being, all asms clobber condition codes. Revisit when + there's a reasonable use for inputs/outputs that mention condition + codes. */ + clobbers + = tree_cons (NULL_TREE, + build_string (strlen (reg_names[CRIS_CC0_REGNUM]), + reg_names[CRIS_CC0_REGNUM]), + in_clobbers); + + for (t = outputs; t != NULL; t = TREE_CHAIN (t)) + { + tree val = TREE_VALUE (t); + + /* The constraint letter for the singleton register class of MOF + is 'h'. If it's mentioned in the constraints, the asm is + MOF-aware and adding it to the clobbers would cause it to have + impossible constraints. 
*/ + if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))), + 'h') != NULL + || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE) + return clobbers; + } + + for (t = inputs; t != NULL; t = TREE_CHAIN (t)) + { + tree val = TREE_VALUE (t); + + if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))), + 'h') != NULL + || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE) + return clobbers; + } + + return tree_cons (NULL_TREE, + build_string (strlen (reg_names[CRIS_MOF_REGNUM]), + reg_names[CRIS_MOF_REGNUM]), + clobbers); +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. + + Really only needed if the stack frame has variable length (alloca + or variable sized local arguments (GNU C extension). See PR39499 and + PR38609 for the reason this isn't just 0. */ + +bool +cris_frame_pointer_required (void) +{ + return !crtl->sp_is_unchanging; +} + +/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. + + This looks too complicated, and it is. I assigned r7 to be the + static chain register, but it is call-saved, so we have to save it, + and come back to restore it after the call, so we have to save srp... + Anyway, trampolines are rare enough that we can cope with this + somewhat lack of elegance. + (Do not be tempted to "straighten up" whitespace in the asms; the + assembler #NO_APP state mandates strict spacing). */ +/* ??? See the i386 regparm=3 implementation that pushes the static + chain value to the stack in the trampoline, and uses a call-saved + register when called directly. */ + +static void +cris_asm_trampoline_template (FILE *f) +{ + if (TARGET_V32) + { + /* This normally-unused nop insn acts as an instruction to + the simulator to flush its instruction cache. None of + the other instructions in the trampoline template suits + as a trigger for V32. The pc-relative addressing mode + works nicely as a trigger for V10. + FIXME: Have specific V32 template (possibly avoiding the + use of a special instruction). */ + fprintf (f, "\tclearf x\n"); + /* We have to use a register as an intermediate, choosing + semi-randomly R1 (which has to not be the STATIC_CHAIN_REGNUM), + so we can use it for address indirection and jsr target. */ + fprintf (f, "\tmove $r1,$mof\n"); + /* +4 */ + fprintf (f, "\tmove.d 0,$r1\n"); + fprintf (f, "\tmove.d $%s,[$r1]\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\taddq 6,$r1\n"); + fprintf (f, "\tmove $mof,[$r1]\n"); + fprintf (f, "\taddq 6,$r1\n"); + fprintf (f, "\tmove $srp,[$r1]\n"); + /* +20 */ + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + /* +26 */ + fprintf (f, "\tmove.d 0,$r1\n"); + fprintf (f, "\tjsr $r1\n"); + fprintf (f, "\tsetf\n"); + /* +36 */ + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + /* +42 */ + fprintf (f, "\tmove.d 0,$r1\n"); + /* +48 */ + fprintf (f, "\tmove.d 0,$r9\n"); + fprintf (f, "\tjump $r9\n"); + fprintf (f, "\tsetf\n"); + } + else + { + fprintf (f, "\tmove.d $%s,[$pc+20]\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tmove $srp,[$pc+22]\n"); + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tjsr 0\n"); + fprintf (f, "\tmove.d 0,$%s\n", reg_names[STATIC_CHAIN_REGNUM]); + fprintf (f, "\tjump 0\n"); + } +} + +/* Implement TARGET_TRAMPOLINE_INIT. 
*/ + +static void +cris_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx tramp = XEXP (m_tramp, 0); + rtx mem; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + if (TARGET_V32) + { + mem = adjust_address (m_tramp, SImode, 6); + emit_move_insn (mem, plus_constant (Pmode, tramp, 38)); + mem = adjust_address (m_tramp, SImode, 22); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 28); + emit_move_insn (mem, fnaddr); + } + else + { + mem = adjust_address (m_tramp, SImode, 10); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 16); + emit_move_insn (mem, fnaddr); + } + + /* Note that there is no need to do anything with the cache for + sake of a trampoline. */ +} + + +#if 0 +/* Various small functions to replace macros. Only called from a + debugger. They might collide with gcc functions or system functions, + so only emit them when '#if 1' above. */ + +enum rtx_code Get_code (rtx); + +enum rtx_code +Get_code (rtx x) +{ + return GET_CODE (x); +} + +const char *Get_mode (rtx); + +const char * +Get_mode (rtx x) +{ + return GET_MODE_NAME (GET_MODE (x)); +} + +rtx Xexp (rtx, int); + +rtx +Xexp (rtx x, int n) +{ + return XEXP (x, n); +} + +rtx Xvecexp (rtx, int, int); + +rtx +Xvecexp (rtx x, int n, int m) +{ + return XVECEXP (x, n, m); +} + +int Get_rtx_len (rtx); + +int +Get_rtx_len (rtx x) +{ + return GET_RTX_LENGTH (GET_CODE (x)); +} + +/* Use upper-case to distinguish from local variables that are sometimes + called next_insn and prev_insn. */ + +rtx Next_insn (rtx); + +rtx +Next_insn (rtx insn) +{ + return NEXT_INSN (insn); +} + +rtx Prev_insn (rtx); + +rtx +Prev_insn (rtx insn) +{ + return PREV_INSN (insn); +} +#endif + +#include "gt-cris.h" + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc-4.9/gcc/config/cris/cris.h b/gcc-4.9/gcc/config/cris/cris.h new file mode 100644 index 000000000..37b562e5d --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris.h @@ -0,0 +1,1081 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* After the first "Node:" comment comes all preprocessor directives and + attached declarations described in the info files, the "Using and + Porting GCC" manual (uapgcc), in the same order as found in the "Target + macros" section in the gcc-2.9x CVS edition of 2000-03-17. FIXME: Not + really, but needs an update anyway. + + There is no generic copy-of-uapgcc comment, you'll have to see uapgcc + for that. If applicable, there is a CRIS-specific comment. The order + of macro definitions follow the order in the manual. 
Every section in + the manual (node in the info pages) has an introductory `Node: + ' comment. If no macros are defined for a section, only + the section-comment is present. */ + +/* Note that other header files (e.g. config/elfos.h, config/linux.h, + and config/cris/linux.h) are responsible for lots of settings not + repeated below. This file contains general CRIS definitions + and definitions for the cris-*-elf subtarget. */ + +/* We don't want to use gcc_assert for everything, as that can be + compiled out. */ +#define CRIS_ASSERT(x) \ + do { if (!(x)) internal_error ("CRIS-port assertion failed: " #x); } while (0) + +/* Replacement for REG_P since it does not match SUBREGs. Happens for + testcase Axis-20000320 with gcc-2.9x. */ +#define REG_S_P(x) \ + (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (XEXP (x, 0)))) + +/* Last register in main register bank r0..r15. */ +#define CRIS_LAST_GENERAL_REGISTER 15 + +/* Descriptions of registers used for arguments. */ +#define CRIS_FIRST_ARG_REG 10 +#define CRIS_MAX_ARGS_IN_REGS 4 + +/* See also *_REGNUM constants in cris.md. */ + +/* Most of the time, we need the index into the register-names array. + When passing debug-info, we need the real hardware register number. */ +#define CRIS_CANONICAL_SRP_REGNUM (16 + 11) +#define CRIS_CANONICAL_MOF_REGNUM (16 + 7) +/* We have CCR in all models including v10, but that's 16 bits, so let's + prefer the DCCR number, which is a DMA pointer in pre-v8, so we'll + never clash with it for GCC purposes. */ +#define CRIS_CANONICAL_CC0_REGNUM (16 + 13) + +/* When generating PIC, these suffixes are added to the names of non-local + functions when being output. Contrary to other ports, we have offsets + relative to the GOT, not the PC. We might implement PC-relative PLT + semantics later for the general case; they are used in some cases right + now, such as MI thunks. */ +#define CRIS_GOTPLT_SUFFIX ":GOTPLT" +#define CRIS_PLT_GOTOFFSET_SUFFIX ":PLTG" +#define CRIS_PLT_PCOFFSET_SUFFIX ":PLT" + +#define CRIS_FUNCTION_ARG_SIZE(MODE, TYPE) \ + ((MODE) != BLKmode ? GET_MODE_SIZE (MODE) \ + : (unsigned) int_size_in_bytes (TYPE)) + +/* Which CPU version this is. The parsed and adjusted cris_cpu_str. */ +extern int cris_cpu_version; + +/* Changing the order used to be necessary to put the fourth __make_dp + argument (a DImode parameter) in registers, to fit with the libfunc + parameter passing scheme used for intrinsic functions. FIXME: Check + performance. */ +#ifdef IN_LIBGCC2 +#define __make_dp(a,b,c,d) __cris_make_dp(d,a,b,c) +#endif + + +/* Node: Driver */ + +/* Also provide canonical vN definitions when user specifies an alias. 
*/ + +#define CPP_SPEC \ + "%{mtune=*:-D__tune_%* %{mtune=v*:-D__CRIS_arch_tune=%*}\ + %{mtune=etrax4:-D__tune_v3 -D__CRIS_arch_tune=3}\ + %{mtune=etrax100:-D__tune_v8 -D__CRIS_arch_tune=8}\ + %{mtune=svinto:-D__tune_v8 -D__CRIS_arch_tune=8}\ + %{mtune=etrax100lx:-D__tune_v10 -D__CRIS_arch_tune=10}\ + %{mtune=ng:-D__tune_v10 -D__CRIS_arch_tune=10}}\ + %{mcpu=*:-D__arch_%* %{mcpu=v*:-D__CRIS_arch_version=%*}\ + %{mcpu=etrax4:-D__arch_v3 -D__CRIS_arch_version=3}\ + %{mcpu=etrax100:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{mcpu=svinto:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{mcpu=etrax100lx:-D__arch_v10 -D__CRIS_arch_version=10}\ + %{mcpu=ng:-D__arch_v10 -D__CRIS_arch_version=10}}\ + %{march=*:-D__arch_%* %{march=v*:-D__CRIS_arch_version=%*}\ + %{march=etrax4:-D__arch_v3 -D__CRIS_arch_version=3}\ + %{march=etrax100:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{march=svinto:-D__arch_v8 -D__CRIS_arch_version=8}\ + %{march=etrax100lx:-D__arch_v10 -D__CRIS_arch_version=10}\ + %{march=ng:-D__arch_v10 -D__CRIS_arch_version=10}}\ + %{metrax100:-D__arch__v8 -D__CRIS_arch_version=8}\ + %{metrax4:-D__arch__v3 -D__CRIS_arch_version=3}\ + %(cpp_subtarget)" + +/* For the cris-*-elf subtarget. */ + +#define CRIS_DEFAULT_TUNE "10" +#define CRIS_ARCH_CPP_DEFAULT +#define CRIS_DEFAULT_ASM_ARCH_OPTION "" + +#ifdef TARGET_CPU_DEFAULT +#if TARGET_CPU_DEFAULT != 32 && TARGET_CPU_DEFAULT != 10 + #error "Due to '()'; e.g. '#define TARGET_CPU_DEFAULT (10)', stringize TARGET_CPU_DEFAULT isn't useful: update manually." +#endif + +#if TARGET_CPU_DEFAULT == 32 +#undef CRIS_DEFAULT_TUNE +#define CRIS_DEFAULT_TUNE "32" +/* To enable use of "generic" cris-axis-elf binutils, always pass the + architecture option to GAS. (We don't do this for non-v32.) */ +#undef CRIS_DEFAULT_ASM_ARCH_OPTION +#define CRIS_DEFAULT_ASM_ARCH_OPTION "--march=v32" +#endif + +#undef CRIS_ARCH_CPP_DEFAULT +#define CRIS_ARCH_CPP_DEFAULT \ + "%{!march=*:\ + %{!metrax*:\ + %{!mcpu=*:\ + %{!mtune=*:-D__tune_v" CRIS_DEFAULT_TUNE "}\ + -D__arch_v"CRIS_DEFAULT_TUNE\ + " -D__CRIS_arch_version=" CRIS_DEFAULT_TUNE "}}}" +#endif + +#define CRIS_CPP_SUBTARGET_SPEC \ + "%{mbest-lib-options:\ + %{!moverride-best-lib-options:\ + %{!march=*:%{!metrax*:%{!mcpu=*:\ + -D__tune_v" CRIS_DEFAULT_TUNE \ + " -D__CRIS_arch_tune=" CRIS_DEFAULT_TUNE "}}}}}"\ + CRIS_ARCH_CPP_DEFAULT + +/* Override previous definitions (../linux.h). */ +#undef CC1_SPEC +#define CC1_SPEC \ + "%{metrax4:-march=v3}\ + %{metrax100:-march=v8}\ + %{march=*:-march=%*}\ + %{mcpu=*:-mcpu=%*}\ + %(cc1_subtarget)" + +/* For the cris-*-elf subtarget. */ +#define CRIS_CC1_SUBTARGET_SPEC \ + "-melf\ + %{mbest-lib-options:\ + %{!moverride-best-lib-options:\ + %{!march=*:%{!mcpu=*:-mtune=v" CRIS_DEFAULT_TUNE\ + " -D__CRIS_arch_tune=" CRIS_DEFAULT_TUNE "}}\ + %{!finhibit-size-directive:\ + %{!fno-function-sections: -ffunction-sections}\ + %{!fno-data-sections: -fdata-sections}}}}" + +/* This adds to CC1_SPEC. */ +#define CC1PLUS_SPEC "" + +#ifdef HAVE_AS_NO_MUL_BUG_ABORT_OPTION +#define MAYBE_AS_NO_MUL_BUG_ABORT \ + "%{mno-mul-bug-workaround:-no-mul-bug-abort} " +#else +#define MAYBE_AS_NO_MUL_BUG_ABORT +#endif + +/* Override previous definitions (../linux.h). */ +#undef ASM_SPEC +#define ASM_SPEC \ + MAYBE_AS_NO_MUL_BUG_ABORT \ + "%(asm_subtarget)\ + %{march=*:%{mcpu=*:%edo not specify both -march=... and -mcpu=...}}\ + %{march=v0|mcpu=v0|march=v3|mcpu=v3|march=v8|mcpu=v8:--march=v0_v10}\ + %{march=v10|mcpu=v10:--march=v10}\ + %{march=v32|mcpu=v32:--march=v32}" + +/* For the cris-*-elf subtarget. 
*/ +#define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}" + +/* FIXME: We should propagate the -melf option to make the criself + "emulation" unless a linker script is provided (-T*), but I don't know + how to do that if either of -Ttext, -Tdata or -Tbss is given but no + linker script, as is usually the case. Leave it to the user for the + time being. */ +#undef LINK_SPEC +#define LINK_SPEC \ + "%{v:--verbose}\ + %(link_subtarget)" + +/* For the cris-*-elf subtarget. */ +#define CRIS_LINK_SUBTARGET_SPEC \ + "-mcriself\ + %{sim2:%{!T*:-Tdata 0x4000000 -Tbss 0x8000000}}\ + %{!r:%{O2|O3: --gc-sections}}" + +/* Which library to get. The simulator uses a different library for + the low-level syscalls (implementing the Linux syscall ABI instead + of direct-iron accesses). Default everything with the stub "nosys" + library. */ +/* Override previous definitions (linux.h). */ +#undef LIB_SPEC +#define LIB_SPEC \ + "%{sim*:--start-group -lc -lsyslinux --end-group}\ + %{!sim*:%{g*:-lg}\ + %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p} -lbsp}\ + -lnosys" + +/* Linker startfile options; crt0 flavors. + We need to remove any previous definition (elfos.h). */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{sim*:crt1.o%s}%{!sim*:crt0.o%s}\ + crti.o%s crtbegin.o%s" + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#define EXTRA_SPECS \ + {"cpp_subtarget", CRIS_CPP_SUBTARGET_SPEC}, \ + {"cc1_subtarget", CRIS_CC1_SUBTARGET_SPEC}, \ + {"asm_subtarget", CRIS_ASM_SUBTARGET_SPEC}, \ + {"link_subtarget", CRIS_LINK_SUBTARGET_SPEC}, \ + CRIS_SUBTARGET_EXTRA_SPECS + +#define CRIS_SUBTARGET_EXTRA_SPECS + + +/* Node: Run-time Target */ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("cris"); \ + builtin_define_std ("CRIS"); \ + builtin_define_std ("GNU_CRIS"); \ + builtin_define ("__CRIS_ABI_version=2"); \ + builtin_assert ("cpu=cris"); \ + builtin_assert ("machine=cris"); \ + } \ + while (0) + +/* Previously controlled by target_flags. Note that this is *not* set + for -melinux. */ +#define TARGET_LINUX 0 + +/* For the cris-*-elf subtarget. */ +#define CRIS_SUBTARGET_DEFAULT 0 + +#define CRIS_CPU_BASE 0 +#define CRIS_CPU_ETRAX4 3 /* Just lz added. */ +#define CRIS_CPU_SVINTO 8 /* Added swap, jsrc & Co., 32-bit accesses. */ +#define CRIS_CPU_NG 10 /* Added mul[su]. */ +#define CRIS_CPU_V32 32 /* Major changes. */ + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT CRIS_CPU_BASE +#endif + +/* Default target_flags if no switches specified. + The alignment-by-32 is to make builtin atomic support for v10 and v32 + work for *-elf for types without specified alignment (like plain + "int"). See top comment in sync.md. */ +#ifndef TARGET_DEFAULT +# if TARGET_CPU_DEFAULT == 32 +# define TARGET_DEFAULT \ + (MASK_STACK_ALIGN \ + + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + + MASK_ALIGN_BY_32 \ + + MASK_PROLOGUE_EPILOGUE) +# elif TARGET_CPU_DEFAULT == 10 +# define TARGET_DEFAULT \ + (MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \ + + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + + MASK_ALIGN_BY_32 \ + + MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG) +# else /* 0 */ +# define TARGET_DEFAULT \ + (MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \ + + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + + MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG) +# endif +#endif + +/* Local, providing a default for cris_cpu_version. 
*/ +#define CRIS_DEFAULT_CPU_VERSION TARGET_CPU_DEFAULT + +#define TARGET_HAS_MUL_INSNS (cris_cpu_version >= CRIS_CPU_NG) +#define TARGET_HAS_LZ (cris_cpu_version >= CRIS_CPU_ETRAX4) +#define TARGET_HAS_BREAK (cris_cpu_version >= CRIS_CPU_ETRAX4) +#define TARGET_HAS_SWAP (cris_cpu_version >= CRIS_CPU_SVINTO) +#define TARGET_V32 (cris_cpu_version >= CRIS_CPU_V32) + +/* The "break" instruction was introduced with ETRAX 4. */ +#define TARGET_TRAP_USING_BREAK8 \ + (cris_trap_using_break8 == 2 ? TARGET_HAS_BREAK : cris_trap_using_break8) + +/* Call library functions by default for GNU/Linux. */ +#define TARGET_ATOMICS_MAY_CALL_LIBFUNCS \ + (cris_atomics_calling_libfunc == 2 \ + ? TARGET_LINUX : cris_atomics_calling_libfunc) + +/* The < v10 atomics turn off interrupts, so they don't need alignment. + Incidentally, by default alignment is off there causing variables to + be default unaligned all over, so we'd have to make support + libraries use a proper atomic type (instead of "int"), one we'd + specify as aligned. */ +#define TARGET_TRAP_UNALIGNED_ATOMIC \ + (cris_trap_unaligned_atomic == 2 \ + ? (TARGET_V32 || cris_cpu_version == 10) \ + : cris_trap_unaligned_atomic) + +/* Node: Storage Layout */ + +#define BITS_BIG_ENDIAN 0 + +#define BYTES_BIG_ENDIAN 0 + +/* WORDS_BIG_ENDIAN is not defined in the hardware, but for consistency, + we use little-endianness, and we may also be able to use + post-increment on DImode indirect. */ +#define WORDS_BIG_ENDIAN 0 + +#define UNITS_PER_WORD 4 + +#define CRIS_PROMOTED_MODE(MODE, UNSIGNEDP, TYPE) \ + (GET_MODE_CLASS (MODE) == MODE_INT && GET_MODE_SIZE (MODE) < 4) \ + ? SImode : MODE + +/* We will be using prototype promotion, so they will be 32 bit. */ +#define PARM_BOUNDARY 32 + +/* Stack boundary is guided by -mstack-align, -mno-stack-align, + -malign. + Old comment: (2.1: still valid in 2.7.2?) + Note that to make this macro affect the alignment of stack + locals, a fix was required, and special precautions when handling + the stack pointer in various other macros (TARGET_ASM_FUNCTION_PROLOGUE + et al) were required. See file "function.c". If you would just define + this macro, it would only affect the builtin alloca and variable + local data (non-ANSI, non-K&R, Gnu C extension). */ +#define STACK_BOUNDARY \ + (TARGET_STACK_ALIGN ? (TARGET_ALIGN_BY_32 ? 32 : 16) : 8) + +#define FUNCTION_BOUNDARY 16 + +/* Do not change BIGGEST_ALIGNMENT (when optimizing), as it will affect + strange places, at least in 2.1. */ +#define BIGGEST_ALIGNMENT 8 + +/* If -m16bit, -m16-bit, -malign or -mdata-align, + align everything to 16 bit. */ +#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \ + (TARGET_DATA_ALIGN \ + ? (TARGET_ALIGN_BY_32 \ + ? (BASIC_ALIGN < 32 ? 32 : BASIC_ALIGN) \ + : (BASIC_ALIGN < 16 ? 16 : BASIC_ALIGN)) : BASIC_ALIGN) + +/* Note that CONSTANT_ALIGNMENT has the effect of making gcc believe that + ALL references to constant stuff (in code segment, like strings) has + this alignment. That is a rather rushed assumption. Luckily we do not + care about the "alignment" operand to builtin memcpy (only place where + it counts), so it doesn't affect any bad spots. */ +#define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \ + (TARGET_CONST_ALIGN \ + ? (TARGET_ALIGN_BY_32 \ + ? (BASIC_ALIGN < 32 ? 32 : BASIC_ALIGN) \ + : (BASIC_ALIGN < 16 ? 16 : BASIC_ALIGN)) : BASIC_ALIGN) + +/* FIXME: Define LOCAL_ALIGNMENT for word and dword or arrays and + structures (if -mstack-align=), and check that it is good. 
*/ + +#define EMPTY_FIELD_BOUNDARY 8 + +#define STRUCTURE_SIZE_BOUNDARY 8 + +#define STRICT_ALIGNMENT 0 + +/* Remove any previous definition (elfos.h). + ??? If it wasn't for all the other stuff that affects layout of + structures and bit-fields, this could presumably cause incompatibility + with other GNU/Linux ports (i.e. elfos.h users). */ +#undef PCC_BITFIELD_TYPE_MATTERS + +/* This is only used for non-scalars. Strange stuff happens to structs + (FIXME: What?) if we use anything larger than largest actually used + datum size, so lets make it 32. The type "long long" will still work + as usual. We can still have DImode insns, but they will only be used + for scalar data (i.e. long long). */ +#define MAX_FIXED_MODE_SIZE 32 + + +/* Node: Type Layout */ + +/* Note that DOUBLE_TYPE_SIZE is not defined anymore, since the default + value gives a 64-bit double, which is what we now use. */ + +/* For compatibility and historical reasons, a char should be signed. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Note that WCHAR_TYPE_SIZE is used in cexp.y, + where TARGET_SHORT is not available. */ +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + + +/* Node: Register Basics */ + +/* We count all 16 non-special registers, SRP, a faked argument + pointer register, MOF and CCR/DCCR. */ +#define FIRST_PSEUDO_REGISTER (16 + 1 + 1 + 1 + 1) + +/* For CRIS, these are r15 (pc) and r14 (sp). Register r8 is used as a + frame-pointer, but is not fixed. SRP is not included in general + registers and will not be used automatically. All other special + registers are fixed at the moment. The faked argument pointer register + is fixed too. */ +#define FIXED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1} + +/* Register r9 is used for structure-address, r10-r13 for parameters, + r10- for return values. */ +#define CALL_USED_REGISTERS \ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1} + +/* Node: Allocation Order */ + +/* We need this on CRIS, because call-used regs should be used first, + (so we don't need to push). Else start using registers from r0 and up. + This preference is mainly because if we put call-used-regs from r0 + and up, then we can't use movem to push the rest, (which have to be + saved if we use them, and movem has to start with r0). + Change here if you change which registers to use as call registers. + + The actual need to explicitly prefer call-used registers improved the + situation a lot for 2.1, but might not actually be needed anymore. + Still, this order reflects what GCC should find out by itself, so it + probably does not hurt. + + Order of preference: Call-used-regs first, then r0 and up, last fp & + sp & pc as fillers. + Call-used regs in opposite order, so they will cause less conflict if + a function has few args (<= 3) and it wants a scratch reg. + Use struct-return address first, since very few functions use + structure return values so it is likely to be available. */ +#define REG_ALLOC_ORDER \ + {9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 14, 15, 17, 16, 18, 19} + +/* Use MOF and ACR. Prefer ACR before any other register. Prefer MOF + then SRP after saved registers. The *after* is because they're only + useful for storage, not for things being computed, which is + apparently more common. 
*/ +#define REG_ALLOC_ORDER_V32 \ + {15, 9, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 17, 16, 14, 18, 19} + + +/* Node: Values in Registers */ + +/* The VOIDmode test is so we can omit mode on anonymous insns. FIXME: + Still needed in 2.9x, at least for Axis-20000319. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ + (MODE == VOIDmode \ + ? 1 : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + +/* CRIS permits all registers to hold all modes. Well, except for the + condition-code register. And we can't hold larger-than-register size + modes in the last special register that can hold a full 32 bits. */ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (((MODE) == CCmode \ + || (REGNO) != CRIS_CC0_REGNUM) \ + && (GET_MODE_SIZE (MODE) <= UNITS_PER_WORD \ + || ((REGNO) != CRIS_MOF_REGNUM && (REGNO) != CRIS_ACR_REGNUM))) + +/* Because CCmode isn't covered by the "narrower mode" statement in + tm.texi, we can still say all modes are tieable despite not having an + always 1 HARD_REGNO_MODE_OK. */ +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + + +/* Node: Leaf Functions */ +/* (no definitions) */ + +/* Node: Stack Registers */ +/* (no definitions) */ + + +/* Node: Register Classes */ + +/* We need a separate register class to handle register allocation for + ACR, since it can't be used for post-increment. + + It's not obvious, but having subunions of all movable-between + register classes does really help register allocation (pre-IRA + comment). */ +enum reg_class + { + NO_REGS, + ACR_REGS, MOF_REGS, SRP_REGS, CC0_REGS, + MOF_SRP_REGS, SPECIAL_REGS, + SPEC_ACR_REGS, GENNONACR_REGS, + SPEC_GENNONACR_REGS, GENERAL_REGS, + ALL_REGS, + LIM_REG_CLASSES + }; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +#define REG_CLASS_NAMES \ + {"NO_REGS", \ + "ACR_REGS", "MOF_REGS", "SRP_REGS", "CC0_REGS", \ + "MOF_SRP_REGS", "SPECIAL_REGS", \ + "SPEC_ACR_REGS", "GENNONACR_REGS", "SPEC_GENNONACR_REGS", \ + "GENERAL_REGS", "ALL_REGS"} + +#define CRIS_SPECIAL_REGS_CONTENTS \ + ((1 << CRIS_SRP_REGNUM) | (1 << CRIS_MOF_REGNUM) | (1 << CRIS_CC0_REGNUM)) + +/* Count in the faked argument register in GENERAL_REGS. Keep out SRP. */ +#define REG_CLASS_CONTENTS \ + { \ + {0}, \ + {1 << CRIS_ACR_REGNUM}, \ + {1 << CRIS_MOF_REGNUM}, \ + {1 << CRIS_SRP_REGNUM}, \ + {1 << CRIS_CC0_REGNUM}, \ + {(1 << CRIS_MOF_REGNUM) \ + | (1 << CRIS_SRP_REGNUM)}, \ + {CRIS_SPECIAL_REGS_CONTENTS}, \ + {CRIS_SPECIAL_REGS_CONTENTS \ + | (1 << CRIS_ACR_REGNUM)}, \ + {(0xffff | (1 << CRIS_AP_REGNUM)) \ + & ~(1 << CRIS_ACR_REGNUM)}, \ + {(0xffff | (1 << CRIS_AP_REGNUM) \ + | CRIS_SPECIAL_REGS_CONTENTS) \ + & ~(1 << CRIS_ACR_REGNUM)}, \ + {0xffff | (1 << CRIS_AP_REGNUM)}, \ + {0xffff | (1 << CRIS_AP_REGNUM) \ + | CRIS_SPECIAL_REGS_CONTENTS} \ + } + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == CRIS_ACR_REGNUM ? ACR_REGS : \ + (REGNO) == CRIS_MOF_REGNUM ? MOF_REGS : \ + (REGNO) == CRIS_SRP_REGNUM ? SRP_REGS : \ + (REGNO) == CRIS_CC0_REGNUM ? CC0_REGS : \ + GENERAL_REGS) + +#define BASE_REG_CLASS GENERAL_REGS + +#define MODE_CODE_BASE_REG_CLASS(MODE, AS, OCODE, ICODE) \ + ((OCODE) != POST_INC ? BASE_REG_CLASS : GENNONACR_REGS) + +#define INDEX_REG_CLASS GENERAL_REGS + +/* Since it uses reg_renumber, it is safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. 
*/ +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) <= CRIS_LAST_GENERAL_REGISTER \ + || (REGNO) == ARG_POINTER_REGNUM \ + || (unsigned) reg_renumber[REGNO] <= CRIS_LAST_GENERAL_REGISTER \ + || (unsigned) reg_renumber[REGNO] == ARG_POINTER_REGNUM) + +/* REGNO_OK_FOR_BASE_P seems to be obsolete wrt. this one, but not yet + documented as such. */ +#define REGNO_MODE_CODE_OK_FOR_BASE_P(REGNO, MODE, AS, OCODE, ICODE) \ + (REGNO_OK_FOR_BASE_P (REGNO) \ + && ((OCODE) != POST_INC \ + || !((REGNO) == CRIS_ACR_REGNUM \ + || (unsigned) reg_renumber[REGNO] == CRIS_ACR_REGNUM))) + +/* See REGNO_OK_FOR_BASE_P. */ +#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO) + +/* We can't move special registers to and from memory in smaller than + word_mode. We also can't move between special registers. Luckily, + -1, as returned by true_regnum for non-sub/registers, is valid as a + parameter to our REGNO_REG_CLASS, returning GENERAL_REGS, so we get + the effect that any X that isn't a special-register is treated as + a non-empty intersection with GENERAL_REGS. */ +#define SECONDARY_RELOAD_CLASS(CLASS, MODE, X) \ + ((reg_class_subset_p (CLASS, SPECIAL_REGS) \ + && ((GET_MODE_SIZE (MODE) < 4 && MEM_P (X)) \ + || !reg_classes_intersect_p (REGNO_REG_CLASS (true_regnum (X)), \ + GENERAL_REGS))) \ + ? GENERAL_REGS : NO_REGS) + +/* FIXME: Fix regrename.c; it should check validity of replacements, + not just with a silly pass-specific macro. We may miss some + opportunities, but we must stop regrename from creating acr++. */ +#define HARD_REGNO_RENAME_OK(FROM, TO) ((TO) != CRIS_ACR_REGNUM) + +/* For CRIS, this is always the size of MODE in words, + since all registers are the same size. To use omitted modes in + patterns with reload constraints, you must say the widest size + which is allowed for VOIDmode. + FIXME: Does that still apply for gcc-2.9x? Keep poisoned until such + patterns are added back. News: 2001-03-16: Happens as early as the + underscore-test. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ + ((MODE) == VOIDmode \ + ? 1 /* + cris_fatal ("CLASS_MAX_NREGS with VOIDmode") */ \ + : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) + + +/* Node: Frame Layout */ + +#define STACK_GROWS_DOWNWARD +#define FRAME_GROWS_DOWNWARD 1 + +/* It seems to be indicated in the code (at least 2.1) that this is + better a constant, and best 0. */ +#define STARTING_FRAME_OFFSET 0 + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \ + cris_return_addr_rtx (COUNT, FRAMEADDR) + +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, CRIS_SRP_REGNUM) + +/* FIXME: Any __builtin_eh_return callers must not return anything and + there must not be collisions with incoming parameters. Luckily the + number of __builtin_eh_return callers is limited. For now return + parameter registers in reverse order and hope for the best. */ +#define EH_RETURN_DATA_REGNO(N) \ + (IN_RANGE ((N), 0, 3) ? (CRIS_FIRST_ARG_REG + 3 - (N)) : INVALID_REGNUM) + +/* Store the stack adjustment in the structure-return-address register. */ +#define CRIS_STACKADJ_REG CRIS_STRUCT_VALUE_REGNUM +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (SImode, CRIS_STACKADJ_REG) + +#define EH_RETURN_HANDLER_RTX \ + cris_return_addr_rtx (0, NULL) + +#define INIT_EXPANDERS cris_init_expanders () + +/* FIXME: Move this to right node (it's not documented properly yet). */ +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (CRIS_SRP_REGNUM) + +/* FIXME: Move this to right node (it's not documented properly yet). 
+ FIXME: Check what alignment we can assume regarding + TARGET_STACK_ALIGN and TARGET_ALIGN_BY_32. */ +#define DWARF_CIE_DATA_ALIGNMENT -1 + +/* If we would ever need an exact mapping between canonical register + number and dwarf frame register, we would either need to include all + registers in the gcc description (with some marked fixed of course), or + an inverse mapping from dwarf register to gcc register. There is one + need in dwarf2out.c:expand_builtin_init_dwarf_reg_sizes. Right now, I + don't see that we need exact correspondence between DWARF *frame* + registers and DBX_REGISTER_NUMBER, so map them onto GCC registers. */ +#define DWARF_FRAME_REGNUM(REG) (REG) + +/* Node: Stack Checking */ +/* (no definitions) FIXME: Check. */ + +/* Node: Frame Registers */ + +#define STACK_POINTER_REGNUM CRIS_SP_REGNUM + +/* Register used for frame pointer. This is also the last of the saved + registers, when a frame pointer is not used. */ +#define FRAME_POINTER_REGNUM CRIS_FP_REGNUM + +/* Faked register, is always eliminated. We need it to eliminate + allocating stack slots for the return address and the frame pointer. */ +#define ARG_POINTER_REGNUM CRIS_AP_REGNUM + +#define STATIC_CHAIN_REGNUM CRIS_STATIC_CHAIN_REGNUM + + +/* Node: Elimination */ + +#define ELIMINABLE_REGS \ + {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = cris_initial_elimination_offset (FROM, TO) + + +/* Node: Stack Arguments */ + +/* Since many parameters take up one register each in any case, + defining TARGET_PROMOTE_PROTOTYPES that always returns true would + seem like a good idea, but measurements indicate that a combination + using PROMOTE_MODE is better. */ + +#define ACCUMULATE_OUTGOING_ARGS 1 + + +/* Node: Register Arguments */ + +/* Contrary to what you'd believe, defining FUNCTION_ARG_CALLEE_COPIES + seems like a (small total) loss, at least for gcc-2.7.2 compiling and + running gcc-2.1 (small win in size, small loss running -- 100.1%), + and similarly for size for products (.1 .. .3% bloat, sometimes win). + Due to the empirical likeliness of making slower code, it is not + defined. */ + +/* This no longer *needs* to be a structure; but keeping it as such should + not hurt (and hacking the ABI is simpler). */ +#define CUMULATIVE_ARGS struct cum_args +struct cum_args {int regs;}; + +/* The regs member is an integer, the number of arguments got into + registers so far. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ + ((CUM).regs = 0) + +#define FUNCTION_ARG_REGNO_P(REGNO) \ + ((REGNO) >= CRIS_FIRST_ARG_REG \ + && (REGNO) < CRIS_FIRST_ARG_REG + (CRIS_MAX_ARGS_IN_REGS)) + + +/* Node: Aggregate Return */ + +#define CRIS_STRUCT_VALUE_REGNUM ((CRIS_FIRST_ARG_REG) - 1) + + +/* Node: Caller Saves */ +/* (no definitions) */ + +/* Node: Function entry */ + +/* See cris.c for TARGET_ASM_FUNCTION_PROLOGUE and + TARGET_ASM_FUNCTION_EPILOGUE. */ + +/* Node: Profiling */ + +#define FUNCTION_PROFILER(FILE, LABELNO) \ + error ("no FUNCTION_PROFILER for CRIS") + +/* FIXME: Some of the undefined macros might be mandatory. If so, fix + documentation. */ + + +/* Node: Trampolines */ + +#define TRAMPOLINE_SIZE (TARGET_V32 ? 58 : 32) + +/* CRIS wants instructions on word-boundary. */ +#define TRAMPOLINE_ALIGNMENT 16 + +/* Node: Library Calls */ + +/* If you change this, you have to check whatever libraries and systems + that use it. 
*/ +#define TARGET_EDOM 33 + + +/* Node: Addressing Modes */ + +#define HAVE_POST_INCREMENT 1 + +#define CONSTANT_ADDRESS_P(X) \ + (CONSTANT_P (X) && cris_legitimate_address_p (QImode, X, false)) + +/* Must be a compile-time constant, so we go with the highest value + among all CRIS variants. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* Fix reloads known to cause suboptimal spilling. */ +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, INDL, WIN) \ + do \ + { \ + if (cris_reload_address_legitimized (X, MODE, OPNUM, TYPE, INDL)) \ + goto WIN; \ + } \ + while (0) + + +/* Node: Condition Code */ + +#define NOTICE_UPDATE_CC(EXP, INSN) cris_notice_update_cc (EXP, INSN) + +/* FIXME: Maybe define CANONICALIZE_COMPARISON later, when playing with + optimizations. It is needed; currently we do this with instruction + patterns and NOTICE_UPDATE_CC. */ + + +/* Node: Costs */ + +/* Regardless of the presence of delay slots, the default value of 1 for + BRANCH_COST is the best in the range (1, 2, 3), tested with gcc-2.7.2 + with testcases ipps and gcc, giving smallest and fastest code. */ + +#define SLOW_BYTE_ACCESS 0 + +/* This is the threshold *below* which inline move sequences of + word-length sizes will be emitted. The "9" will translate to + (9 - 1) * 4 = 32 bytes maximum moved, but using 16 instructions + (8 instruction sequences) or less. */ +#define MOVE_RATIO(speed) 9 + + +/* Node: Sections */ + +#define TEXT_SECTION_ASM_OP "\t.text" + +#define DATA_SECTION_ASM_OP "\t.data" + +/* The jump table is immediately connected to the preceding insn. */ +#define JUMP_TABLES_IN_TEXT_SECTION 1 + + +/* Node: PIC */ + +/* Helper type. */ + +enum cris_pic_symbol_type + { + cris_no_symbol = 0, + cris_got_symbol = 1, + cris_rel_symbol = 2, + cris_got_symbol_needing_fixup = 3, + cris_invalid_pic_symbol = 4 + }; + +#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? CRIS_GOT_REGNUM : INVALID_REGNUM) + +#define LEGITIMATE_PIC_OPERAND_P(X) cris_legitimate_pic_operand (X) + + +/* Node: File Framework */ + +/* We don't want an .ident for gcc. To avoid that but still support + #ident, we override TARGET_ASM_OUTPUT_IDENT and, since the gcc .ident + is its only use besides front-end .ident directives, we return if + the state if the cgraph is not CGRAPH_STATE_PARSING. */ +#undef TARGET_ASM_OUTPUT_IDENT +#define TARGET_ASM_OUTPUT_IDENT cris_asm_output_ident + +#define ASM_APP_ON "#APP\n" + +#define ASM_APP_OFF "#NO_APP\n" + + +/* Node: Data Output */ + +#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) (C) == '@' + +/* Node: Uninitialized Data */ + +/* Remember to round off odd values if we want data alignment, + since we cannot do that with an .align directive. + + Using .comm causes the space not to be reserved in .bss, but by + tricks with the symbol type. Not good if other tools than binutils + are used on the object files. Since ".global ... .lcomm ..." works, we + use that. Use .._ALIGNED_COMMON, since gcc whines when we only have + ..._COMMON, and we prefer to whine ourselves; BIGGEST_ALIGNMENT is not + the one to check. */ +/* FIXME: I suspect a bug in gcc with alignment. Do not warn until + investigated; it mucks up the testsuite results. */ +#define CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, LOCAL) \ + do \ + { \ + int align_ = (ALIGN) / BITS_PER_UNIT; \ + if (TARGET_DATA_ALIGN && TARGET_ALIGN_BY_32 && align_ < 4) \ + align_ = 4; \ + else if (TARGET_DATA_ALIGN && align_ < 2) \ + align_ = 2; \ + /* FIXME: Do we need this? 
*/ \ + else if (align_ < 1) \ + align_ = 1; \ + \ + if (LOCAL) \ + { \ + fprintf ((FILE), "%s", LOCAL_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), "\n"); \ + } \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ",%u,%u\n", (int)(SIZE), align_); \ + } \ + while (0) + +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, 0) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + CRIS_ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN, 1) + +/* Node: Label Output */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global " + +#define SUPPORTS_WEAK 1 + +#define ASM_OUTPUT_SYMBOL_REF(STREAM, SYM) \ + cris_asm_output_symbol_ref (STREAM, SYM) + +#define ASM_OUTPUT_LABEL_REF(STREAM, BUF) \ + cris_asm_output_label_ref (STREAM, BUF) + +/* Remove any previous definition (elfos.h). */ +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + sprintf (LABEL, "*%s%s%ld", LOCAL_LABEL_PREFIX, PREFIX, (long) NUM) + +/* Node: Initialization */ +/* (no definitions) */ + +/* Node: Macros for Initialization */ +/* (no definitions) */ + +/* Node: Instruction Output */ + +#define REGISTER_NAMES \ + {"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", \ + "r9", "r10", "r11", "r12", "r13", "sp", "acr", "srp", "mof", "faked_ap", "dccr"} + +#define ADDITIONAL_REGISTER_NAMES \ + {{"r14", 14}, {"r15", 15}, {"pc", 15}} + +/* Output an empty line to illustrate the presence of the delay slot. */ +#define DBR_OUTPUT_SEQEND(FILE) \ + fprintf (FILE, "\n") + +#define LOCAL_LABEL_PREFIX "." + +/* cppinit.c initializes a const array from this, so it must be constant, + can't have it different based on options. Luckily, the prefix is + always allowed, so let's have it on all GCC-generated code. Note that + we have this verbatim everywhere in the back-end, not using %R or %s or + such. */ +#define REGISTER_PREFIX "$" + +/* Remove any previous definition (elfos.h). */ +/* We use -fno-leading-underscore to remove it, when necessary. */ +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \ + fprintf (FILE, \ + TARGET_V32 \ + ? "\tsubq 4,$sp\n\tmove $%s,[$sp]\n" : "\tpush $%s\n", \ + reg_names[REGNO]) + +#define ASM_OUTPUT_REG_POP(FILE, REGNO) \ + fprintf (FILE, "\tmove [$sp+],$%s\n", reg_names[REGNO]) + + +/* Node: Dispatch Tables */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + do \ + { \ + if (TARGET_V32) \ + asm_fprintf (FILE, "\t.word %LL%d-.\n", VALUE); \ + else \ + asm_fprintf (FILE, "\t.word %LL%d-%LL%d\n", VALUE, REL); \ + } \ + while (0) + +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ + asm_fprintf (FILE, "\t.dword %LL%d\n", VALUE) + +/* Defined to also emit an .align in elfos.h. We don't want that. */ +#undef ASM_OUTPUT_CASE_LABEL + +/* Since the "bound" insn loads the comparison value if the compared< + value (register) is out of bounds (0..comparison value-1), we need + to output another case to catch it. + The way to find it is to look for the label_ref at the else-arm inside + the expanded casesi core-insn. + FIXME: Check this construct when changing to new version of gcc. 
*/ +#define ASM_OUTPUT_CASE_END(STREAM, NUM, TABLE) \ + cris_asm_output_case_end (STREAM, NUM, TABLE) + + +/* Node: Exception Region Output */ +/* (no definitions) */ +/* FIXME: Fill in with our own optimized layout. */ + +/* Node: Alignment Output */ + +#define ASM_OUTPUT_ALIGN(FILE, LOG) \ + fprintf (FILE, "\t.align %d\n", (LOG)) + + +/* Node: All Debuggers */ + +#define DBX_REGISTER_NUMBER(REGNO) \ + ((REGNO) == CRIS_SRP_REGNUM ? CRIS_CANONICAL_SRP_REGNUM : \ + (REGNO) == CRIS_MOF_REGNUM ? CRIS_CANONICAL_MOF_REGNUM : \ + (REGNO) == CRIS_CC0_REGNUM ? CRIS_CANONICAL_CC0_REGNUM : \ + (REGNO)) + +/* FIXME: Investigate DEBUGGER_AUTO_OFFSET, DEBUGGER_ARG_OFFSET. */ + + +/* Node: DBX Options */ + +/* Is this correct? Check later. */ +#define DBX_NO_XREFS + +#define DBX_CONTIN_LENGTH 0 + +/* FIXME: Is this needed when we have 0 DBX_CONTIN_LENGTH? */ +#define DBX_CONTIN_CHAR '?' + + +/* Node: DBX Hooks */ +/* (no definitions) */ + +/* Node: File names and DBX */ +/* (no definitions) */ + + +/* Node: SDB and DWARF */ +/* (no definitions) */ + +/* Node: Misc */ + +/* A combination of the bound (umin) insn together with a + sign-extended add via the table to PC seems optimal. + If the table overflows, the assembler will take care of it. + Theoretically, in extreme cases (uncertain if they occur), an error + will be emitted, so FIXME: Check how large case-tables are emitted, + possible add an option to emit SImode case-tables. */ +#define CASE_VECTOR_MODE HImode + +#define CASE_VECTOR_PC_RELATIVE 1 + +/* FIXME: Investigate CASE_VECTOR_SHORTEN_MODE to make sure HImode is not + used when broken-.word could possibly fail (plus testcase). */ + +/* This is the number of bytes that can be moved in one + reasonably fast instruction sequence. For CRIS, this is two + instructions: mem => reg, reg => mem. */ +#define MOVE_MAX 4 + +/* Maybe SHIFT_COUNT_TRUNCATED is safe to define? FIXME: Check later. */ + +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1) + +#define Pmode SImode + +#define FUNCTION_MODE QImode + +#define NO_IMPLICIT_EXTERN_C + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc-4.9/gcc/config/cris/cris.md b/gcc-4.9/gcc/config/cris/cris.md new file mode 100644 index 000000000..47f64512a --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris.md @@ -0,0 +1,5157 @@ +;; GCC machine description for CRIS cpu cores. +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. +;; Contributed by Axis Communications. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The original PO technology requires these to be ordered by speed, +;; so that assigner will pick the fastest. + +;; See files "md.texi" and "rtl.def" for documentation on define_insn, +;; match_*, et. al. 
+;; +;; The function cris_notice_update_cc in cris.c handles condition code +;; updates for most instructions, helped by the "cc" attribute. + +;; There are several instructions that are orthogonal in size, and seems +;; they could be matched by a single pattern without a specified size +;; for the operand that is orthogonal. However, this did not work on +;; gcc-2.7.2 (and probably not on gcc-2.8.1), relating to that when a +;; constant is substituted into an operand, the actual mode must be +;; deduced from the pattern. There is reasonable hope that that has been +;; fixed, so FIXME: try again. + +;; You will notice that three-operand alternatives ("=r", "r", "!To") +;; are marked with a "!" constraint modifier to avoid being reloaded +;; into. This is because gcc would otherwise prefer to use the constant +;; pool and its offsettable address instead of reloading to an +;; ("=r", "0", "i") alternative. Also, the constant-pool support was not +;; only suboptimal but also buggy in 2.7.2, ??? maybe only in 2.6.3. + +;; All insns that look like (set (...) (plus (...) (reg:SI 8))) +;; get problems when reloading r8 (frame pointer) to r14 + offs (stack +;; pointer). Thus the instructions that get into trouble have specific +;; checks against matching frame_pointer_rtx. +;; ??? But it should be re-checked for gcc > 2.7.2 +;; FIXME: This changed some time ago (from 2000-03-16) for gcc-2.9x. + +;; FIXME: When PIC, all [rX=rY+S] could be enabled to match +;; [rX=gotless_symbol]. +;; The movsi for a gotless symbol could be split (post reload). + + +(define_c_enum "" + [ + ;; PLT reference from call expansion: operand 0 is the address, + ;; the mode is VOIDmode. Always wrapped in CONST. + ;; The value is relative to the GOT. + CRIS_UNSPEC_PLT_GOTREL + + ;; PLT reference from call expansion: operand 0 is the address, + ;; the mode is VOIDmode. Always wrapped in CONST. + ;; The value is relative to the PC. It's arch-dependent whether + ;; the offset counts from the start or the end of the current item. + CRIS_UNSPEC_PLT_PCREL + + ;; The address of the global offset table as a source operand. + CRIS_UNSPEC_GOT + + ;; The offset from the global offset table to the operand. + CRIS_UNSPEC_GOTREL + + ;; The PC-relative offset to the operand. It's arch-dependent whether + ;; the offset counts from the start or the end of the current item. + CRIS_UNSPEC_PCREL + + ;; The index into the global offset table of a symbol, while + ;; also generating a GOT entry for the symbol. + CRIS_UNSPEC_GOTREAD + + ;; Similar to CRIS_UNSPEC_GOTREAD, but also generating a PLT entry. + CRIS_UNSPEC_PLTGOTREAD + + ;; Condition for v32 casesi jump, since it needs to have if_then_else + ;; form with register as one branch and default label as other. + ;; Operand 0 is const_int 0. + CRIS_UNSPEC_CASESI + + ;; Stack frame deallocation barrier. + CRIS_UNSPEC_FRAME_DEALLOC + + ;; Swap all 32 bits of the operand; 31 <=> 0, 30 <=> 1... + CRIS_UNSPEC_SWAP_BITS + ]) + +;; Register numbers. +(define_constants + [(CRIS_GOT_REGNUM 0) + (CRIS_STATIC_CHAIN_REGNUM 7) + (CRIS_FP_REGNUM 8) + (CRIS_SP_REGNUM 14) + (CRIS_ACR_REGNUM 15) + (CRIS_SRP_REGNUM 16) + (CRIS_MOF_REGNUM 17) + (CRIS_AP_REGNUM 18) + (CRIS_CC0_REGNUM 19)] +) + +;; We need an attribute to define whether an instruction can be put in +;; a branch-delay slot or not, and whether it has a delay slot. +;; +;; Branches and return instructions have a delay slot, and cannot +;; themselves be put in a delay slot. 
This has changed *for short +;; branches only* between architecture variants, but the possible win +;; is presumed negligible compared to the added complexity of the machine +;; description: one would have to add always-correct infrastructure to +;; distinguish short branches. +;; +;; Whether an instruction can be put in a delay slot depends on the +;; instruction (all short instructions except jumps and branches) +;; and the addressing mode (must not be prefixed or referring to pc). +;; In short, any "slottable" instruction must be 16 bit and not refer +;; to pc, or alter it. +;; +;; The possible values are "yes", "no", "has_slot", "has_return_slot" +;; and "has_call_slot". +;; Yes/no tells whether the insn is slottable or not. Has_call_slot means +;; that the insn is a call insn, which for CRIS v32 has a delay-slot. +;; Of special concern is that no RTX_FRAME_RELATED insn must go in that +;; call delay slot, as it's located in the address *after* the call insn, +;; and the unwind machinery doesn't know about delay slots. +;; Has_slot means that the insn is a branch insn (which are +;; not considered slottable since that is generally true). Having the +;; seemingly illogical value "has_slot" means we do not have to add +;; another attribute just to say that an insn has a delay-slot, since it +;; also infers that it is not slottable. Better names for the attribute +;; were found to be longer and not add readability to the machine +;; description. +;; Has_return_slot is similar, for the return insn. +;; +;; The default that is defined here for this attribute is "no", not +;; slottable, not having a delay-slot, so there's no need to worry about +;; it being wrong for non-branch and return instructions. +;; The default could depend on the kind of insn and the addressing +;; mode, but that would need more attributes and hairier, more error +;; prone code. +;; +;; There is an extra memory constraint, 'Q', which recognizes an indirect +;; register. The constraints 'Q' and '>' together match all possible +;; memory operands that are slottable. +;; For other operands, you need to check if it has a valid "slottable" +;; quick-immediate operand, where the particular signedness-variation +;; may match the constraints 'I' or 'J'.), and include it in the +;; constraint pattern for the slottable pattern. An alternative using +;; only "r" constraints is most often slottable. + +(define_attr "slottable" "no,yes,has_slot,has_return_slot,has_call_slot" + (const_string "no")) + +;; We also need attributes to sanely determine the condition code +;; state. See cris_notice_update_cc for how this is used. + +(define_attr "cc" "none,clobber,normal,noov32,rev" (const_string "normal")) + +;; At the moment, this attribute is just used to help bb-reorder do its +;; work; the default 0 doesn't help it. Many insns have other lengths, +;; though none are shorter. +(define_attr "length" "" (const_int 2)) + +;; A branch has one delay-slot. The instruction in the +;; delay-slot is always executed, independent of whether the branch is +;; taken or not. Note that besides setting "slottable" to "has_slot", +;; there also has to be a "%#" at the end of a "delayed" instruction +;; output pattern (for "jump" this means "ba %l0%#"), so print_operand can +;; catch it and print a "nop" if necessary. This method was stolen from +;; sparc.md. 
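+
+;; As a concrete illustration (the label and the slot insn are arbitrary),
+;; a "ba %l0%#" output template can end up either as
+;;   ba .L2
+;;   nop              ; %# printed a nop: no slottable insn was available
+;; or, with the scheduler filling the slot,
+;;   ba .L2
+;;   moveq 1,$r9      ; executed whether or not the branch is taken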
+ +(define_delay (eq_attr "slottable" "has_slot") + [(eq_attr "slottable" "yes") (nil) (nil)]) + +;; We can't put prologue insns in call-insn delay-slots when +;; DWARF2 unwind info is emitted, because the unwinder matches the +;; address after the insn. It must see the return address of a call at +;; a position at least *one byte after* the insn, or it'll think that +;; the insn hasn't been executed. If the insn is in a delay-slot of a +;; call, it's just *exactly* after the insn. + +(define_delay (eq_attr "slottable" "has_call_slot") + [(and (eq_attr "slottable" "yes") + (ior (not (match_test "RTX_FRAME_RELATED_P (insn)")) + (not (match_test "flag_exceptions")))) + (nil) (nil)]) + +;; The insn in the return insn slot must not be the +;; return-address-register restore. FIXME: Use has_slot and express +;; as a parallel with a use of the return-address-register (currently +;; only SRP). However, this requires an amount of fixing tests for +;; naked RETURN in middle-end. +(define_delay (eq_attr "slottable" "has_return_slot") + [(and (eq_attr "slottable" "yes") + (not (match_test "dead_or_set_regno_p (insn, CRIS_SRP_REGNUM)"))) + (nil) (nil)]) + + +;; Iterator definitions. + +;; For the "usual" pattern size alternatives. +(define_mode_iterator BWD [SI HI QI]) +(define_mode_iterator WD [SI HI]) +(define_mode_iterator BW [HI QI]) +(define_mode_attr S [(SI "HI") (HI "QI")]) +(define_mode_attr s [(SI "hi") (HI "qi")]) +(define_mode_attr m [(SI ".d") (HI ".w") (QI ".b")]) +(define_mode_attr mm [(SI ".w") (HI ".b")]) +(define_mode_attr nbitsm1 [(SI "31") (HI "15") (QI "7")]) + +;; For the sign_extend+zero_extend variants. +(define_code_iterator szext [sign_extend zero_extend]) +(define_code_attr u [(sign_extend "") (zero_extend "u")]) +(define_code_attr su [(sign_extend "s") (zero_extend "u")]) + +;; For the shift variants. +(define_code_iterator shift [ashiftrt lshiftrt ashift]) +(define_code_iterator shiftrt [ashiftrt lshiftrt]) +(define_code_attr shlr [(ashiftrt "ashr") (lshiftrt "lshr") (ashift "ashl")]) +(define_code_attr slr [(ashiftrt "asr") (lshiftrt "lsr") (ashift "lsl")]) + +(define_code_iterator ncond [eq ne gtu ltu geu leu]) +(define_code_iterator ocond [gt le]) +(define_code_iterator rcond [lt ge]) +(define_code_attr CC [(eq "eq") (ne "ne") (gt "gt") (gtu "hi") (lt "lt") + (ltu "lo") (ge "ge") (geu "hs") (le "le") (leu "ls")]) +(define_code_attr rCC [(eq "ne") (ne "eq") (gt "le") (gtu "ls") (lt "ge") + (ltu "hs") (ge "lt") (geu "lo") (le "gt") (leu "hi")]) +(define_code_attr oCC [(lt "mi") (ge "pl")]) +(define_code_attr roCC [(lt "pl") (ge "mi")]) + +;; Operand and operator predicates. + +(include "predicates.md") +(include "constraints.md") + +;; Test insns. + +;; No test insns with side-effect on the mem addressing. +;; +;; See note on cmp-insns with side-effects (or lack of them) + +;; Normal named test patterns from SI on. 
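+
+;; Note (illustration only): the BW/BWD mode iterators defined above let a
+;; single pattern stand for several modes.  A template written with
+;; match_operand:BW expands into a QImode and an HImode variant, and the
+;; mode attributes (<m>, <s>, <nbitsm1>, ...) substitute the matching
+;; ".b"/".w"/".d" suffixes and sizes into names and output strings, so the
+;; test and compare patterns below cover all integer sizes with only a
+;; handful of definitions.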
+ +(define_insn "*tstsi" + [(set (cc0) + (compare (match_operand:SI 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "" +{ + if (which_alternative == 0 && TARGET_V32) + return "cmpq 0,%0"; + return "test.d %0"; +} + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*tst_cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "cris_cc0_user_requires_cmp (insn)" + "@ + cmp 0,%0 + test %0 + test %0" + [(set_attr "slottable" "no,yes,no")]) + +(define_insn "*tst_non_cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,Q>,m") + (const_int 0)))] + "!cris_cc0_user_requires_cmp (insn)" + "@ + move %0,%0 + test %0 + test %0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "noov32,*,*")]) + +;; It seems that the position of the sign-bit and the fact that 0.0 is +;; all 0-bits would make "tstsf" a straight-forward implementation; +;; either "test.d" it for positive/negative or "btstq 30,r" it for +;; zeroness. +;; +;; FIXME: Do that some time; check next_cc0_user to determine if +;; zero or negative is tested for. + +;; Compare insns. + +;; We could optimize the sizes of the immediate operands for various +;; cases, but that is not worth it because of the very little usage of +;; DImode for anything else but a structure/block-mode. Just do the +;; obvious stuff for the straight-forward constraint letters. + +(define_insn "*cmpdi_non_v32" + [(set (cc0) + (compare (match_operand:DI 0 "nonimmediate_operand" "rm,r,r,r,r,r,r,o") + (match_operand:DI 1 "general_operand" "M,Kc,I,P,n,r,o,r")))] + "!TARGET_V32" + "@ + test.d %M0\;ax\;test.d %H0 + cmpq %1,%M0\;ax\;cmpq 0,%H0 + cmpq %1,%M0\;ax\;cmpq -1,%H0 + cmp%e1.%z1 %1,%M0\;ax\;cmpq %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M0,%M1\;ax\;cmp.d %H0,%H1") + +(define_insn "*cmpdi_v32" + [(set (cc0) + (compare (match_operand:DI 0 "register_operand" "r,r,r,r,r") + (match_operand:DI 1 "nonmemory_operand" "Kc,I,P,n,r")))] + "TARGET_V32" + "@ + cmpq %1,%M0\;ax\;cmpq 0,%H0 + cmpq %1,%M0\;ax\;cmpq -1,%H0 + cmp%e1.%z1 %1,%M0\;ax\;cmpq %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0 + cmp.d %M1,%M0\;ax\;cmp.d %H1,%H0") + +;; Note that compare insns with side effect addressing mode (e.g.): +;; +;; cmp.S [rx=ry+i],rz; +;; cmp.S [%3=%1+%2],%0 +;; +;; are *not* usable for gcc since the reloader *does not accept* +;; cc0-changing insns with side-effects other than setting the condition +;; codes. The reason is that the reload stage *may* cause another insn to +;; be output after the main instruction, in turn invalidating cc0 for the +;; insn using the test. (This does not apply to the CRIS case, since a +;; reload for output -- move to memory -- does not change the condition +;; code. Unfortunately we have no way to describe that at the moment. I +;; think code would improve being in the order of one percent faster. + +;; We have cmps and cmpu (compare reg w. sign/zero extended mem). +;; These are mostly useful for compares in SImode, using 8 or 16-bit +;; constants, but sometimes gcc will find its way to use it for other +;; (memory) operands. Avoid side-effect patterns, though (see above). 
+ +(define_insn "*cmp_ext" + [(set (cc0) + (compare + (match_operand:SI 0 "register_operand" "r,r") + (match_operator:SI 2 "cris_extend_operator" + [(match_operand:BW 1 "memory_operand" "Q>,m")])))] + "" + "cmp%e2 %1,%0" + [(set_attr "slottable" "yes,no")]) + +;; Swap operands; it seems the canonical look (if any) is not enforced. +;; +;; FIXME: Investigate that. + +(define_insn "*cmp_swapext" + [(set (cc0) + (compare + (match_operator:SI 2 "cris_extend_operator" + [(match_operand:BW 0 "memory_operand" "Q>,m")]) + (match_operand:SI 1 "register_operand" "r,r")))] + "" + "cmp%e2 %0,%1" + [(set_attr "slottable" "yes,no") + (set_attr "cc" "rev")]) + +;; The "normal" compare patterns, from SI on. Special-cases with zero +;; are covered above. + +(define_insn "*cmpsi" + [(set (cc0) + (compare + (match_operand:SI 0 "nonimmediate_operand" "r,r,r, Q>,r,r,m") + (match_operand:SI 1 "general_operand" "I,r,Q>,r, P,g,r")))] + "" + "@ + cmpq %1,%0 + cmp.d %1,%0 + cmp.d %1,%0 + cmp.d %0,%1 + cmp%e1.%z1 %1,%0 + cmp.d %1,%0 + cmp.d %0,%1" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no") + (set_attr "cc" "normal,normal,normal,rev,normal,normal,rev")]) + +(define_insn "*cmp" + [(set (cc0) + (compare (match_operand:BW 0 "nonimmediate_operand" "r,r, Q>,r,m") + (match_operand:BW 1 "general_operand" "r,Q>,r, g,r")))] + "" + "@ + cmp %1,%0 + cmp %1,%0 + cmp %0,%1 + cmp %1,%0 + cmp %0,%1" + [(set_attr "slottable" "yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,rev,normal,rev")]) + +;; Pattern matching the BTST insn. +;; It is useful for "if (i & val)" constructs, where val is an exact +;; power of 2, or if val + 1 is a power of two, where we check for a bunch +;; of zeros starting at bit 0). + +;; SImode. This mode is the only one needed, since gcc automatically +;; extends subregs for lower-size modes. FIXME: Add testcase. +(define_insn "*btst" + [(set (cc0) + (compare + (zero_extract:SI + (match_operand:SI 0 "nonmemory_operand" "r, r,r, r,r, r,Kp") + (match_operand:SI 1 "const_int_operand" "Kc,n,Kc,n,Kc,n,n") + (match_operand:SI 2 "nonmemory_operand" "M, M,Kc,n,r, r,r")) + (const_int 0)))] + ;; Either it is a single bit, or consecutive ones starting at 0. + ;; The btst ones depend on stuff in NOTICE_UPDATE_CC. + "CONST_INT_P (operands[1]) + && (operands[1] == const1_rtx || operands[2] == const0_rtx) + && (REG_S_P (operands[0]) + || (operands[1] == const1_rtx + && REG_S_P (operands[2]) + && CONST_INT_P (operands[0]) + && exact_log2 (INTVAL (operands[0])) >= 0)) + && !TARGET_CCINIT" + +;; The next-to-last "&&" condition above should be caught by some kind of +;; canonicalization in gcc, but we can easily help with it here. +;; It results from expressions of the type +;; "power_of_2_value & (1 << y)". +;; +;; Since there may be codes with tests in on bits (in constant position) +;; beyond the size of a word, handle that by assuming those bits are 0. +;; GCC should handle that, but it's a matter of easily-added belts while +;; having suspenders. + + "@ + btstq (%1-1),%0 + cmpq 0,%0 + btstq %2,%0 + clearf nz + btst %2,%0 + clearf nz + cmpq %p0,%2" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Move insns. + +;; The whole mandatory movdi family is here; expander, "anonymous" +;; recognizer and splitter. We're forced to have a movdi pattern, +;; although GCC should be able to split it up itself. Normally it can, +;; but if other insns have DI operands (as is the case here), reload +;; must be able to generate or match a movdi. many testcases fail at +;; -O3 or -fssa if we don't have this. 
FIXME: Fix GCC... See +;; . +;; However, a patch from Richard Kenner (similar to the cause of +;; discussion at the URL above), indicates otherwise. See +;; . +;; The truth has IMO is not been decided yet, so check from time to +;; time by disabling the movdi patterns. + +;; To appease testcase gcc.c-torture/execute/920501-2.c (and others) at +;; -O0, we need a movdi as a temporary measure. Here's how things fail: +;; A cmpdi RTX needs reloading (global): +;; (insn 185 326 186 (set (cc0) +;; (compare (mem/f:DI (reg/v:SI 22) 0) +;; (const_int 1 [0x1]))) 4 {cmpdi} (nil) +;; (nil)) +;; Now, reg 22 is reloaded for input address, and the mem is also moved +;; out of the instruction (into a register), since one of the operands +;; must be a register. Reg 22 is reloaded (into reg 10), and the mem is +;; moved out and synthesized in SImode parts (reg 9, reg 10 - should be ok +;; wrt. overlap). The bad things happen with the synthesis in +;; emit_move_insn_1; the location where to substitute reg 10 is lost into +;; two new RTX:es, both still having reg 22. Later on, the left-over reg +;; 22 is recognized to have an equivalent in memory which is substituted +;; straight in, and we end up with an unrecognizable insn: +;; (insn 325 324 326 (set (reg:SI 9 r9) +;; (mem/f:SI (mem:SI (plus:SI (reg:SI 8 r8) +;; (const_int -84 [0xffffffac])) 0) 0)) -1 (nil) +;; (nil)) +;; which is the first part of the reloaded synthesized "movdi". +;; The right thing would be to add equivalent replacement locations for +;; insn with pseudos that need more reloading. The question is where. + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" +{ + if (MEM_P (operands[0]) + && operands[1] != const0_rtx + && (!TARGET_V32 || (!REG_P (operands[1]) && can_create_pseudo_p ()))) + operands[1] = copy_to_mode_reg (DImode, operands[1]); + + /* Some other ports (as of 2001-09-10 for example mcore and romp) also + prefer to split up constants early, like this. The testcase in + gcc.c-torture/execute/961213-1.c shows that CSE2 gets confused by the + resulting subreg sets when using the construct from mcore (as of FSF + CVS, version -r 1.5), and it believes that the high part (the last one + emitted) is the final value. */ + if ((CONST_INT_P (operands[1]) || GET_CODE (operands[1]) == CONST_DOUBLE) + && ! reload_completed + && ! reload_in_progress) + { + rtx insns; + rtx op0 = operands[0]; + rtx op1 = operands[1]; + + start_sequence (); + emit_move_insn (operand_subword (op0, 0, 1, DImode), + operand_subword (op1, 0, 1, DImode)); + emit_move_insn (operand_subword (op0, 1, 1, DImode), + operand_subword (op1, 1, 1, DImode)); + insns = get_insns (); + end_sequence (); + + emit_insn (insns); + DONE; + } +}) + +(define_insn_and_split "*movdi_insn_non_v32" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rx,m") + (match_operand:DI 1 "general_operand" "rx,g,rxM"))] + "(register_operand (operands[0], DImode) + || register_operand (operands[1], DImode) + || operands[1] == const0_rtx) + && !TARGET_V32" + "#" + "&& reload_completed" + [(match_dup 2)] + "operands[2] = cris_split_movdx (operands);") + +;; Overlapping (but non-identical) source memory address and destination +;; register would be a compiler bug, so we don't have to specify that. 
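+
+;; Illustration (registers arbitrary): after reload, the splitter above
+;; turns a register-to-register DImode move into its two SImode halves,
+;; roughly
+;;   move.d $r10,$r12
+;;   move.d $r11,$r13
+;; with cris_split_movdx choosing an order that avoids clobbering a half
+;; that is still needed.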
+(define_insn "*movdi_v32" + [(set + (match_operand:DI 0 "nonimmediate_operand" "=r,rx,&r,>, m,r,x,m") + (match_operand:DI 1 "general_operand" "rxi,r>,m, rx,r,m,m,x"))] + "TARGET_V32" +{ + switch (which_alternative) + { + /* FIXME: 1) Use autoincrement where possible. 2) Have peephole2, + particularly for cases where the address register is dead. */ + case 5: + if (REGNO (operands[0]) == REGNO (XEXP (operands[1], 0))) + return "addq 4,%L1\;move.d %1,%H0\;subq 4,%L1\;move.d %1,%M0"; + gcc_assert (REGNO (operands[0]) + 1 == REGNO (XEXP (operands[1], 0))); + return "move.d [%L1+],%M0\;move.d [%L1],%H0"; + case 2: + /* We could do away with the addq if we knew the address-register + isn't ACR. If we knew the address-register is dead, we could do + away with the subq too. */ + return "move.d [%L1],%M0\;addq 4,%L1\;move.d [%L1],%H0\;subq 4,%L1"; + case 4: + return "move.d %M1,[%L0]\;addq 4,%L0\;move.d %H1,[%L0]\;subq 4,%L0"; + case 6: + return "move [%L1],%M0\;addq 4,%L1\;move [%L1],%H0\;subq 4,%L1"; + case 7: + return "move %M1,[%L0]\;addq 4,%L0\;move %H1,[%L0]\;subq 4,%L0"; + + default: + return "#"; + } +} + ;; The non-split cases clobber cc0 because of their adds and subs. + ;; Beware that NOTICE_UPDATE_CC is called before the forced split happens. + [(set_attr "cc" "*,*,clobber,*,clobber,clobber,*,*")]) + +;; Much like "*movdi_insn_non_v32". Overlapping registers and constants +;; is handled so much better in cris_split_movdx. +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "TARGET_V32 + && reload_completed + && (!MEM_P (operands[0]) || !REG_P (XEXP (operands[0], 0))) + && (!MEM_P (operands[1]) || !REG_P (XEXP (operands[1], 0)))" + [(match_dup 2)] + "operands[2] = cris_split_movdx (operands);") + +;; Side-effect patterns for move.S1 [rx=ry+rx.S2],rw +;; and move.S1 [rx=ry+i],rz +;; Then movs.S1 and movu.S1 for both modes. +;; +;; move.S1 [rx=ry+rz.S],rw avoiding when rx is ry, or rw is rx +;; FIXME: These could have anonymous mode for operand 0. +;; FIXME: Special registers' alternatives too. + +(define_insn "*mov_side_biap" + [(set (match_operand:BW 0 "register_operand" "=r,r") + (mem:BW (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + move [%4=%3+%1%T2],%0") + +(define_insn "*mov_sidesisf_biap" + [(set (match_operand 0 "register_operand" "=r,r,x,x") + (mem (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r,r,r") + (match_operand:SI 2 "const_int_operand" "n,n,n,n")) + (match_operand:SI 3 "register_operand" "r,r,r,r")))) + (set (match_operand:SI 4 "register_operand" "=*3,r,*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "GET_MODE_SIZE (GET_MODE (operands[0])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + move.%s0 [%4=%3+%1%T2],%0 + # + move [%4=%3+%1%T2],%0") + +;; move.S1 [rx=ry+i],rz +;; avoiding move.S1 [ry=ry+i],rz +;; and move.S1 [rz=ry+i],rz +;; Note that "i" is allowed to be a register. 
+ +(define_insn "*mov_side" + [(set (match_operand:BW 0 "register_operand" "=r,r,r,r,r") + (mem:BW + (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || satisfies_constraint_N (operands[2]) + || satisfies_constraint_J (operands[2]))) + return "#"; + if (which_alternative == 4) + return "move [%3=%2%S1],%0"; + return "move [%3=%1%S2],%0"; +}) + +(define_insn "*mov_sidesisf" + [(set (match_operand 0 "register_operand" "=r,r,r,x,x,x,r,r,x,x") + (mem + (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,r,r,r,R,R,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r>Rn,r,>Rn,r,r,r,r")))) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*1,r,r,*2,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "GET_MODE_SIZE (GET_MODE (operands[0])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 + || which_alternative == 3 + || which_alternative == 6 + || which_alternative == 8) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || satisfies_constraint_N (operands[2]) + || satisfies_constraint_J (operands[2]))) + return "#"; + if (which_alternative < 3) + return "move.%s0 [%3=%1%S2],%0"; + if (which_alternative == 7) + return "move.%s0 [%3=%2%S1],%0"; + if (which_alternative == 9) + return "move [%3=%2%S1],%0"; + return "move [%3=%1%S2],%0"; +}) + +;; Other way around; move to memory. + +;; Note that the condition (which for side-effect patterns is usually a +;; call to cris_side_effect_mode_ok), isn't consulted for register +;; allocation preferences -- constraints is the method for that. The +;; drawback is that we can't exclude register allocation to cause +;; "move.s rw,[rx=ry+rz.S]" when rw==rx without also excluding rx==ry or +;; rx==rz if we use an earlyclobber modifier for the constraint for rx. +;; Instead of that, we recognize and split the cases where dangerous +;; register combinations are spotted: where a register is set in the +;; side-effect, and used in the main insn. We don't handle the case where +;; the set in the main insn overlaps the set in the side-effect; that case +;; must be handled in gcc. We handle just the case where the set in the +;; side-effect overlaps the input operand of the main insn (i.e. just +;; moves to memory). 
+ +;; +;; move.s rz,[ry=rx+rw.S] + +(define_insn "*mov_side_biap_mem" + [(set (mem:BW (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r,r") + (match_operand:SI 1 "const_int_operand" "n,n,n")) + (match_operand:SI 2 "register_operand" "r,r,r"))) + (match_operand:BW 3 "register_operand" "r,r,r")) + (set (match_operand:SI 4 "register_operand" "=*2,!3,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 2, 0, 1, 3)" + "@ + # + # + move %3,[%4=%2+%0%T1]") + +(define_insn "*mov_sidesisf_biap_mem" + [(set (mem (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r,r,r,r,r") + (match_operand:SI 1 "const_int_operand" "n,n,n,n,n,n")) + (match_operand:SI 2 "register_operand" "r,r,r,r,r,r"))) + (match_operand 3 "register_operand" "r,r,r,x,x,x")) + (set (match_operand:SI 4 "register_operand" "=*2,!3,r,*2,!3,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "GET_MODE_SIZE (GET_MODE (operands[3])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (MULT, operands, 4, 2, 0, 1, 3)" + "@ + # + # + move.%s3 %3,[%4=%2+%0%T1] + # + # + move %3,[%4=%2+%0%T1]") + +;; Split for the case above where we're out of luck with register +;; allocation (again, the condition isn't checked for that), and we end up +;; with the set in the side-effect getting the same register as the input +;; register. + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))]) + (match_operand 3 "register_operand" "")) + (set (match_operand:SI 4 "cris_nonsp_register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "reload_completed && reg_overlap_mentioned_p (operands[4], operands[3])" + [(set (match_dup 5) (match_dup 3)) + (set (match_dup 4) (match_dup 2)) + (set (match_dup 4) + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 4)))] + "operands[5] + = replace_equiv_address (operands[6], + gen_rtx_PLUS (SImode, + gen_rtx_MULT (SImode, + operands[0], + operands[1]), + operands[2]));") + +;; move.s rx,[ry=rz+i] +;; FIXME: These could have anonymous mode for operand 2. 
+ +;; QImode + +(define_insn "*mov_side_mem" + [(set (mem:BW + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,r,R,R,R") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r>Rn,r,>Rn,r,r,r"))) + (match_operand:BW 2 "register_operand" "r,r,r,r,r,r,r")) + (set (match_operand:SI 3 "register_operand" "=*0,!*2,r,r,*1,!*2,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 0, 1, -1, 2)" +{ + if ((which_alternative == 0 || which_alternative == 4) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || satisfies_constraint_N (operands[1]) + || satisfies_constraint_J (operands[1]))) + return "#"; + if (which_alternative == 1 || which_alternative == 5) + return "#"; + if (which_alternative == 6) + return "move.%s2 %2,[%3=%1%S0]"; + return "move %2,[%3=%0%S1]"; +}) + +;; SImode + +(define_insn "*mov_sidesisf_mem" + [(set (mem + (plus:SI + (match_operand:SI + 0 "cris_bdap_operand" + "%r, r, r,r, r, r,r, R,R, R,R, R") + (match_operand:SI + 1 "cris_bdap_operand" + "r>Rn,r>Rn,r,>Rn,r>Rn,r,>Rn,r,r, r,r, r"))) + (match_operand 2 "register_operand" + "r, r, r,r, x, x,x, r,r, r,x, x")) + (set (match_operand:SI 3 "register_operand" + "=*0,!2, r,r, *0, r,r, *1,!*2,r,*1,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "GET_MODE_SIZE (GET_MODE (operands[2])) == UNITS_PER_WORD + && cris_side_effect_mode_ok (PLUS, operands, 3, 0, 1, -1, 2)" +{ + if ((which_alternative == 0 || which_alternative == 4) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || satisfies_constraint_N (operands[1]) + || satisfies_constraint_J (operands[1]))) + return "#"; + if (which_alternative == 1 + || which_alternative == 7 + || which_alternative == 8 + || which_alternative == 10) + return "#"; + if (which_alternative < 4) + return "move.%s2 %2,[%3=%0%S1]"; + if (which_alternative == 9) + return "move.%s2 %2,[%3=%1%S0]"; + if (which_alternative == 11) + return "move %2,[%3=%1%S0]"; + return "move %2,[%3=%0%S1]"; +}) + +;; Like the biap case, a split where the set in the side-effect gets the +;; same register as the input register to the main insn, since the +;; condition isn't checked at register allocation. + +(define_split + [(parallel + [(set (match_operator + 4 "cris_mem_op" + [(plus:SI + (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))]) + (match_operand 2 "register_operand" "")) + (set (match_operand:SI 3 "cris_nonsp_register_operand" "") + (plus:SI (match_dup 0) (match_dup 1)))])] + "reload_completed && reg_overlap_mentioned_p (operands[3], operands[2])" + [(set (match_dup 4) (match_dup 2)) + (set (match_dup 3) (match_dup 0)) + (set (match_dup 3) (plus:SI (match_dup 3) (match_dup 1)))] + "") + +;; Clear memory side-effect patterns. It is hard to get to the mode if +;; the MEM was anonymous, so there will be one for each mode. 
+ +;; clear.[bwd] [ry=rx+rw.s2] + +(define_insn "*clear_side_biap" + [(set (mem:BWD (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "const_int_operand" "n,n")) + (match_operand:SI 2 "register_operand" "r,r"))) + (const_int 0)) + (set (match_operand:SI 3 "register_operand" "=*2,r") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))] + "cris_side_effect_mode_ok (MULT, operands, 3, 2, 0, 1, -1)" + "@ + # + clear [%3=%2+%0%T1]") + +;; clear.[bwd] [ry=rz+i] + +(define_insn "*clear_side" + [(set (mem:BWD + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn,r,r"))) + (const_int 0)) + (set (match_operand:SI 2 "register_operand" "=*0,r,r,*1,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "cris_side_effect_mode_ok (PLUS, operands, 2, 0, 1, -1, -1)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[1]) + || INTVAL (operands[1]) > 127 + || INTVAL (operands[1]) < -128 + || satisfies_constraint_N (operands[1]) + || satisfies_constraint_J (operands[1]))) + return "#"; + if (which_alternative == 4) + return "clear [%2=%1%S0]"; + return "clear [%2=%0%S1]"; +}) + +;; Normal move patterns from SI on. + +(define_expand "movsi" + [(set + (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "cris_general_operand_or_symbol" ""))] + "" +{ + /* If the output goes to a MEM, make sure we have zero or a register as + input. */ + if (MEM_P (operands[0]) + && ! REG_S_P (operands[1]) + && operands[1] != const0_rtx + && can_create_pseudo_p ()) + operands[1] = force_reg (SImode, operands[1]); + + /* If we're generating PIC and have an incoming symbol, validize it to a + general operand or something that will match a special pattern. + + FIXME: Do we *have* to recognize anything that would normally be a + valid symbol? Can we exclude global PIC addresses with an added + offset? */ + if (flag_pic + && CONSTANT_ADDRESS_P (operands[1]) + && !cris_valid_pic_const (operands[1], false)) + { + enum cris_pic_symbol_type t = cris_pic_symbol_type_of (operands[1]); + + gcc_assert (t != cris_no_symbol); + + if (! REG_S_P (operands[0])) + { + /* We must have a register as destination for what we're about to + do, and for the patterns we generate. */ + CRIS_ASSERT (can_create_pseudo_p ()); + operands[1] = force_reg (SImode, operands[1]); + } + else + { + /* FIXME: add a REG_EQUAL (or is it REG_EQUIV) note to the + destination register for the symbol. It might not be + worth it. Measure. */ + crtl->uses_pic_offset_table = 1; + if (t == cris_rel_symbol) + { + /* Change a "move.d sym(+offs),rN" into (allocate register rM) + for pre-v32: + "move.d (const (plus (unspec [sym] + CRIS_UNSPEC_GOTREL) offs)),rM" "add.d rPIC,rM,rN" + and for v32: + "move.d (const (plus (unspec [sym] + CRIS_UNSPEC_PCREL) offs)),rN". */ + rtx tem, rm, rn = operands[0]; + rtx sym = GET_CODE (operands[1]) != CONST + ? operands[1] : get_related_value (operands[1]); + HOST_WIDE_INT offs = get_integer_term (operands[1]); + + gcc_assert (can_create_pseudo_p ()); + + if (TARGET_V32) + { + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_PCREL); + if (offs != 0) + tem = plus_constant (Pmode, tem, offs); + rm = rn; + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + } + else + { + /* We still uses GOT-relative addressing for + pre-v32. 
*/ + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), + CRIS_UNSPEC_GOTREL); + if (offs != 0) + tem = plus_constant (Pmode, tem, offs); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + if (expand_binop (Pmode, add_optab, rm, pic_offset_table_rtx, + rn, 0, OPTAB_LIB_WIDEN) != rn) + internal_error ("expand_binop failed in movsi gotrel"); + } + DONE; + } + else if (t == cris_got_symbol) + { + /* Change a "move.d sym,rN" into (allocate register rM, rO) + "move.d (const (unspec [sym] CRIS_UNSPEC_GOTREAD)),rM" + "add.d rPIC,rM,rO", "move.d [rO],rN" with + the memory access marked as read-only. */ + rtx tem, mem, rm, ro, rn = operands[0]; + gcc_assert (can_create_pseudo_p ()); + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[1]), + CRIS_UNSPEC_GOTREAD); + rm = gen_reg_rtx (Pmode); + emit_move_insn (rm, gen_rtx_CONST (Pmode, tem)); + ro = gen_reg_rtx (Pmode); + if (expand_binop (Pmode, add_optab, rm, pic_offset_table_rtx, + ro, 0, OPTAB_LIB_WIDEN) != ro) + internal_error ("expand_binop failed in movsi got"); + mem = gen_rtx_MEM (Pmode, ro); + + /* This MEM doesn't alias anything. Whether it + aliases other same symbols is unimportant. */ + set_mem_alias_set (mem, new_alias_set ()); + MEM_NOTRAP_P (mem) = 1; + + /* We can set the GOT memory read of a non-called symbol + to readonly, but not that of a call symbol, as those + are subject to lazy evaluation and usually have the value + changed from the first call to the second (but + constant thereafter). */ + MEM_READONLY_P (mem) = 1; + emit_move_insn (rn, mem); + DONE; + } + else + { + /* We get here when we have to change something that would + be recognizable if it wasn't PIC. A ``sym'' is ok for + PIC symbols both with and without a GOT entry. And ``sym + + offset'' is ok for local symbols, so the only thing it + could be, is a global symbol with an offset. Check and + abort if not. */ + rtx reg = gen_reg_rtx (Pmode); + rtx sym = get_related_value (operands[1]); + HOST_WIDE_INT offs = get_integer_term (operands[1]); + + gcc_assert (can_create_pseudo_p () + && t == cris_got_symbol_needing_fixup + && sym != NULL_RTX && offs != 0); + + emit_move_insn (reg, sym); + if (expand_binop (SImode, add_optab, reg, + GEN_INT (offs), operands[0], 0, + OPTAB_LIB_WIDEN) != operands[0]) + internal_error ("expand_binop failed in movsi got+offs"); + DONE; + } + } + } +}) + +(define_insn "*movsi_got_load" + [(set (reg:SI CRIS_GOT_REGNUM) (unspec:SI [(const_int 0)] CRIS_UNSPEC_GOT))] + "flag_pic" +{ + return TARGET_V32 + ? "lapc _GLOBAL_OFFSET_TABLE_,%:" + : "move.d $pc,%:\;sub.d .:GOTOFF,%:"; +} + [(set_attr "cc" "clobber")]) + +(define_insn "*movsi_internal" + [(set + (match_operand:SI 0 "nonimmediate_operand" + "=r,r, r,Q>,r,Q>,g,r,r, r,g,rQ>,x, m,x") + (match_operand:SI 1 "cris_general_operand_or_pic_source" + "r,Q>,M,M, I,r, M,n,!S,g,r,x, rQ>,x,gi"))] + ;; Note that we prefer not to use the S alternative (if for some reason + ;; it competes with others) above, but g matches S. + "" +{ + /* Better to have c-switch here; it is worth it to optimize the size of + move insns. The alternative would be to try to find more constraint + letters. FIXME: Check again. It seems this could shrink a bit. 
*/ + switch (which_alternative) + { + case 9: + if (TARGET_V32) + { + if (!flag_pic + && (GET_CODE (operands[1]) == SYMBOL_REF + || GET_CODE (operands[1]) == LABEL_REF + || GET_CODE (operands[1]) == CONST)) + { + /* FIXME: Express this through (set_attr cc none) instead, + since we can't express the ``none'' at this point. FIXME: + Use lapc for everything except const_int and when next cc0 + user would want the flag setting. */ + CC_STATUS_INIT; + return "lapc %1,%0"; + } + if (flag_pic == 1 + && GET_CODE (operands[1]) == CONST + && GET_CODE (XEXP (operands[1], 0)) == UNSPEC + && XINT (XEXP (operands[1], 0), 1) == CRIS_UNSPEC_GOTREAD) + return "movu.w %1,%0"; + } + /* FALLTHROUGH */ + case 0: + case 1: + case 5: + case 10: + return "move.d %1,%0"; + + case 11: + case 12: + case 13: + case 14: + return "move %d1,%0"; + + case 2: + case 3: + case 6: + return "clear.d %0"; + + /* Constants -32..31 except 0. */ + case 4: + return "moveq %1,%0"; + + /* We can win a little on constants -32768..-33, 32..65535. */ + case 7: + if (INTVAL (operands[1]) > 0 && INTVAL (operands[1]) < 65536) + { + if (INTVAL (operands[1]) < 256) + return "movu.b %1,%0"; + return "movu.w %1,%0"; + } + else if (INTVAL (operands[1]) >= -32768 && INTVAL (operands[1]) < 32768) + { + if (INTVAL (operands[1]) >= -128 && INTVAL (operands[1]) < 128) + return "movs.b %1,%0"; + return "movs.w %1,%0"; + } + return "move.d %1,%0"; + + case 8: + { + rtx tem = operands[1]; + gcc_assert (GET_CODE (tem) == CONST); + tem = XEXP (tem, 0); + if (GET_CODE (tem) == PLUS + && GET_CODE (XEXP (tem, 0)) == UNSPEC + && (XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_GOTREL + || XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_PCREL) + && CONST_INT_P (XEXP (tem, 1))) + tem = XEXP (tem, 0); + gcc_assert (GET_CODE (tem) == UNSPEC); + switch (XINT (tem, 1)) + { + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_PLTGOTREAD: + /* Using sign-extend mostly to be consistent with the + indexed addressing mode. */ + if (flag_pic == 1) + return "movs.w %1,%0"; + return "move.d %1,%0"; + + case CRIS_UNSPEC_GOTREL: + case CRIS_UNSPEC_PLT_GOTREL: + gcc_assert (!TARGET_V32); + return "move.d %1,%0"; + + case CRIS_UNSPEC_PCREL: + case CRIS_UNSPEC_PLT_PCREL: + gcc_assert (TARGET_V32); + return "lapc %1,%0"; + + default: + gcc_unreachable (); + } + } + default: + return "BOGUS: %1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,no,no,no,yes,yes,no,no") + (set_attr "cc" "*,*,*,*,*,*,*,*,*,*,*,none,none,none,none")]) + +;; Extend operations with side-effect from mem to register, using +;; MOVS/MOVU. These are from mem to register only. +;; +;; [rx=ry+rz.S] +;; +;; QImode to HImode +;; +;; FIXME: Can we omit extend to HImode, since GCC should truncate for +;; HImode by itself? Perhaps use only anonymous modes? 
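+
+;; Illustration (registers arbitrary): the scaled-index variant can e.g.
+;; sign-extend a byte while recording the element address,
+;;   movs.b [$r12=$r10+$r11.b],$r13
+;; i.e. $r13 = (int) p[i] and $r12 = &p[i] in one insn.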
+ +(define_insn "*ext_sideqihi_biap" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 5 "cris_extend_operator" + [(mem:QI (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + mov%e5.%m5 [%4=%3+%1%T2],%0") + +(define_insn "*ext_sidesi_biap" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 5 "cris_extend_operator" + [(mem:BW (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "const_int_operand" "n,n")) + (match_operand:SI 3 "register_operand" "r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*3,r") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))] + "cris_side_effect_mode_ok (MULT, operands, 4, 3, 1, 2, 0)" + "@ + # + mov%e5 [%4=%3+%1%T2],%0") + +;; Same but [rx=ry+i] + +;; QImode to HImode + +(define_insn "*ext_sideqihi" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (match_operator:HI + 4 "cris_extend_operator" + [(mem:QI (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || satisfies_constraint_N (operands[2]) + || satisfies_constraint_J (operands[2]))) + return "#"; + if (which_alternative == 4) + return "mov%e4.%m4 [%3=%2%S1],%0"; + return "mov%e4.%m4 [%3=%1%S2],%0"; +}) + +(define_insn "*ext_sidesi" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (match_operator:SI + 4 "cris_extend_operator" + [(mem:BW (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 3 "register_operand" "=*1,r,r,*2,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "cris_side_effect_mode_ok (PLUS, operands, 3, 1, 2, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[2]) + || INTVAL (operands[2]) > 127 + || INTVAL (operands[2]) < -128 + || satisfies_constraint_N (operands[2]) + || satisfies_constraint_J (operands[2]))) + return "#"; + if (which_alternative == 4) + return "mov%e4 [%3=%2%S1],%0"; + return "mov%e4 [%3=%1%S2],%0"; +}) + +;; FIXME: See movsi. 
+ +(define_insn "movhi" + [(set + (match_operand:HI 0 "nonimmediate_operand" "=r,r, r,Q>,r,Q>,r,r,r,g,g,r,r,x") + (match_operand:HI 1 "general_operand" "r,Q>,M,M, I,r, L,O,n,M,r,g,x,r"))] + "" +{ + switch (which_alternative) + { + case 0: + case 1: + case 5: + case 10: + case 11: + return "move.w %1,%0"; + case 12: + case 13: + return "move %1,%0"; + case 2: + case 3: + case 9: + return "clear.w %0"; + case 4: + return "moveq %1,%0"; + case 6: + case 8: + if (INTVAL (operands[1]) < 256 && INTVAL (operands[1]) >= -128) + { + if (INTVAL (operands[1]) > 0) + return "movu.b %1,%0"; + return "movs.b %1,%0"; + } + return "move.w %1,%0"; + case 7: + return "movEq %b1,%0"; + default: + return "BOGUS: %1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,yes,no,no,no,no,yes,yes") + (set_attr "cc" "*,*,none,none,*,none,*,clobber,*,none,none,*,none,none")]) + +(define_insn "movstricthi" + [(set + (strict_low_part + (match_operand:HI 0 "nonimmediate_operand" "+r,r, r,Q>,Q>,g,r,g")) + (match_operand:HI 1 "general_operand" "r,Q>,M,M, r, M,g,r"))] + "" + "@ + move.w %1,%0 + move.w %1,%0 + clear.w %0 + clear.w %0 + move.w %1,%0 + clear.w %0 + move.w %1,%0 + move.w %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no")]) + +(define_expand "reload_in" + [(set (match_operand:BW 2 "register_operand" "=r") + (match_operand:BW 1 "memory_operand" "m")) + (set (match_operand:BW 0 "register_operand" "=x") + (match_dup 2))] + "" + "") + +(define_expand "reload_out" + [(set (match_operand:BW 2 "register_operand" "=&r") + (match_operand:BW 1 "register_operand" "x")) + (set (match_operand:BW 0 "memory_operand" "=m") + (match_dup 2))] + "" + "") + +(define_insn "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,Q>,r, r,Q>,r,g,g,r,r,r,x") + (match_operand:QI 1 "general_operand" "r,r, Q>,M,M, I,M,r,O,g,x,r"))] + "" + "@ + move.b %1,%0 + move.b %1,%0 + move.b %1,%0 + clear.b %0 + clear.b %0 + moveq %1,%0 + clear.b %0 + move.b %1,%0 + moveq %b1,%0 + move.b %1,%0 + move %1,%0 + move %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,yes,no,yes,yes") + (set_attr "cc" "*,*,*,*,*,*,*,*,clobber,*,none,none")]) + +(define_insn "movstrictqi" + [(set (strict_low_part + (match_operand:QI 0 "nonimmediate_operand" "+r,Q>,r, r,Q>,g,g,r")) + (match_operand:QI 1 "general_operand" "r,r, Q>,M,M, M,r,g"))] + "" + "@ + move.b %1,%0 + move.b %1,%0 + move.b %1,%0 + clear.b %0 + clear.b %0 + clear.b %0 + move.b %1,%0 + move.b %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no")]) + +;; The valid "quick" bit-patterns are, except for 0.0, denormalized +;; values REALLY close to 0, and some NaN:s (I think; their exponent is +;; all ones); the worthwhile one is "0.0". +;; It will use clear, so we know ALL types of immediate 0 never change cc. + +(define_insn "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,Q>,r, r,Q>,g,g,r,r,x,Q>,m,x, x") + (match_operand:SF 1 "general_operand" "r,r, Q>,G,G, G,r,g,x,r,x, x,Q>,g"))] + "" + "@ + move.d %1,%0 + move.d %1,%0 + move.d %1,%0 + clear.d %0 + clear.d %0 + clear.d %0 + move.d %1,%0 + move.d %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0 + move %1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,no,no,no,yes,yes,yes,no,yes,no")]) + +;; Movem patterns. Primarily for use in function prologue and epilogue. +;; The V32 variants have an ordering matching the expectations of the +;; standard names "load_multiple" and "store_multiple"; pre-v32 movem +;; store R0 in the highest memory location. 
+ +(define_expand "load_multiple" + [(match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_V32" +{ + rtx indreg; + + /* Apparently the predicate isn't checked, so we need to do so + manually. Once happened for libstdc++-v3 locale_facets.tcc. */ + if (!MEM_P (operands[1])) + FAIL; + + indreg = XEXP (operands[1], 0); + + if (GET_CODE (indreg) == POST_INC) + indreg = XEXP (indreg, 0); + if (!REG_P (indreg) + || GET_CODE (operands[2]) != CONST_INT + || !REG_P (operands[0]) + || REGNO (operands[0]) != 0 + || INTVAL (operands[2]) > CRIS_SP_REGNUM + || (int) REGNO (indreg) < INTVAL (operands[2])) + FAIL; + gcc_unreachable (); + emit_insn (cris_gen_movem_load (operands[1], operands[2], 0)); + DONE; +}) + +(define_expand "store_multiple" + [(match_operand:SI 0 "memory_operand" "") + (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")] + "TARGET_V32" +{ + rtx indreg; + + /* See load_multiple. */ + if (!MEM_P (operands[0])) + FAIL; + + indreg = XEXP (operands[0], 0); + + if (GET_CODE (indreg) == POST_INC) + indreg = XEXP (indreg, 0); + if (!REG_P (indreg) + || GET_CODE (operands[2]) != CONST_INT + || !REG_P (operands[1]) + || REGNO (operands[1]) != 0 + || INTVAL (operands[2]) > CRIS_SP_REGNUM + || (int) REGNO (indreg) < INTVAL (operands[2])) + FAIL; + gcc_unreachable (); + cris_emit_movem_store (operands[0], operands[2], 0, false); + DONE; +}) + +(define_insn "*cris_load_multiple" + [(match_parallel 0 "cris_load_multiple_op" + [(set (match_operand:SI 1 "register_operand" "=r,r") + (match_operand:SI 2 "memory_operand" "Q,m"))])] + "" + "movem %O0,%o0" + [(set_attr "cc" "none") + (set_attr "slottable" "yes,no") + ;; Not true, but setting the length to 0 causes return sequences (ret + ;; movem) to have the cost they had when (return) included the movem + ;; and reduces the performance penalty taken for needing to emit an + ;; epilogue (in turn copied by bb-reorder) instead of return patterns. + ;; FIXME: temporary change until all insn lengths are correctly + ;; described. FIXME: have better target control over bb-reorder. + (set_attr "length" "0")]) + +(define_insn "*cris_store_multiple" + [(match_parallel 0 "cris_store_multiple_op" + [(set (match_operand:SI 2 "memory_operand" "=Q,m") + (match_operand:SI 1 "register_operand" "r,r"))])] + "" + "movem %o0,%O0" + [(set_attr "cc" "none") + (set_attr "slottable" "yes,no")]) + + +;; Sign- and zero-extend insns with standard names. +;; Those for integer source operand are ordered with the widest source +;; type first. + +;; Sign-extend. + +(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:SI 1 "general_operand" "g")))] + "" + "move.d %1,%M0\;smi %H0\;neg.d %H0,%H0") + +(define_insn "extenddi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:BW 1 "general_operand" "g")))] + "" + "movs %1,%M0\;smi %H0\;neg.d %H0,%H0") + +(define_insn "extendsi2" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (sign_extend:SI (match_operand:BW 1 "general_operand" "r,Q>,g")))] + "" + "movs %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +;; To do a byte->word extension, extend to dword, except that the top half +;; of the register will be clobbered. FIXME: Perhaps this is not needed. 
+ +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (sign_extend:HI (match_operand:QI 1 "general_operand" "r,Q>,g")))] + "" + "movs.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + + +;; Zero-extend. The DImode ones are synthesized by gcc, so we don't +;; specify them here. + +(define_insn "zero_extendsi2" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (zero_extend:SI + (match_operand:BW 1 "nonimmediate_operand" "r,Q>,m")))] + "" + "movu %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +;; Same comment as sign-extend QImode to HImode above applies. + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (zero_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "r,Q>,m")))] + "" + "movu.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +;; All kinds of arithmetic and logical instructions. +;; +;; First, anonymous patterns to match addressing modes with +;; side-effects. +;; +;; op.S [rx=ry+I],rz; (add, sub, or, and, bound). +;; +;; [rx=ry+rz.S] + +(define_insn "*op_side_biap" + [(set (match_operand:BWD 0 "register_operand" "=r,r") + (match_operator:BWD + 6 "cris_orthogonal_operator" + [(match_operand:BWD 1 "register_operand" "0,0") + (mem:BWD (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6 [%5=%4+%2%T3],%0") + +;; [rx=ry+i] ([%4=%2+%3]) + +(define_insn "*op_side" + [(set (match_operand:BWD 0 "register_operand" "=r,r,r,r,r") + (match_operator:BWD + 5 "cris_orthogonal_operator" + [(match_operand:BWD 1 "register_operand" "0,0,0,0,0") + (mem:BWD (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "%x5.%s0 [%4=%3%S2],%0"; + return "%x5 [%4=%2%S3],%0"; +}) + +;; To match all cases for commutative operations we may have to have the +;; following pattern for add, or & and. I do not know really, but it does +;; not break anything. +;; +;; FIXME: This really ought to be checked. +;; +;; op.S [rx=ry+I],rz; +;; +;; [rx=ry+rz.S] + +(define_insn "*op_swap_side_biap" + [(set (match_operand:BWD 0 "register_operand" "=r,r") + (match_operator:BWD + 6 "cris_commutative_orth_op" + [(mem:BWD (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r"))) + (match_operand:BWD 1 "register_operand" "0,0")])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6 [%5=%4+%2%T3],%0") + +;; [rx=ry+i] ([%4=%2+%3]) +;; FIXME: These could have anonymous mode for operand 0. 
+ +;; QImode + +(define_insn "*op_swap_side" + [(set (match_operand:BWD 0 "register_operand" "=r,r,r,r,r") + (match_operator:BWD + 5 "cris_commutative_orth_op" + [(mem:BWD + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r"))) + (match_operand:BWD 1 "register_operand" "0,0,0,0,0")])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "%x5 [%4=%3%S2],%0"; + return "%x5 [%4=%2%S3],%0"; +}) + +;; Add operations, standard names. + +;; Note that for the 'P' constraint, the high part can be -1 or 0. We +;; output the insn through the 'A' output modifier as "adds.w" and "addq", +;; respectively. +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "general_operand")))] + "" +{ + if (MEM_P (operands[2]) && TARGET_V32) + operands[2] = force_reg (DImode, operands[2]); +}) + +(define_insn "*adddi3_non_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r,&r") + (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,r") + (match_operand:DI 2 "general_operand" "J,N,P,g,!To")))] + "!TARGET_V32" + "@ + addq %2,%M0\;ax\;addq 0,%H0 + subq %n2,%M0\;ax\;subq 0,%H0 + add%e2.%z2 %2,%M0\;ax\;%A2 %H2,%H0 + add.d %M2,%M0\;ax\;add.d %H2,%H0 + add.d %M2,%M1,%M0\;ax\;add.d %H2,%H1,%H0") + +; It seems no use allowing a memory operand for this one, because we'd +; need a scratch register for incrementing the address. +(define_insn "*adddi3_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%0,0,0,0,0") + (match_operand:DI 2 "nonmemory_operand" "J,N,P,r,n")))] + "TARGET_V32" + "@ + addq %2,%M0\;addc 0,%H0 + subq %n2,%M0\;ax\;subq 0,%H0 + add%e2.%z2 %2,%M0\;addc %H2,%H0 + add.d %M2,%M0\;addc %H2,%H0 + add.d %M2,%M0\;addc %H2,%H0") + +(define_expand "add3" + [(set (match_operand:BWD 0 "register_operand") + (plus:BWD + (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*addsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r,r,r, r,r, r") + (plus:SI + (match_operand:SI 1 "register_operand" "%0,0, 0,0,0,0, 0,r, r") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,n,!S,g,!To,0")))] + +;; The last constraint is due to that after reload, the '%' is not +;; honored, and canonicalization doesn't care about keeping the same +;; register as in destination. This will happen after insn splitting. +;; gcc <= 2.7.2. FIXME: Check for gcc-2.9x + + "!TARGET_V32" +{ + switch (which_alternative) + { + case 0: + case 1: + return "add.d %2,%0"; + case 2: + return "addq %2,%0"; + case 3: + return "subq %n2,%0"; + case 4: + /* 'Known value', but not in -63..63. + Check if addu/subu may be used. 
*/ + if (INTVAL (operands[2]) > 0) + { + if (INTVAL (operands[2]) < 256) + return "addu.b %2,%0"; + if (INTVAL (operands[2]) < 65536) + return "addu.w %2,%0"; + } + else + { + if (INTVAL (operands[2]) >= -255) + return "subu.b %n2,%0"; + if (INTVAL (operands[2]) >= -65535) + return "subu.w %n2,%0"; + } + return "add.d %2,%0"; + case 5: + { + rtx tem = operands[2]; + gcc_assert (GET_CODE (tem) == CONST); + tem = XEXP (tem, 0); + if (GET_CODE (tem) == PLUS + && GET_CODE (XEXP (tem, 0)) == UNSPEC + /* We don't allow CRIS_UNSPEC_PCREL here; we can't have a + pc-relative operand in an add insn. */ + && XINT (XEXP (tem, 0), 1) == CRIS_UNSPEC_GOTREL + && CONST_INT_P (XEXP (tem, 1))) + tem = XEXP (tem, 0); + gcc_assert (GET_CODE (tem) == UNSPEC); + switch (XINT (tem, 1)) + { + case CRIS_UNSPEC_GOTREAD: + case CRIS_UNSPEC_PLTGOTREAD: + /* Using sign-extend mostly to be consistent with the + indexed addressing mode. */ + if (flag_pic == 1) + return "adds.w %2,%0"; + return "add.d %2,%0"; + + case CRIS_UNSPEC_PLT_GOTREL: + case CRIS_UNSPEC_GOTREL: + return "add.d %2,%0"; + default: + gcc_unreachable (); + } + } + case 6: + return "add%u2 %2,%0"; + case 7: + return "add.d %2,%1,%0"; + case 8: + return "add.d %1,%0"; + default: + return "BOGUS addsi %2+%1 to %0"; + } +} + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no,no,yes")]) + +; FIXME: Check what's best: having the three-operand ACR alternative +; before or after the corresponding-operand2 alternative. Check for +; *all* insns. FIXME: constant constraint letter for -128..127. +(define_insn "*addsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,!a,r,!a, r,r,!a,r,!a,r,r,r,!a") + (plus:SI + (match_operand:SI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,r, 0,0,0,r") + (match_operand:SI 2 "general_operand" "r, r, Q>,Q>,J,N,NJ,L,L, P,n,g,g")))] + "TARGET_V32" + "@ + add.d %2,%0 + addi %2.b,%1,%0 + add.d %2,%0 + addo.d %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + adds.w %2,%0 + addo %2,%1,%0 + addu.w %2,%0 + add.d %2,%0 + add%u2 %2,%0 + addo.%Z2 %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,no,no,no,no,no,no") + (set_attr "cc" "*,none,*,none,*,*,none,*,none,*,*,*,none")]) + +(define_insn "*addhi3_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r, r,r,r,r") + (plus:HI (match_operand:HI 1 "register_operand" "%0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "r,Q>,J,N,g,!To")))] + "!TARGET_V32" + "@ + add.w %2,%0 + add.w %2,%0 + addq %2,%0 + subq %n2,%0 + add.w %2,%0 + add.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal,normal")]) + +(define_insn "*addhi3_v32" + [(set (match_operand:HI 0 "register_operand" "=r, !a,r,!a, r,r,!a,r,!a") + (plus:HI + (match_operand:HI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,r") + (match_operand:HI 2 "general_operand" "r, r, Q>,Q>,J,N,NJ,g,g")))] + "TARGET_V32" + "@ + add.w %2,%0 + addi %2.b,%1,%0 + add.w %2,%0 + addo.w %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + add.w %2,%0 + addo.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "*,none,*,none,clobber,clobber,none,*,none")]) + +(define_insn "*addqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r, r,r,r,r,r") + (plus:QI (match_operand:QI 1 "register_operand" "%0,0, 0,0,0,0,r") + (match_operand:QI 2 "general_operand" "r,Q>,J,N,O,g,!To")))] + "!TARGET_V32" + "@ + add.b %2,%0 + add.b %2,%0 + addq %2,%0 + subq %n2,%0 + subQ -%b2,%0 + add.b %2,%0 + add.b %2,%1,%0" + [(set_attr "slottable" 
"yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,clobber,normal,normal")]) + +(define_insn "*addqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,!a,r,!a, r,r,!a,r,r,!a") + (plus:QI + (match_operand:QI 1 "register_operand" "%0,r, 0, r, 0,0,r, 0,0,r") + (match_operand:QI 2 "general_operand" "r,r, Q>,Q>,J,N,NJ,O,g,g")))] + "TARGET_V32" + "@ + add.b %2,%0 + addi %2.b,%1,%0 + add.b %2,%0 + addo.b %2,%1,%0 + addq %2,%0 + subq %n2,%0 + addoq %2,%1,%0 + subQ -%b2,%0 + add.b %2,%0 + addo.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,yes,yes,no,no") + (set_attr "cc" "*,none,*,none,clobber,clobber,none,clobber,*,none")]) + +;; Subtract. +;; +;; Note that because of insn canonicalization these will *seldom* but +;; rarely be used with a known constant as an operand. + +;; Note that for the 'P' constraint, the high part can be -1 or 0. We +;; output the insn through the 'D' output modifier as "subs.w" and "subq", +;; respectively. +(define_expand "subdi3" + [(set (match_operand:DI 0 "register_operand") + (minus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "general_operand")))] + "" +{ + if (TARGET_V32 && MEM_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); +}) + +(define_insn "*subdi3_non_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r,&r") + (minus:DI (match_operand:DI 1 "register_operand" "0,0,0,0,r") + (match_operand:DI 2 "general_operand" "J,N,P,g,!To")))] + "!TARGET_V32" + "@ + subq %2,%M0\;ax\;subq 0,%H0 + addq %n2,%M0\;ax\;addq 0,%H0 + sub%e2.%z2 %2,%M0\;ax\;%D2 %H2,%H0 + sub.d %M2,%M0\;ax\;sub.d %H2,%H0 + sub.d %M2,%M1,%M0\;ax\;sub.d %H2,%H1,%H0") + +(define_insn "*subdi3_v32" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,&r") + (minus:DI (match_operand:DI 1 "register_operand" "0,0,0,0") + (match_operand:DI 2 "nonmemory_operand" "J,N,P,r")))] + "TARGET_V32" + "@ + subq %2,%M0\;ax\;subq 0,%H0 + addq %n2,%M0\;ax\;addq 0,%H0 + sub%e2.%z2 %2,%M0\;ax\;%D2 %H2,%H0 + sub.d %M2,%M0\;ax\;sub.d %H2,%H0") + +(define_expand "sub3" + [(set (match_operand:BWD 0 "register_operand") + (minus:BWD + (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*subsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r,r,r,r,r") + (minus:SI + (match_operand:SI 1 "register_operand" "0,0, 0,0,0,0,0,r") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,P,n,g,!To")))] + "!TARGET_V32" + +;; This does not do the optimal: "addu.w 65535,r0" when %2 is negative. +;; But then again, %2 should not be negative. 
+ + "@ + sub.d %2,%0 + sub.d %2,%0 + subq %2,%0 + addq %n2,%0 + sub%e2.%z2 %2,%0 + sub.d %2,%0 + sub.d %2,%0 + sub.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no,no")]) + +(define_insn "*subsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r") + (minus:SI + (match_operand:SI 1 "register_operand" "0,0,0,0,0,0,0") + (match_operand:SI 2 "general_operand" "r,Q>,J,N,P,n,g")))] + "TARGET_V32" + "@ + sub.d %2,%0 + sub.d %2,%0 + subq %2,%0 + addq %n2,%0 + sub%e2.%z2 %2,%0 + sub.d %2,%0 + sub.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no,no")]) + +(define_insn "*sub3_nonv32" + [(set (match_operand:BW 0 "register_operand" "=r,r, r,r,r,r") + (minus:BW (match_operand:BW 1 "register_operand" "0,0, 0,0,0,r") + (match_operand:BW 2 "general_operand" "r,Q>,J,N,g,!To")))] + "!TARGET_V32" + "@ + sub %2,%0 + sub %2,%0 + subq %2,%0 + addq %n2,%0 + sub %2,%0 + sub %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal,normal")]) + +(define_insn "*sub3_v32" + [(set (match_operand:BW 0 "register_operand" "=r,r,r,r,r") + (minus:BW (match_operand:BW 1 "register_operand" "0,0,0,0,0") + (match_operand:BW 2 "general_operand" "r,Q>,J,N,g")))] + "TARGET_V32" + "@ + sub %2,%0 + sub %2,%0 + subq %2,%0 + addq %n2,%0 + sub %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "normal,normal,clobber,clobber,normal")]) + +;; CRIS has some add/sub-with-sign/zero-extend instructions. +;; Although these perform sign/zero-extension to SImode, they are +;; equally applicable for the HImode case. +;; FIXME: Check; GCC should handle the widening. +;; Note that these must be located after the normal add/sub patterns, +;; so not to get constants into any less specific operands. +;; +;; Extend with add/sub and side-effect. +;; +;; ADDS/SUBS/ADDU/SUBU and BOUND, which needs a check for zero_extend +;; +;; adds/subs/addu/subu bound [rx=ry+rz.S] + +;; QImode to HImode +;; FIXME: GCC should widen. 
+ +(define_insn "*extopqihi_side_biap" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 6 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0") + (match_operator:HI + 7 "cris_extend_operator" + [(mem:QI (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6%e7.%m7 [%5=%4+%2%T3],%0") + +(define_insn "*extopsi_side_biap" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 6 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operator:SI + 7 "cris_extend_operator" + [(mem:BW (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))])])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x6%e7 [%5=%4+%2%T3],%0") + + +;; [rx=ry+i] + +;; QImode to HImode + +(define_insn "*extopqihi_side" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (match_operator:HI + 5 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0,0,0,0") + (match_operator:HI + 6 "cris_extend_operator" + [(mem:QI + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r") + ))])])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "%x5%E6.%m6 [%4=%3%S2],%0"; + return "%x5%E6.%m6 [%4=%2%S3],%0"; +}) + +(define_insn "*extopsi_side" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (match_operator:SI + 5 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0,0,0,0") + (match_operator:SI + 6 "cris_extend_operator" + [(mem:BW + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r") + ))])])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "(GET_CODE (operands[5]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "%x5%E6 [%4=%3%S2],%0"; + return "%x5%E6 [%4=%2%S3],%0"; +}) + + +;; As with op.S we may have to add special pattern to match commuted +;; operands to adds/addu and bound +;; +;; adds/addu/bound [rx=ry+rz.S] + +;; QImode to HImode +;; 
FIXME: GCC should widen. + +(define_insn "*extopqihi_swap_side_biap" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (plus:HI + (match_operator:HI + 6 "cris_extend_operator" + [(mem:QI (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))]) + (match_operand:HI 1 "register_operand" "0,0"))) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + add%e6.b [%5=%4+%2%T3],%0") + +(define_insn "*extopsi_swap_side_biap" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 7 "cris_plus_or_bound_operator" + [(match_operator:SI + 6 "cris_extend_operator" + [(mem:BW (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 4 "register_operand" "r,r")))]) + (match_operand:SI 1 "register_operand" "0,0")])) + (set (match_operand:SI 5 "register_operand" "=*4,r") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))] + "(GET_CODE (operands[7]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)" + "@ + # + %x7%E6 [%5=%4+%2%T3],%0") + +;; [rx=ry+i] +;; FIXME: GCC should widen. + +;; QImode to HImode + +(define_insn "*extopqihi_swap_side" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r") + (plus:HI + (match_operator:HI + 5 "cris_extend_operator" + [(mem:QI (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))]) + (match_operand:HI 1 "register_operand" "0,0,0,0,0"))) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return "add%e5.b [%4=%3%S2],%0"; + return "add%e5.b [%4=%2%S3],%0"; +}) + +(define_insn "*extopsi_swap_side" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (match_operator:SI + 6 "cris_plus_or_bound_operator" + [(match_operator:SI + 5 "cris_extend_operator" + [(mem:BW (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "%r,r,r,R,R") + (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn,r,r")))]) + (match_operand:SI 1 "register_operand" "0,0,0,0,0")])) + (set (match_operand:SI 4 "register_operand" "=*2,r,r,*3,r") + (plus:SI (match_dup 2) + (match_dup 3)))] + "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[5]) == ZERO_EXTEND) + && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)" +{ + if ((which_alternative == 0 || which_alternative == 3) + && (!CONST_INT_P (operands[3]) + || INTVAL (operands[3]) > 127 + || INTVAL (operands[3]) < -128 + || satisfies_constraint_N (operands[3]) + || satisfies_constraint_J (operands[3]))) + return "#"; + if (which_alternative == 4) + return \"%x6%E5.%m5 [%4=%3%S2],%0\"; + return "%x6%E5 [%4=%2%S3],%0"; +}) + +;; Extend versions (zero/sign) of normal add/sub (no side-effects). + +;; QImode to HImode +;; FIXME: GCC should widen. 
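;; The "UMIN implies ZERO_EXTEND" tests in several of the conditions
;; around here encode what bound computes: an unsigned minimum against
;; a zero-extended narrow operand, so a sign_extend cannot describe it.
;; As a C sketch of the semantics only (the function name is invented):
;;
;;   #include <stdint.h>
;;
;;   /* bound.b: unsigned minimum of rd and a zero-extended byte.  */
;;   static uint32_t bound_b (uint32_t rd, uint8_t src)
;;   {
;;     uint32_t x = src;          /* always zero-extended */
;;     return rd < x ? rd : x;
;;   }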
+ +(define_insn "*extopqihi_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") + (match_operator:HI + 3 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0,0,r") + (match_operator:HI + 4 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])]))] + "!TARGET_V32 && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)" + "@ + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%0 + %x3%E4.%m4 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +(define_insn "*extopqihi_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (match_operator:HI + 3 "cris_additive_operand_extend_operator" + [(match_operand:HI 1 "register_operand" "0,0") + (match_operator:HI + 4 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,m")])]))] + "TARGET_V32" + "%x3%e4.%m4 %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; QImode to SImode + +(define_insn "*extopsi_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (match_operator:SI + 3 "cris_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0,0,r") + (match_operator:SI + 4 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,Q>,m,!To")])]))] + "!TARGET_V32 + && (GET_CODE (operands[3]) != UMIN || GET_CODE (operands[4]) == ZERO_EXTEND) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)" + "@ + %x3%E4 %2,%0 + %x3%E4 %2,%0 + %x3%E4 %2,%0 + %x3%E4 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*extopsi_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operator:SI + 3 "cris_additive_operand_extend_operator" + [(match_operand:SI 1 "register_operand" "0,0") + (match_operator:SI + 4 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,m")])]))] + "TARGET_V32" + "%x3%e4.%m4 %2,%0" + [(set_attr "slottable" "yes")]) + +;; As with the side-effect patterns, may have to have swapped operands for add. +;; For commutative operands, these are the canonical forms. + +;; QImode to HImode + +(define_insn "*addxqihi_swap_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") + (plus:HI + (match_operator:HI + 3 "cris_extend_operator" + [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")]) + (match_operand:HI 1 "register_operand" "0,0,0,r")))] + "!TARGET_V32 && operands[1] != frame_pointer_rtx" + "@ + add%e3.b %2,%0 + add%e3.b %2,%0 + add%e3.b %2,%0 + add%e3.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +;; A case for v32, to catch the "addo" insn in addition to "adds". We +;; only care to match the canonical form; there should be no other. 
+ +(define_insn "*addsbw_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,!a") + (plus:HI + (sign_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "r,m,m")) + (match_operand:HI 1 "register_operand" "0,0,r")))] + "TARGET_V32" + "@ + adds.b %2,%0 + adds.b %2,%0 + addo.b %2,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber,clobber,none")]) + +(define_insn "*addubw_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (plus:HI + (zero_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "r,m")) + (match_operand:HI 1 "register_operand" "0,0")))] + "TARGET_V32" + "addu.b %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +(define_insn "*extopsi_swap_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (match_operator:SI + 4 "cris_plus_or_bound_operator" + [(match_operator:SI + 3 "cris_extend_operator" + [(match_operand:BW 2 "nonimmediate_operand" "r,Q>,m,!To")]) + (match_operand:SI 1 "register_operand" "0,0,0,r")]))] + "!TARGET_V32 + && (GET_CODE (operands[4]) != UMIN || GET_CODE (operands[3]) == ZERO_EXTEND) + && operands[1] != frame_pointer_rtx" + "@ + %x4%E3 %2,%0 + %x4%E3 %2,%0 + %x4%E3 %2,%0 + %x4%E3 %2,%1,%0" + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*adds_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,!a") + (plus:SI + (sign_extend:SI + (match_operand:BW 2 "nonimmediate_operand" "r,m,m")) + (match_operand:SI 1 "register_operand" "0,0,r")))] + "TARGET_V32" + "@ + adds %2,%0 + adds %2,%0 + addo %2,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "*,*,none")]) + +(define_insn "*addu_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (zero_extend:SI + (match_operand:BW 2 "nonimmediate_operand" "r,m")) + (match_operand:SI 1 "register_operand" "0,0")))] + "TARGET_V32 && operands[1] != frame_pointer_rtx" + "addu %2,%0" + [(set_attr "slottable" "yes")]) + +(define_insn "*bound_v32" + [(set (match_operand:SI 0 "register_operand" "=r") + (umin:SI + (zero_extend:SI + (match_operand:BW 2 "register_operand" "r")) + (match_operand:SI 1 "register_operand" "0")))] + "TARGET_V32 && operands[1] != frame_pointer_rtx" + "bound %2,%0" + [(set_attr "slottable" "yes")]) + +;; This is the special case when we use what corresponds to the +;; instruction above in "casesi". Do *not* change it to use the generic +;; pattern and "REG 15" as pc; I did that and it led to madness and +;; maintenance problems: Instead of (as imagined) recognizing and removing +;; or replacing this pattern with something simpler, other variant +;; patterns were recognized or combined, including some prefix variants +;; where the value in pc is not that of the next instruction (which means +;; this instruction actually *is* special and *should* be marked as such). +;; When switching from the "generic pattern match" approach to this simpler +;; approach, there were insignificant differences in gcc, ipps and +;; product code, somehow due to scratching reload behind the ear or +;; something. Testcase "gcc" looked .01% slower and 4 bytes bigger; +;; product code became .001% smaller but "looked better". The testcase +;; "ipps" was just different at register allocation). +;; +;; Assumptions in the jump optimizer forces us to use IF_THEN_ELSE in this +;; pattern with the default-label as the else, with the "if" being +;; index-is-less-than the max number of cases plus one. The default-label +;; is attached to the end of the case-table at time of output. 
+ +(define_insn "*casesi_adds_w" + [(set (pc) + (if_then_else + (ltu (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "n")) + (plus:SI (sign_extend:SI + (mem:HI + (plus:SI (mult:SI (match_dup 0) (const_int 2)) + (pc)))) + (pc)) + (label_ref (match_operand 2 "" "")))) + (use (label_ref (match_operand 3 "" "")))] + "!TARGET_V32 && operands[0] != frame_pointer_rtx" + "adds.w [$pc+%0.w],$pc" + [(set_attr "cc" "clobber")]) + +;; For V32, we just have a jump, but we need to mark the table as used, +;; and the jump insn must have the if_then_else form expected by core +;; GCC. Since we don't want to prolong the lifetime of the original +;; index value, we compare against "unspec 0". It's a pity we have to +;; jump through to get the default label in place and to keep the jump +;; table around. FIXME: Look into it some time. + +(define_insn "*casesi_jump_v32" + [(set (pc) + (if_then_else + (ltu (unspec [(const_int 0)] CRIS_UNSPEC_CASESI) + (match_operand:SI 0 "const_int_operand" "n")) + (match_operand:SI 1 "register_operand" "r") + (label_ref (match_operand 2 "" "")))) + (use (label_ref (match_operand 3 "" "")))] + "TARGET_V32" + "jump %1%#" + [(set_attr "cc" "clobber") + (set_attr "slottable" "has_slot")]) + +;; Multiply instructions. + +;; Sometimes powers of 2 (which are normally canonicalized to a +;; left-shift) appear here, as a result of address reloading. +;; As a special, for values 3 and 5, we can match with an addi, so add those. +;; +;; FIXME: This may be unnecessary now. +;; Explicitly named for convenience of having a gen_... function. + +(define_insn "addi_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI + (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "const_int_operand" "n")))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 2 + || INTVAL (operands[2]) == 4 || INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5)" +{ + if (INTVAL (operands[2]) == 2) + return "lslq 1,%0"; + else if (INTVAL (operands[2]) == 4) + return "lslq 2,%0"; + else if (INTVAL (operands[2]) == 3) + return "addi %0.w,%0"; + else if (INTVAL (operands[2]) == 5) + return "addi %0.d,%0"; + return "BAD: adr_mulsi: %0=%1*%2"; +} +[(set_attr "slottable" "yes") + ;; No flags are changed if this insn is "addi", but it does not seem + ;; worth the trouble to distinguish that to the lslq cases. + (set_attr "cc" "clobber")]) + +;; The addi insn as it is normally used. + +;; Make the ACR alternative taste bad enough to not choose it as a +;; preference to avoid spilling problems (unwind-dw2-fde.c at build). +;; FIXME: Revisit for new register allocator. + +(define_insn "*addi" + [(set (match_operand:SI 0 "register_operand" "=r,!a") + (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "r,r") + (match_operand:SI 3 "const_int_operand" "n,n")) + (match_operand:SI 1 "register_operand" "0,r")))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && CONST_INT_P (operands[3]) + && (INTVAL (operands[3]) == 1 + || INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)" + "@ + addi %2%T3,%0 + addi %2%T3,%1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +;; The mstep instruction. Probably not useful by itself; it's to +;; non-linear wrt. the other insns. We used to expand to it, so at least +;; it's correct. 
+ +(define_insn "mstep_shift" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (lt:SI (cc0) (const_int 0)) + (plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (const_int 1)) + (match_operand:SI 2 "register_operand" "r")) + (ashift:SI (match_operand:SI 3 "register_operand" "0") + (const_int 1))))] + "!TARGET_V32" + "mstep %2,%0" + [(set_attr "slottable" "yes")]) + +;; When illegitimate addresses are legitimized, sometimes gcc forgets +;; to canonicalize the multiplications. +;; +;; FIXME: Check gcc > 2.7.2, remove and possibly fix in gcc. + +(define_insn "mstep_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (lt:SI (cc0) (const_int 0)) + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "0") + (const_int 2)) + (match_operand:SI 2 "register_operand" "r")) + (mult:SI (match_operand:SI 3 "register_operand" "0") + (const_int 2))))] + "!TARGET_V32 + && operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[2] != frame_pointer_rtx + && operands[3] != frame_pointer_rtx" + "mstep %2,%0" + [(set_attr "slottable" "yes")]) + +(define_insn "mul3" + [(set (match_operand:WD 0 "register_operand" "=r") + (mult:WD + (szext:WD (match_operand: 1 "register_operand" "%0")) + (szext:WD (match_operand: 2 "register_operand" "r")))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!mul %2,%0" + [(set (attr "slottable") + (if_then_else (match_test "TARGET_MUL_BUG") + (const_string "no") + (const_string "yes"))) + ;; For umuls.[bwd] it's just N unusable here, but let's be safe. + ;; For muls.b, this really extends to SImode, so cc should be + ;; considered clobbered. + ;; For muls.w, it's just N unusable here, but let's be safe. + (set_attr "cc" "clobber")]) + +;; Note that gcc does not make use of such a thing as umulqisi3. It gets +;; confused and will erroneously use it instead of umulhisi3, failing (at +;; least) gcc.c-torture/execute/arith-rand.c at all optimization levels. +;; Inspection of optab code shows that there must be only one widening +;; multiplication per mode widened to. + +(define_insn "mulsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "r"))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!muls.d %2,%0" + [(set (attr "slottable") + (if_then_else (match_test "TARGET_MUL_BUG") + (const_string "no") + (const_string "yes"))) + ;; Just N unusable here, but let's be safe. + (set_attr "cc" "clobber")]) + +;; A few multiply variations. + +;; When needed, we can get the high 32 bits from the overflow +;; register. We don't care to split and optimize these. +;; +;; Note that cc0 is still valid after the move-from-overflow-register +;; insn; no special precaution need to be taken in cris_notice_update_cc. + +(define_insn "mulsidi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI + (szext:DI (match_operand:SI 1 "register_operand" "%0")) + (szext:DI (match_operand:SI 2 "register_operand" "r")))) + (clobber (match_scratch:SI 3 "=h"))] + "TARGET_HAS_MUL_INSNS" + "%!mul.d %2,%M0\;move $mof,%H0") + +;; These two patterns may be expressible by other means, perhaps by making +;; [u]?mulsidi3 a define_expand. + +;; Due to register allocation braindamage, the clobber 1,2 alternatives +;; cause a move into the clobbered register *before* the insn, then +;; after the insn, mof is moved too, rather than the clobber assigned +;; the last mof target. 
This became apparent when making MOF and SRP +;; visible registers, with the necessary tweak to smulsi3_highpart. +;; Because these patterns are used in division by constants, that damage +;; is visible (ipps regression tests). Therefore the last two +;; alternatives, "helping" reload to avoid an unnecessary move, but +;; punished by force of one "?". Check code from "int d (int a) {return +;; a / 1000;}" and unsigned. FIXME: Comment above was for 3.2, revisit. + +(define_insn "mulsi3_highpart" + [(set (match_operand:SI 0 "register_operand" "=h,h,?r,?r") + (truncate:SI + (lshiftrt:DI + (mult:DI + (szext:DI (match_operand:SI 1 "register_operand" "r,r,0,r")) + (szext:DI (match_operand:SI 2 "register_operand" "r,r,r,0"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1,2,h,h"))] + "TARGET_HAS_MUL_INSNS" + "@ + %!mul.d %2,%1 + %!mul.d %1,%2 + %!mul.d %2,%1\;move $mof,%0 + %!mul.d %1,%2\;move $mof,%0" + [(set_attr "slottable" "yes,yes,no,no") + (set_attr "cc" "clobber")]) + +;; Divide and modulus instructions. CRIS only has a step instruction. + +(define_insn "dstep_shift" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (geu:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (const_int 1)) + (match_operand:SI 2 "register_operand" "r")) + (minus:SI (ashift:SI (match_operand:SI 3 "register_operand" "0") + (const_int 1)) + (match_operand:SI 4 "register_operand" "2")) + (ashift:SI (match_operand:SI 5 "register_operand" "0") + (const_int 1))))] + "" + "dstep %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Here's a variant with mult instead of ashift. +;; +;; FIXME: This should be investigated. Which one matches through combination? + +(define_insn "dstep_mul" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI + (geu:SI (mult:SI (match_operand:SI 1 "register_operand" "0") + (const_int 2)) + (match_operand:SI 2 "register_operand" "r")) + (minus:SI (mult:SI (match_operand:SI 3 "register_operand" "0") + (const_int 2)) + (match_operand:SI 4 "register_operand" "2")) + (mult:SI (match_operand:SI 5 "register_operand" "0") + (const_int 2))))] + "operands[0] != frame_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[2] != frame_pointer_rtx + && operands[3] != frame_pointer_rtx" + "dstep %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Logical operators. + +;; Bitwise "and". + +;; There is no use in defining "anddi3", because gcc can expand this by +;; itself, and make reasonable code without interference. + +;; If the first operand is memory or a register and is the same as the +;; second operand, and the third operand is -256 or -65536, we can use +;; CLEAR instead. Or, if the first operand is a register, and the third +;; operand is 255 or 65535, we can zero_extend. +;; GCC isn't smart enough to recognize these cases (yet), and they seem +;; to be common enough to be worthwhile. +;; FIXME: This should be made obsolete. + +(define_expand "andsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (! 
(CONST_INT_P (operands[2]) + && (((INTVAL (operands[2]) == -256 + || INTVAL (operands[2]) == -65536) + && rtx_equal_p (operands[1], operands[0])) + || ((INTVAL (operands[2]) == 255 + || INTVAL (operands[2]) == 65535) + && REG_P (operands[0]))))) + { + /* Make intermediate steps if operand0 is not a register or + operand1 is not a register, and hope that the reload pass will + make something useful out of it. Note that the operands are + *not* canonicalized. For the moment, I chicken out on this, + because all or most ports do not describe 'and' with + canonicalized operands, and I seem to remember magic in reload, + checking that operand1 has constraint '%0', in which case + operand0 and operand1 must have similar predicates. + FIXME: Investigate. */ + rtx reg0 = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); + rtx reg1 = operands[1]; + + if (! REG_P (reg1)) + { + emit_move_insn (reg0, reg1); + reg1 = reg0; + } + + emit_insn (gen_rtx_SET (SImode, reg0, + gen_rtx_AND (SImode, reg1, operands[2]))); + + /* Make sure we get the right *final* destination. */ + if (! REG_P (operands[0])) + emit_move_insn (operands[0], reg0); + + DONE; + } +}) + +;; Some special cases of andsi3. + +(define_insn "*andsi_movu" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%r,Q,To") + (match_operand:SI 2 "const_int_operand" "n,n,n")))] + "(INTVAL (operands[2]) == 255 || INTVAL (operands[2]) == 65535) + && !side_effects_p (operands[1])" + "movu.%z2 %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andsi_clear" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,Q,Q,To,To") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,0,0,0,0") + (match_operand:SI 2 "const_int_operand" "P,n,P,n,P,n")))] + "(INTVAL (operands[2]) == -65536 || INTVAL (operands[2]) == -256) + && !side_effects_p (operands[0])" + "@ + cLear.b %0 + cLear.w %0 + cLear.b %0 + cLear.w %0 + cLear.b %0 + cLear.w %0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "none")]) + +;; This is a catch-all pattern, taking care of everything that was not +;; matched in the insns above. +;; +;; Sidenote: the tightening from "nonimmediate_operand" to +;; "register_operand" for operand 1 actually increased the register +;; pressure (worse code). That will hopefully change with an +;; improved reload pass. + +(define_insn "*expanded_andsi_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r,r") + (and:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0,r") + (match_operand:SI 2 "general_operand" "I,r,Q>,g,!To")))] + "!TARGET_V32" + "@ + andq %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,no")]) + +(define_insn "*expanded_andsi_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0") + (match_operand:SI 2 "general_operand" "I,r,Q>,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.d %2,%0 + and.d %2,%0 + and.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,no") + (set_attr "cc" "noov32")]) + +;; For both QI and HI we may use the quick patterns. This results in +;; useless condition codes, but that is used rarely enough for it to +;; normally be a win (could check ahead for use of cc0, but seems to be +;; more pain than win). 
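;; The SImode special cases above (movu/clear) amount to these
;; identities, shown as a C sketch (function names invented); the
;; HImode patterns below use the same trick with the byte-sized
;; constants 255 and -256:
;;
;;   #include <stdint.h>
;;
;;   static uint32_t and_255 (uint32_t x)     /* movu.b: zero-extend byte */
;;   { return x & 255; }                      /* == (uint32_t) (uint8_t) x */
;;
;;   static uint32_t and_65535 (uint32_t x)   /* movu.w: zero-extend word */
;;   { return x & 65535; }                    /* == (uint32_t) (uint16_t) x */
;;
;;   static uint32_t and_m256 (uint32_t x)    /* clear.b: zero the low byte */
;;   { return x & 0xffffff00u; }
;;
;;   static uint32_t and_m65536 (uint32_t x)  /* clear.w: zero the low word */
;;   { return x & 0xffff0000u; }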
+ +;; FIXME: See note for andsi3 + +(define_expand "andhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] + "" +{ + if (! (CONST_INT_P (operands[2]) + && (((INTVAL (operands[2]) == -256 + || INTVAL (operands[2]) == 65280) + && rtx_equal_p (operands[1], operands[0])) + || (INTVAL (operands[2]) == 255 + && REG_P (operands[0]))))) + { + /* See comment for andsi3. */ + rtx reg0 = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (HImode); + rtx reg1 = operands[1]; + + if (! REG_P (reg1)) + { + emit_move_insn (reg0, reg1); + reg1 = reg0; + } + + emit_insn (gen_rtx_SET (HImode, reg0, + gen_rtx_AND (HImode, reg1, operands[2]))); + + /* Make sure we get the right destination. */ + if (! REG_P (operands[0])) + emit_move_insn (operands[0], reg0); + + DONE; + } +}) + +;; Some fast andhi3 special cases. + +(define_insn "*andhi_movu" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "r,Q,To") + (const_int 255)))] + "!side_effects_p (operands[1])" + "mOvu.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andhi_clear" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,Q,To") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "0,0,0") + (const_int -256)))] + "!side_effects_p (operands[0])" + "cLear.b %0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "none")]) + +;; Catch-all andhi3 pattern. + +(define_insn "*expanded_andhi_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r, r,r,r,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g,!To")))] + +;; Sidenote: the tightening from "general_operand" to +;; "register_operand" for operand 1 actually increased the register +;; pressure (worse code). That will hopefully change with an +;; improved reload pass. + + "!TARGET_V32" + "@ + andq %2,%0 + and.w %2,%0 + and.w %2,%0 + and.w %2,%0 + anDq %b2,%0 + and.w %2,%0 + and.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no,no") + (set_attr "cc" "clobber,normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*expanded_andhi_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r") + (and:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.w %2,%0 + and.w %2,%0 + and.w %2,%0 + anDq %b2,%0 + and.w %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no") + (set_attr "cc" "clobber,noov32,noov32,noov32,clobber,noov32")]) + +;; A strict_low_part pattern. + +;; Note the use of (match_dup 0) for the first operand of the operation +;; here. Reload can't handle an operand pair where one is read-write +;; and must match a read, like in: +;; (insn 80 79 81 4 +;; (set (strict_low_part +;; (subreg:QI (reg/v:SI 0 r0 [orig:36 data ] [36]) 0)) +;; (and:QI +;; (subreg:QI (reg:SI 15 acr [orig:27 D.7531 ] [27]) 0) +;; (const_int -64 [0xf..fc0]))) x.c:126 147 {*andqi_lowpart_v32} +;; (nil)) +;; In theory, it could reload this as a movstrictqi of the register +;; operand at the and:QI to the destination register and change the +;; and:QI operand to the same as the read-write output operand and the +;; result would be recognized, but it doesn't recognize that's a valid +;; reload for a strict_low_part-destination; it just sees a "+" at the +;; destination constraints. 
Better than adding complexity to reload is +;; to follow the lead of m68k (see comment that begins with "These insns +;; must use MATCH_DUP") since prehistoric times and make it just a +;; match_dup. FIXME: a sanity-check in gen* to refuse an insn with +;; input-constraints matching input-output-constraints, e.g. "+r" <- "0". + +(define_insn "*andhi_lowpart_non_v32" + [(set (strict_low_part + (match_operand:HI 0 "register_operand" "+r,r,r")) + (and:HI (match_dup 0) + (match_operand:HI 1 "general_operand" "r,Q>,g")))] + "!TARGET_V32" + "@ + and.w %1,%0 + and.w %1,%0 + and.w %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andhi_lowpart_v32" + [(set (strict_low_part + (match_operand:HI 0 "register_operand" "+r,r,r")) + (and:HI (match_dup 0) + (match_operand:HI 1 "general_operand" "r,Q>,g")))] + "TARGET_V32" + "@ + and.w %1,%0 + and.w %1,%0 + and.w %1,%0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "noov32")]) + +(define_expand "andqi3" + [(set (match_operand:QI 0 "register_operand") + (and:QI (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "general_operand")))] + "" + "") + +(define_insn "*andqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r, r,r,r") + (and:QI (match_operand:QI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g,!To")))] + "!TARGET_V32" + "@ + andq %2,%0 + and.b %2,%0 + and.b %2,%0 + andQ %b2,%0 + and.b %2,%0 + and.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "clobber,normal,normal,clobber,normal,normal")]) + +(define_insn "*andqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r") + (and:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g")))] + "TARGET_V32" + "@ + andq %2,%0 + and.b %2,%0 + and.b %2,%0 + andQ %b2,%0 + and.b %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "clobber,noov32,noov32,clobber,noov32")]) + +(define_insn "*andqi_lowpart_non_v32" + [(set (strict_low_part + (match_operand:QI 0 "register_operand" "+r,r,r")) + (and:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "r,Q>,g")))] + "!TARGET_V32" + "@ + and.b %1,%0 + and.b %1,%0 + and.b %1,%0" + [(set_attr "slottable" "yes,yes,no")]) + +(define_insn "*andqi_lowpart_v32" + [(set (strict_low_part + (match_operand:QI 0 "register_operand" "+r,r,r")) + (and:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "r,Q>,g")))] + "TARGET_V32" + "@ + and.b %1,%0 + and.b %1,%0 + and.b %1,%0" + [(set_attr "slottable" "yes,yes,no") + (set_attr "cc" "noov32")]) + +;; Bitwise or. + +;; Same comment as anddi3 applies here - no need for such a pattern. + +;; It seems there's no need to jump through hoops to get good code such as +;; with andsi3. 
+ +(define_expand "ior3" + [(set (match_operand:BWD 0 "register_operand") + (ior:BWD (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "general_operand")))] + "" + "") + +(define_insn "*iorsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r,r,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:SI 2 "general_operand" "I, r,Q>,n,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.d %2,%0 + or.d %2,%0 + oR.%s2 %2,%0 + or.d %2,%0 + or.d %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,no,no") + (set_attr "cc" "normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r") + (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0") + (match_operand:SI 2 "general_operand" "I,r,Q>,n,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.d %2,%0 + or.d %2,%0 + oR.%s2 %2,%0 + or.d %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,no") + (set_attr "cc" "noov32,noov32,noov32,clobber,noov32")]) + +(define_insn "*iorhi3_non_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r, r,r,r,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0, 0,0,0,r") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.w %2,%0 + or.w %2,%0 + or.w %2,%0 + oRq %b2,%0 + or.w %2,%0 + or.w %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no,no") + (set_attr "cc" "clobber,normal,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorhi3_v32" + [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r,r") + (ior:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0,0") + (match_operand:HI 2 "general_operand" "I,r,Q>,L,O,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.w %2,%0 + or.w %2,%0 + or.w %2,%0 + oRq %b2,%0 + or.w %2,%0" + [(set_attr "slottable" "yes,yes,yes,no,yes,no") + (set_attr "cc" "clobber,noov32,noov32,noov32,clobber,noov32")]) + +(define_insn "*iorqi3_non_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r, r,r,r") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0,0, 0,0,r") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g,!To")))] + "!TARGET_V32" + "@ + orq %2,%0 + or.b %2,%0 + or.b %2,%0 + orQ %b2,%0 + or.b %2,%0 + or.b %2,%1,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no,no") + (set_attr "cc" "clobber,normal,normal,clobber,normal,normal")]) + +(define_insn "*iorqi3_v32" + [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r") + (ior:QI (match_operand:QI 1 "register_operand" "%0,0,0,0,0") + (match_operand:QI 2 "general_operand" "I,r,Q>,O,g")))] + "TARGET_V32" + "@ + orq %2,%0 + or.b %2,%0 + or.b %2,%0 + orQ %b2,%0 + or.b %2,%0" + [(set_attr "slottable" "yes,yes,yes,yes,no") + (set_attr "cc" "clobber,noov32,noov32,clobber,noov32")]) + +;; Exclusive-or + +;; See comment about "anddi3" for xordi3 - no need for such a pattern. +;; FIXME: Do we really need the shorter variants? + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (xor:SI (match_operand:SI 1 "register_operand" "%0") + (match_operand:SI 2 "register_operand" "r")))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "xor3" + [(set (match_operand:BW 0 "register_operand" "=r") + (xor:BW (match_operand:BW 1 "register_operand" "%0") + (match_operand:BW 2 "register_operand" "r")))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; Negation insns. + +;; Questionable use, here mostly as a (slightly usable) define_expand +;; example. 
+ +(define_expand "negsf2" + [(set (match_dup 2) + (match_dup 3)) + (parallel [(set (match_operand:SF 0 "register_operand" "=r") + (neg:SF (match_operand:SF 1 + "register_operand" "0"))) + (use (match_dup 2))])] + "" +{ + operands[2] = gen_reg_rtx (SImode); + operands[3] = GEN_INT (1 << 31); +}) + +(define_insn "*expanded_negsf2" + [(set (match_operand:SF 0 "register_operand" "=r") + (neg:SF (match_operand:SF 1 "register_operand" "0"))) + (use (match_operand:SI 2 "register_operand" "r"))] + "" + "xor %2,%0" + [(set_attr "slottable" "yes")]) + +;; No "negdi2" although we could make one up that may be faster than +;; the one in libgcc. + +(define_insn "neg2" + [(set (match_operand:BWD 0 "register_operand" "=r") + (neg:BWD (match_operand:BWD 1 "register_operand" "r")))] + "" + "neg %1,%0" + [(set_attr "slottable" "yes")]) + +;; One-complements. + +;; See comment on anddi3 - no need for a DImode pattern. +;; See also xor comment. + +(define_insn "one_cmplsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "register_operand" "0")))] + "" + "not %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "one_cmpl2" + [(set (match_operand:BW 0 "register_operand" "=r") + (not:BW (match_operand:BW 1 "register_operand" "0")))] + "" + "not %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "clobber")]) + +;; Arithmetic/Logical shift right (and SI left). + +(define_insn "si3" + [(set (match_operand:SI 0 "register_operand" "=r") + (shift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonmemory_operand" "Kcr")))] + "" +{ + if (REG_S_P (operands[2])) + return ".d %2,%0"; + + return "q %2,%0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Since gcc gets lost, and forgets to zero-extend the source (or mask +;; the destination) when it changes shifts of lower modes into SImode, +;; it is better to make these expands an anonymous patterns instead of +;; the more correct define_insns. This occurs when gcc thinks that is +;; is better to widen to SImode and use immediate shift count. + +;; FIXME: Is this legacy or still true for gcc >= 2.7.2? + +;; FIXME: Can't parametrize sign_extend and zero_extend (before +;; mentioning "shiftrt"), so we need two patterns. 
+(define_expand "ashr3" + [(set (match_dup 3) + (sign_extend:SI (match_operand:BW 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (zero_extend:SI (match_operand:BW 2 "nonimmediate_operand" ""))) + (set (match_dup 5) (ashiftrt:SI (match_dup 3) (match_dup 4))) + (set (match_operand:BW 0 "general_operand" "") + (subreg:BW (match_dup 5) 0))] + "" +{ + int i; + + for (i = 3; i < 6; i++) + operands[i] = gen_reg_rtx (SImode); +}) + +(define_expand "lshr3" + [(set (match_dup 3) + (zero_extend:SI (match_operand:BW 1 "nonimmediate_operand" ""))) + (set (match_dup 4) + (zero_extend:SI (match_operand:BW 2 "nonimmediate_operand" ""))) + (set (match_dup 5) (lshiftrt:SI (match_dup 3) (match_dup 4))) + (set (match_operand:BW 0 "general_operand" "") + (subreg:BW (match_dup 5) 0))] + "" +{ + int i; + + for (i = 3; i < 6; i++) + operands[i] = gen_reg_rtx (SImode); +}) + +(define_insn "*expanded_" + [(set (match_operand:BW 0 "register_operand" "=r") + (shiftrt:BW (match_operand:BW 1 "register_operand" "0") + (match_operand:BW 2 "register_operand" "r")))] + "" + " %2,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "*_lowpart" + [(set (strict_low_part (match_operand:BW 0 "register_operand" "+r")) + (shiftrt:BW (match_dup 0) + (match_operand:BW 1 "register_operand" "r")))] + "" + " %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Arithmetic/logical shift left. + +;; For narrower modes than SI, we can use lslq although it makes cc +;; unusable. The win is that we do not have to reload the shift-count +;; into a register. + +(define_insn "ashl3" + [(set (match_operand:BW 0 "register_operand" "=r,r") + (ashift:BW (match_operand:BW 1 "register_operand" "0,0") + (match_operand:BW 2 "nonmemory_operand" "r,Kc")))] + "" +{ + return + (CONST_INT_P (operands[2]) && INTVAL (operands[2]) > ) + ? "moveq 0,%0" + : (CONSTANT_P (operands[2]) + ? "lslq %2,%0" : "lsl %2,%0"); +} + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32,clobber")]) + +;; A strict_low_part matcher. + +(define_insn "*ashl_lowpart" + [(set (strict_low_part (match_operand:BW 0 "register_operand" "+r")) + (ashift:BW (match_dup 0) + (match_operand:HI 1 "register_operand" "r")))] + "" + "lsl %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Various strange insns that gcc likes. + +;; Fortunately, it is simple to construct an abssf (although it may not +;; be very much used in practice). + +(define_insn "abssf2" + [(set (match_operand:SF 0 "register_operand" "=r") + (abs:SF (match_operand:SF 1 "register_operand" "0")))] + "" + "lslq 1,%0\;lsrq 1,%0") + +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (abs:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "abs %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; FIXME: GCC should be able to do these expansions itself. 
+ +(define_expand "abs2" + [(set (match_dup 2) + (sign_extend:SI (match_operand:BW 1 "general_operand" ""))) + (set (match_dup 3) (abs:SI (match_dup 2))) + (set (match_operand:BW 0 "register_operand" "") + (subreg:BW (match_dup 3) 0))] + "" + "operands[2] = gen_reg_rtx (SImode); operands[3] = gen_reg_rtx (SImode);") + +(define_insn "clzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_HAS_LZ" + "lz %1,%0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +(define_insn "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "0")))] + "TARGET_HAS_SWAP" + "swapwb %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; This instruction swaps all bits in a register. +;; That means that the most significant bit is put in the place +;; of the least significant bit, and so on. + +(define_insn "cris_swap_bits" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0")] + CRIS_UNSPEC_SWAP_BITS))] + "TARGET_HAS_SWAP" + "swapwbr %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "noov32")]) + +;; Implement ctz using two instructions, one for bit swap and one for clz. +;; Defines a scratch register to avoid clobbering input. + +(define_expand "ctzsi2" + [(set (match_dup 2) + (match_operand:SI 1 "register_operand")) + (set (match_dup 2) + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_SWAP_BITS)) + (set (match_operand:SI 0 "register_operand") + (clz:SI (match_dup 2)))] + "TARGET_HAS_LZ && TARGET_HAS_SWAP" + "operands[2] = gen_reg_rtx (SImode);") + +;; Bound-insn. Defined to be the same as an unsigned minimum, which is an +;; operation supported by gcc. Used in casesi, but used now and then in +;; normal code too. + +(define_expand "uminsi3" + [(set (match_operand:SI 0 "register_operand" "") + (umin:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (MEM_P (operands[2]) && TARGET_V32) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "*uminsi3_non_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r, r,r") + (umin:SI (match_operand:SI 1 "register_operand" "%0,0, 0,r") + (match_operand:SI 2 "general_operand" "r,Q>,g,!To")))] + "!TARGET_V32" +{ + if (CONST_INT_P (operands[2])) + { + /* Constant operands are zero-extended, so only 32-bit operands + may be negative. */ + if (INTVAL (operands[2]) >= 0) + { + if (INTVAL (operands[2]) < 256) + return "bound.b %2,%0"; + + if (INTVAL (operands[2]) < 65536) + return "bound.w %2,%0"; + } + } + else if (which_alternative == 3) + return "bound.d %2,%1,%0"; + + return "bound.d %2,%0"; +} + [(set_attr "slottable" "yes,yes,no,no")]) + +(define_insn "*uminsi3_v32" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (umin:SI (match_operand:SI 1 "register_operand" "%0,0") + (match_operand:SI 2 "nonmemory_operand" "r,i")))] + "TARGET_V32" +{ + if (GET_CODE (operands[2]) == CONST_INT) + { + /* Constant operands are zero-extended, so only 32-bit operands + may be negative. */ + if (INTVAL (operands[2]) >= 0) + { + if (INTVAL (operands[2]) < 256) + return "bound.b %2,%0"; + + if (INTVAL (operands[2]) < 65536) + return "bound.w %2,%0"; + } + } + + return "bound.d %2,%0"; +} + [(set_attr "slottable" "yes,no")]) + +;; Jump and branch insns. 
+ +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "ba %l0%#" + [(set_attr "slottable" "has_slot")]) + +;; Testcase gcc.c-torture/compile/991213-3.c fails if we allow a constant +;; here, since the insn is not recognized as an indirect jump by +;; jmp_uses_reg_or_mem used by computed_jump_p. Perhaps it is a kludge to +;; change from general_operand to nonimmediate_operand (at least the docs +;; should be changed), but then again the pattern is called indirect_jump. +(define_expand "indirect_jump" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand"))] + "" +{ + if (TARGET_V32 && MEM_P (operands[0])) + operands[0] = force_reg (SImode, operands[0]); +}) + +(define_insn "*indirect_jump_non_v32" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))] + "!TARGET_V32" + "jump %0") + +(define_insn "*indirect_jump_v32" + [(set (pc) (match_operand:SI 0 "register_operand" "r"))] + "TARGET_V32" + "jump %0%#" + [(set_attr "slottable" "has_slot")]) + +;; Return insn. Used whenever the epilogue is very simple; if it is only +;; a single ret or jump [sp+]. No allocated stack space or saved +;; registers are allowed. +;; Note that for this pattern, although named, it is ok to check the +;; context of the insn in the test, not only compiler switches. + +(define_expand "return" + [(return)] + "cris_simple_epilogue ()" + "cris_expand_return (cris_return_address_on_stack ()); DONE;") + +(define_insn "*return_expanded" + [(return)] + "" +{ + return cris_return_address_on_stack_for_return () + ? "jump [$sp+]" : "ret%#"; +} + [(set (attr "slottable") + (if_then_else + (match_test "cris_return_address_on_stack_for_return ()") + (const_string "no") + (const_string "has_return_slot")))]) + +(define_expand "prologue" + [(const_int 0)] + "TARGET_PROLOGUE_EPILOGUE" + "cris_expand_prologue (); DONE;") + +;; Note that the (return) from the expander itself is always the last +;; insn in the epilogue. +(define_expand "epilogue" + [(const_int 0)] + "TARGET_PROLOGUE_EPILOGUE" + "cris_expand_epilogue (); DONE;") + +;; Conditional branches. + +(define_expand "cbranch4" + [(set (cc0) (compare + (match_operand:BWD 1 "nonimmediate_operand") + (match_operand:BWD 2 "general_operand"))) + (set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(cc0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" + "") + +(define_expand "cbranchdi4" + [(set (cc0) + (compare (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "general_operand" ""))) + (set (pc) + (if_then_else (match_operator 0 "ordered_comparison_operator" + [(cc0) (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + if (TARGET_V32 && !REG_P (operands[1])) + operands[1] = force_reg (DImode, operands[1]); + if (TARGET_V32 && MEM_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); +}) + + +;; We suffer from the same overflow-bit-gets-in-the-way problem as +;; e.g. m68k, so we have to check if overflow bit is set on all "signed" +;; conditions. + +(define_insn "b" + [(set (pc) + (if_then_else (ncond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "b %l0%#" + [(set_attr "slottable" "has_slot")]) + +(define_insn "b" + [(set (pc) + (if_then_else (ocond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 
0 : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +(define_insn "b" + [(set (pc) + (if_then_else (rcond (cc0) + (const_int 0)) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "b %l0%#" : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +;; Reversed anonymous patterns to the ones above, as mandated. + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (ncond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "b %l0%#" + [(set_attr "slottable" "has_slot")]) + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (ocond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 0 : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +(define_insn "*b_reversed" + [(set (pc) + (if_then_else (rcond (cc0) + (const_int 0)) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "b %l0%#" : "b %l0%#"; +} + [(set_attr "slottable" "has_slot")]) + +;; Set on condition: sCC. + +(define_expand "cstoredi4" + [(set (cc0) (compare + (match_operand:DI 2 "nonimmediate_operand") + (match_operand:DI 3 "general_operand"))) + (set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" +{ + if (TARGET_V32 && !REG_P (operands[2])) + operands[2] = force_reg (DImode, operands[2]); + if (TARGET_V32 && MEM_P (operands[3])) + operands[3] = force_reg (DImode, operands[3]); +}) + +(define_expand "cstore4" + [(set (cc0) (compare + (match_operand:BWD 2 "nonimmediate_operand") + (match_operand:BWD 3 "general_operand"))) + (set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(cc0) (const_int 0)]))] + "" + "") + +;; Like bCC, we have to check the overflow bit for +;; signed conditions. + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (ncond:SI (cc0) (const_int 0)))] + "" + "s %0" + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (rcond:SI (cc0) (const_int 0)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? "s %0" : "s %0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +(define_insn "s" + [(set (match_operand:SI 0 "register_operand" "=r") + (ocond:SI (cc0) (const_int 0)))] + "" +{ + return + (cc_prev_status.flags & CC_NO_OVERFLOW) + ? 0 : "s %0"; +} + [(set_attr "slottable" "yes") + (set_attr "cc" "none")]) + +;; Call insns. + +;; We need to make these patterns "expand", since the real operand is +;; hidden in a (mem:QI ) inside operand[0] (call_value: operand[1]), +;; and cannot be checked if it were a "normal" pattern. +;; Note that "call" and "call_value" are *always* called with a +;; mem-operand for operand 0 and 1 respective. What happens for combined +;; instructions is a different issue. + +(define_expand "call" + [(parallel [(call (match_operand:QI 0 "cris_mem_call_operand" "") + (match_operand 1 "general_operand" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + if (flag_pic) + cris_expand_pic_call_address (&operands[0]); +}) + +;; Accept *anything* as operand 1. Accept operands for operand 0 in +;; order of preference. 
+ +(define_insn "*expanded_call_non_v32" + [(call (mem:QI (match_operand:SI 0 "general_operand" "r,Q>,g")) + (match_operand 1 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "!TARGET_V32" + "jsr %0") + +(define_insn "*expanded_call_v32" + [(call + (mem:QI + (match_operand:SI 0 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i")) + (match_operand 1 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "TARGET_V32" + "@ + jsr %0%# + jsr %0%# + bsr %0%# + bsr %0%#" + [(set_attr "slottable" "has_call_slot")]) + +;; Parallel when calculating and reusing address of indirect pointer +;; with simple offset. (Makes most sense with PIC.) It looks a bit +;; wrong not to have the clobber last, but that's the way combine +;; generates it (except it doesn' look into the *inner* mem, so this +;; just matches a peephole2). FIXME: investigate that. +(define_insn "*expanded_call_side" + [(call (mem:QI + (mem:SI + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r, r,r") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn")))) + (match_operand 2 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_operand:SI 3 "register_operand" "=*0,r,r") + (plus:SI (match_dup 0) + (match_dup 1)))] + "!TARGET_AVOID_GOTPLT && !TARGET_V32" + "jsr [%3=%0%S1]") + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "cris_mem_call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + if (flag_pic) + cris_expand_pic_call_address (&operands[1]); +}) + +;; Accept *anything* as operand 2. The validity other than "general" of +;; operand 0 will be checked elsewhere. Accept operands for operand 1 in +;; order of preference (Q includes r, but r is shorter, faster). +;; We also accept a PLT symbol. We output it as [rPIC+sym:GOTPLT] rather +;; than requiring getting rPIC + sym:PLT into a register. + +(define_insn "*expanded_call_value_non_v32" + [(set (match_operand 0 "nonimmediate_operand" "=g,g,g") + (call (mem:QI (match_operand:SI 1 "general_operand" "r,Q>,g")) + (match_operand 2 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "!TARGET_V32" + "Jsr %1" + [(set_attr "cc" "clobber")]) + +;; See similar call special-case. +(define_insn "*expanded_call_value_side" + [(set (match_operand 0 "nonimmediate_operand" "=g,g,g") + (call + (mem:QI + (mem:SI + (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r, r,r") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn")))) + (match_operand 3 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_operand:SI 4 "register_operand" "=*1,r,r") + (plus:SI (match_dup 1) + (match_dup 2)))] + "!TARGET_AVOID_GOTPLT && !TARGET_V32" + "Jsr [%4=%1%S2]" + [(set_attr "cc" "clobber")]) + +(define_insn "*expanded_call_value_v32" + [(set + (match_operand 0 "nonimmediate_operand" "=g,g,g,g") + (call + (mem:QI + (match_operand:SI 1 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i")) + (match_operand 2 "" ""))) + (clobber (reg:SI 16))] + "TARGET_V32" + "@ + Jsr %1%# + Jsr %1%# + Bsr %1%# + Bsr %1%#" + [(set_attr "cc" "clobber") + (set_attr "slottable" "has_call_slot")]) + +;; Used in debugging. No use for the direct pattern; unfilled +;; delayed-branches are taken care of by other means. + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "cc" "none")]) + +;; Same as the gdb trap breakpoint, will cause a SIGTRAP for +;; cris-linux* and crisv32-linux*, as intended. Will work in +;; freestanding environments with sufficient framework. 
+(define_insn "trap" + [(trap_if (const_int 1) (const_int 8))] + "TARGET_TRAP_USING_BREAK8" + "break 8") + +;; We need to stop accesses to the stack after the memory is +;; deallocated. Unfortunately, reorg doesn't look at naked clobbers, +;; e.g. (insn ... (clobber (mem:BLK (stack_pointer_rtx)))) and we don't +;; want to use a naked (unspec_volatile) as that would stop any +;; scheduling in the epilogue. Hence we model it as a "real" insn that +;; sets the memory in an unspecified manner. FIXME: Unfortunately it +;; still has the effect of an unspec_volatile. +(define_insn "cris_frame_deallocated_barrier" + [(set (mem:BLK (reg:SI CRIS_SP_REGNUM)) + (unspec:BLK [(const_int 0)] CRIS_UNSPEC_FRAME_DEALLOC))] + "" + "" + [(set_attr "length" "0")]) + +;; We expand on casesi so we can use "bound" and "add offset fetched from +;; a table to pc" (adds.w [pc+%0.w],pc). + +;; Note: if you change the "parallel" (or add anything after it) in +;; this expansion, you must change the macro ASM_OUTPUT_CASE_END +;; accordingly, to add the default case at the end of the jump-table. + +(define_expand "cris_casesi_non_v32" + [(set (match_dup 5) (match_operand:SI 0 "general_operand" "")) + (set (match_dup 6) + (minus:SI (match_dup 5) + (match_operand:SI 1 "const_int_operand" "n"))) + (set (match_dup 7) + (umin:SI (match_dup 6) + (match_operand:SI 2 "const_int_operand" "n"))) + (parallel + [(set (pc) + (if_then_else + (ltu (match_dup 7) (match_dup 2)) + (plus:SI (sign_extend:SI + (mem:HI + (plus:SI (mult:SI (match_dup 7) (const_int 2)) + (pc)))) + (pc)) + (label_ref (match_operand 4 "" "")))) + (use (label_ref (match_operand 3 "" "")))])] + "" +{ + operands[2] = plus_constant (SImode, operands[2], 1); + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = gen_reg_rtx (SImode); +}) + +;; FIXME: Check effect of not JUMP_TABLES_IN_TEXT_SECTION. +(define_expand "cris_casesi_v32" + [(set (match_dup 5) (match_operand:SI 0 "general_operand")) + (set (match_dup 6) + (minus:SI (match_dup 5) + (match_operand:SI 1 "const_int_operand"))) + (set (match_dup 7) + (umin:SI (match_dup 6) + (match_operand:SI 2 "const_int_operand"))) + (set (match_dup 8) (match_dup 11)) + (set (match_dup 9) + (plus:SI (mult:SI (match_dup 7) (const_int 2)) + (match_dup 8))) + (set (match_dup 10) + (plus:SI (sign_extend:SI (mem:HI (match_dup 9))) + (match_dup 9))) + (parallel + [(set (pc) + (if_then_else + (ltu (unspec [(const_int 0)] CRIS_UNSPEC_CASESI) (match_dup 2)) + (match_dup 10) + (label_ref (match_operand 4 "" "")))) + (use (label_ref (match_dup 3)))])] + "TARGET_V32" +{ + int i; + rtx xlabel = gen_rtx_LABEL_REF (VOIDmode, operands[3]); + for (i = 5; i <= 10; i++) + operands[i] = gen_reg_rtx (SImode); + operands[2] = plus_constant (SImode, operands[2], 1); + + /* Don't forget to decorate labels too, for PIC. */ + operands[11] = flag_pic + ? gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xlabel), + CRIS_UNSPEC_PCREL)) + : xlabel; +}) + +(define_expand "casesi" + [(match_operand:SI 0 "general_operand") + (match_operand:SI 1 "const_int_operand") + (match_operand:SI 2 "const_int_operand") + (match_operand 3 "" "") + (match_operand 4 "" "")] + "" +{ + if (TARGET_V32) + emit_insn (gen_cris_casesi_v32 (operands[0], operands[1], operands[2], + operands[3], operands[4])); + else + emit_insn (gen_cris_casesi_non_v32 (operands[0], operands[1], operands[2], + operands[3], operands[4])); + DONE; +}) + +;; Split-patterns. Some of them have modes unspecified. 
This +;; should always be ok; if for no other reason sparc.md has it as +;; well. +;; +;; When register_operand is specified for an operand, we can get a +;; subreg as well (Axis-990331), so don't just assume that REG_P is true +;; for a register_operand and that REGNO can be used as is. It is best to +;; guard with REG_P, unless it is worth it to adjust for the subreg case. + +;; op [rx + 0],ry,rz +;; The index to rx is optimized into zero, and gone. + +;; First, recognize bound [rx],ry,rz; where [rx] is zero-extended, +;; and add/sub [rx],ry,rz, with zero or sign-extend on [rx]. +;; Split this into: +;; move ry,rz +;; op [rx],rz +;; Lose if rz=ry or rx=rz. +;; Call this op-extend-split. +;; Do not match for V32; the addo and addi shouldn't be split +;; up. + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")])]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_op_dup 3 [(match_dup 2)])]))] + "") + +;; As op-extend-split, but recognize and split op [rz],ry,rz into +;; ext [rz],rz +;; op ry,rz +;; Do this for plus or bound only, being commutative operations, since we +;; have swapped the operands. +;; Call this op-extend-split-rx=rz + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")])]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2)])) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As the op-extend-split, but swapped operands, and only for +;; plus or bound, being the commutative extend-operators. FIXME: Why is +;; this needed? Is it? +;; Call this op-extend-split-swapped + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")]) + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_op_dup 3 [(match_dup 2)])]))] + "") + +;; As op-extend-split-rx=rz, but swapped operands, only for plus or +;; bound. Call this op-extend-split-swapped-rx=rz. 
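
Illustration only, not part of the patch: a stand-alone C model of what the op-extend-split family above does, ignoring the zero/sign extension of the memory operand. Registers are modelled as array indices; the point is that "op [rx],ry,rz" and the split "move ry,rz; op [rx],rz" agree exactly when rz is distinct from both ry and rx, which is why the splits are guarded against those cases.

#include <stdio.h>

/* Stand-alone model, not part of the port: op stands in for add/bound
   and register "names" are just indices.  The split is only valid when
   rz != ry and rz != rx, because the initial move overwrites rz.  */
static int op (int a, int b) { return a + b; }

int main (void)
{
  int regs[3];
  int mem[1] = { 7 };
  int rx = 0, ry = 1, rz = 2;

  regs[rx] = 0;      /* rx holds an address; here, index 0 of mem */
  regs[ry] = 5;

  int direct = op (mem[regs[rx]], regs[ry]);   /* op [rx],ry,rz */

  regs[rz] = regs[ry];                         /* move ry,rz    */
  regs[rz] = op (mem[regs[rx]], regs[rz]);     /* op [rx],rz    */

  printf ("%d %d\n", direct, regs[rz]);        /* both print 12 */
  return 0;
}
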
+ +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_plus_or_bound_operator" + [(match_operator + 3 "cris_extend_operator" + [(match_operand 2 "memory_operand" "")]) + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2)])) + (set (match_dup 0) + (match_op_dup + 4 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As op-extend-split, but the mem operand is not extended. +;; +;; op [rx],ry,rz changed into +;; move ry,rz +;; op [rx],rz +;; lose if ry=rz or rx=rz +;; Call this op-extend. + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 3 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "memory_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 2)]))] + "") + +;; As op-extend-split-rx=rz, non-extended. +;; Call this op-split-rx=rz + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 3 "cris_commutative_orth_op" + [(match_operand 2 "memory_operand" "") + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) + && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) != REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 1)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 2)]))] + "") + +;; As op-extend-split-swapped, nonextended. +;; Call this op-split-swapped. + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 3 "cris_commutative_orth_op" + [(match_operand 1 "register_operand" "") + (match_operand 2 "memory_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 1)]))] + "") + +;; As op-extend-split-swapped-rx=rz, non-extended. +;; Call this op-split-swapped-rx=rz. 
+ +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 3 "cris_orthogonal_operator" + [(match_operand 2 "memory_operand" "") + (match_operand 1 "register_operand" "")]))] + "!TARGET_V32 + && REG_P (operands[0]) && REG_P (operands[1]) + && REGNO (operands[1]) != REGNO (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && REG_P (XEXP (operands[2], 0)) + && REGNO (XEXP (operands[2], 0)) == REGNO (operands[0])" + [(set (match_dup 0) + (match_dup 2)) + (set (match_dup 0) + (match_op_dup + 3 [(match_dup 0) + (match_dup 1)]))] + "") + +(include "sync.md") + +;; Splits for all cases in side-effect insns where (possibly after reload +;; and register allocation) rx and ry in [rx=ry+i] are equal. + +;; move.S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "register_operand" ""))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))])] + "REG_P (operands[3]) && REG_P (operands[4]) + && REGNO (operands[3]) == REGNO (operands[4])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (set (match_dup 0) (match_dup 5))] + "operands[5] = replace_equiv_address (operands[6], operands[3]);") + +;; move.S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_mem_op" + [(plus:SI (match_operand:SI 1 "cris_bdap_operand" "") + (match_operand:SI 2 "cris_bdap_operand" ""))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 1) + (match_dup 2)))])] + "(rtx_equal_p (operands[3], operands[1]) + || rtx_equal_p (operands[3], operands[2]))" + [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (match_dup 4))] +{ + operands[4] = replace_equiv_address (operands[5], operands[3]); + cris_order_for_addsi3 (operands, 1); +}) + +;; move.S1 ry,[rx=rx+rz.S2] + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))]) + (match_operand 3 "register_operand" "")) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "REG_P (operands[2]) && REG_P (operands[4]) + && REGNO (operands[4]) == REGNO (operands[2])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (match_dup 5) (match_dup 3))] + "operands[5] = replace_equiv_address (operands[6], operands[4]);") + +;; move.S1 ry,[rx=rx+i] + +(define_split + [(parallel + [(set (match_operator + 6 "cris_mem_op" + [(plus:SI (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))]) + (match_operand 2 "register_operand" "")) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 0) + (match_dup 1)))])] + "(rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[3], operands[1]))" + [(set (match_dup 3) (plus:SI (match_dup 0) (match_dup 1))) + (set (match_dup 5) (match_dup 2))] +{ + operands[5] = replace_equiv_address (operands[6], operands[3]); + cris_order_for_addsi3 (operands, 0); +}) + +;; clear.[bwd] [rx=rx+rz.S2] + +(define_split + [(parallel + [(set 
(mem:BWD (plus:SI + (mult:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (match_operand:SI 2 "register_operand" ""))) + (const_int 0)) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (mult:SI (match_dup 0) + (match_dup 1)) + (match_dup 2)))])] + "REG_P (operands[2]) && REG_P (operands[3]) + && REGNO (operands[3]) == REGNO (operands[2])" + [(set (match_dup 3) (plus:SI (mult:SI (match_dup 0) (match_dup 1)) + (match_dup 2))) + (set (mem:BWD (match_dup 3)) (const_int 0))] + "") + +;; clear.[bwd] [rx=rx+i] + +(define_split + [(parallel + [(set (mem:BWD + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "") + (match_operand:SI 1 "cris_bdap_operand" ""))) + (const_int 0)) + (set (match_operand:SI 2 "register_operand" "") + (plus:SI (match_dup 0) + (match_dup 1)))])] + "(rtx_equal_p (operands[0], operands[2]) + || rtx_equal_p (operands[2], operands[1]))" + [(set (match_dup 2) (plus:SI (match_dup 0) (match_dup 1))) + (set (mem:BWD (match_dup 2)) (const_int 0))] + "cris_order_for_addsi3 (operands, 0);") + +;; mov(s|u).S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "register_operand" "")))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (mult:SI (match_dup 1) + (match_dup 2)) + (match_dup 3)))])] + "REG_P (operands[3]) + && REG_P (operands[4]) + && REGNO (operands[3]) == REGNO (operands[4])" + [(set (match_dup 4) (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 6)]))] + "operands[6] = replace_equiv_address (XEXP (operands[5], 0), operands[4]);") + +;; mov(s|u).S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 4 "cris_extend_operator" + [(mem (plus:SI + (match_operand:SI 1 "cris_bdap_operand" "") + (match_operand:SI 2 "cris_bdap_operand" "")))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (match_dup 1) + (match_dup 2)))])] + "(rtx_equal_p (operands[1], operands[3]) + || rtx_equal_p (operands[2], operands[3]))" + [(set (match_dup 3) (plus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (match_op_dup 4 [(match_dup 5)]))] +{ + operands[5] = replace_equiv_address (XEXP (operands[4], 0), operands[3]); + cris_order_for_addsi3 (operands, 1); +}) + +;; op.S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (mem (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "")))])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 1) (match_dup 6)]))] +{ + operands[6] = replace_equiv_address (XEXP (operands[5], 1), operands[4]); + cris_order_for_addsi3 (operands, 2); +}) + +;; op.S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_orthogonal_operator" + [(match_operand 1 "register_operand" "") + (mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + 
(match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 1) (match_dup 7)]))] + "operands[7] = replace_equiv_address (XEXP (operands[6], 1), operands[5]);") + +;; op.S1 [rx=rx+rz.S2],ry (swapped) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_commutative_orth_op" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" ""))) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 7) (match_dup 1)]))] + "operands[7] = replace_equiv_address (XEXP (operands[6], 0), operands[5]);") + +;; op.S1 [rx=rx+i],ry (swapped) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_commutative_orth_op" + [(mem + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" ""))) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 6) (match_dup 1)]))] +{ + operands[6] = replace_equiv_address (XEXP (operands[5], 0), operands[4]); + cris_order_for_addsi3 (operands, 2); +}) + +;; op(s|u).S1 [rx=rx+rz.S2],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 7 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))])])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) + && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 1) (match_dup 8)]))] + "operands[8] = gen_rtx_fmt_e (GET_CODE (operands[7]), GET_MODE (operands[7]), + replace_equiv_address (XEXP (operands[7], 0), + operands[5]));") + +;; op(s|u).S1 [rx=rx+i],ry + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 5 "cris_operand_extend_operator" + [(match_operand 1 "register_operand" "") + (match_operator + 6 "cris_extend_operator" + [(mem + (plus:SI (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "") + ))])])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + 
"(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 5 [(match_dup 1) (match_dup 7)]))] +{ + operands[7] = gen_rtx_fmt_e (GET_CODE (operands[6]), GET_MODE (operands[6]), + replace_equiv_address (XEXP (operands[6], 0), + operands[4])); + cris_order_for_addsi3 (operands, 2); +}) + +;; op(s|u).S1 [rx=rx+rz.S2],ry (swapped, plus or bound) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 7 "cris_plus_or_bound_operator" + [(match_operator + 6 "cris_extend_operator" + [(mem (plus:SI + (mult:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")) + (match_operand:SI 4 "register_operand" "")))]) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 5 "register_operand" "") + (plus:SI (mult:SI (match_dup 2) + (match_dup 3)) + (match_dup 4)))])] + "REG_P (operands[4]) && REG_P (operands[5]) + && REGNO (operands[5]) == REGNO (operands[4])" + [(set (match_dup 5) (plus:SI (mult:SI (match_dup 2) (match_dup 3)) + (match_dup 4))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 8) (match_dup 1)]))] + "operands[8] = gen_rtx_fmt_e (GET_CODE (operands[6]), GET_MODE (operands[6]), + replace_equiv_address (XEXP (operands[6], 0), + operands[5]));") + +;; op(s|u).S1 [rx=rx+i],ry (swapped, plus or bound) + +(define_split + [(parallel + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_plus_or_bound_operator" + [(match_operator + 5 "cris_extend_operator" + [(mem (plus:SI + (match_operand:SI 2 "cris_bdap_operand" "") + (match_operand:SI 3 "cris_bdap_operand" "")))]) + (match_operand 1 "register_operand" "")])) + (set (match_operand:SI 4 "register_operand" "") + (plus:SI (match_dup 2) + (match_dup 3)))])] + "(rtx_equal_p (operands[4], operands[2]) + || rtx_equal_p (operands[4], operands[3]))" + [(set (match_dup 4) (plus:SI (match_dup 2) (match_dup 3))) + (set (match_dup 0) (match_op_dup 6 [(match_dup 7) (match_dup 1)]))] +{ + operands[7] = gen_rtx_fmt_e (GET_CODE (operands[5]), GET_MODE (operands[5]), + replace_equiv_address (XEXP (operands[5], 0), + operands[4])); + cris_order_for_addsi3 (operands, 2); +}) + +;; Splits for addressing prefixes that have no side-effects, so we can +;; fill a delay slot. Never split if we lose something, though. + +;; If we have a +;; move [indirect_ref],rx +;; where indirect ref = {const, [r+], [r]}, it costs as much as +;; move indirect_ref,rx +;; move [rx],rx +;; Take care not to allow indirect_ref = register. + +;; We're not allowed to generate copies of registers with different mode +;; until after reload; copying pseudos upsets reload. CVS as of +;; 2001-08-24, unwind-dw2-fde.c, _Unwind_Find_FDE ICE in +;; cselib_invalidate_regno. Also, don't do this for the stack-pointer, +;; as we don't want it set temporarily to an invalid value. 
+ +(define_split ; indir_to_reg_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operand 1 "indirect_operand" ""))] + "reload_completed + && REG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (MEM_P (XEXP (operands[1], 0)) || CONSTANT_P (XEXP (operands[1], 0))) + && REGNO (operands[0]) < CRIS_LAST_GENERAL_REGISTER" + [(set (match_dup 2) (match_dup 4)) + (set (match_dup 0) (match_dup 3))] + "operands[2] = gen_rtx_REG (Pmode, REGNO (operands[0])); + operands[3] = replace_equiv_address (operands[1], operands[2]); + operands[4] = XEXP (operands[1], 0);") + +;; As the above, but MOVS and MOVU. + +(define_split + [(set (match_operand 0 "cris_nonsp_register_operand" "") + (match_operator + 4 "cris_extend_operator" + [(match_operand 1 "indirect_operand" "")]))] + "reload_completed + && REG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (MEM_P (XEXP (operands[1], 0)) + || CONSTANT_P (XEXP (operands[1], 0)))" + [(set (match_dup 2) (match_dup 5)) + (set (match_dup 0) (match_op_dup 4 [(match_dup 3)]))] + "operands[2] = gen_rtx_REG (Pmode, REGNO (operands[0])); + operands[3] = replace_equiv_address (XEXP (operands[4], 0), operands[2]); + operands[5] = XEXP (operands[1], 0);") + +;; Various peephole optimizations. +;; +;; Watch out: when you exchange one set of instructions for another, the +;; condition codes setting must be the same, or you have to CC_INIT or +;; whatever is appropriate, in the pattern before you emit the +;; assembly text. This is best done here, not in cris_notice_update_cc, +;; to keep changes local to their cause. +;; +;; Do not add patterns that you do not know will be matched. +;; Please also add a self-contained testcase. + +;; We have trouble with and:s and shifts. Maybe something is broken in +;; gcc? Or it could just be that bit-field insn expansion is a bit +;; suboptimal when not having extzv insns. +;; Testcase for the following four peepholes: gcc.dg/cris-peep2-xsrand.c + +(define_peephole2 ; asrandb (peephole casesi+31) + [(set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) + (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 255 + && INTVAL (operands[1]) > 23 + /* Check that the and-operation enables us to use logical-shift. */ + && (INTVAL (operands[2]) + & ((HOST_WIDE_INT) -1 << (32 - INTVAL (operands[1])))) == 0" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:QI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (QImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode)); +}) + +(define_peephole2 ; asrandw (peephole casesi+32) + [(set (match_operand:SI 0 "register_operand" "") + (ashiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 65535 + && INTVAL (operands[2]) != 255 + && INTVAL (operands[1]) > 15 + /* Check that the and-operation enables us to use logical-shift. 
*/ + && (INTVAL (operands[2]) + & ((HOST_WIDE_INT) -1 << (32 - INTVAL (operands[1])))) == 0" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:HI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), HImode)); +}) + +(define_peephole2 ; lsrandb (peephole casesi+33) + [(set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 + && INTVAL (operands[2]) < 255 + && INTVAL (operands[1]) > 23" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:QI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (QImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), QImode)); +}) + +(define_peephole2 ; lsrandw (peephole casesi+34) + [(set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" ""))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand 2 "const_int_operand" "")))] + "INTVAL (operands[2]) > 31 && INTVAL (operands[2]) < 65535 + && INTVAL (operands[2]) != 255 + && INTVAL (operands[1]) > 15" + [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 1))) + (set (match_dup 3) (and:HI (match_dup 3) (match_dup 4)))] + ;; FIXME: CC0 is valid except for the M bit. +{ + operands[3] = gen_rtx_REG (HImode, REGNO (operands[0])); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]), HImode)); +}) + + +;; Change +;; add.d n,rx +;; move [rx],ry +;; into +;; move [rx=rx+n],ry +;; when -128 <= n <= 127. +;; This will reduce the size of the assembler code for n = [-128..127], +;; and speed up accordingly. Don't match if the previous insn is +;; (set rx rz) because that combination is matched by another peephole. +;; No stable test-case. + +(define_peephole2 ; moversideqi (peephole casesi+35) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand 3 "register_operand" "") + (match_operator 4 "cris_mem_op" [(match_dup 0)]))] + "GET_MODE_SIZE (GET_MODE (operands[4])) <= UNITS_PER_WORD + && REGNO (operands[3]) != REGNO (operands[0]) + && (cris_base_p (operands[1], true) || cris_base_p (operands[2], true)) + && !satisfies_constraint_J (operands[2]) + && !satisfies_constraint_N (operands[2]) + && (INTVAL (operands[2]) >= -128 && INTVAL (operands[2]) < 128) + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + ;; Checking the previous insn is a bit too awkward for the condition. 
+{ + rtx prev = prev_nonnote_insn (curr_insn); + if (prev != NULL_RTX) + { + rtx set = single_set (prev); + if (set != NULL_RTX + && REG_S_P (SET_DEST (set)) + && REGNO (SET_DEST (set)) == REGNO (operands[0]) + && REG_S_P (SET_SRC (set))) + FAIL; + } + operands[5] + = replace_equiv_address (operands[4], + gen_rtx_PLUS (SImode, + operands[1], operands[2])); +}) + +;; Vice versa: move ry,[rx=rx+n] + +(define_peephole2 ; movemsideqi (peephole casesi+36) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operator 3 "cris_mem_op" [(match_dup 0)]) + (match_operand 4 "register_operand" ""))] + "GET_MODE_SIZE (GET_MODE (operands[4])) <= UNITS_PER_WORD + && REGNO (operands[4]) != REGNO (operands[0]) + && (cris_base_p (operands[1], true) || cris_base_p (operands[2], true)) + && !satisfies_constraint_J (operands[2]) + && !satisfies_constraint_N (operands[2]) + && (INTVAL (operands[2]) >= -128 && INTVAL (operands[2]) < 128) + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 5) (match_dup 4)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + "operands[5] + = replace_equiv_address (operands[3], + gen_rtx_PLUS (SImode, + operands[1], operands[2]));") + +;; As above, change: +;; add.d n,rx +;; op.d [rx],ry +;; into: +;; op.d [rx=rx+n],ry +;; Saves when n = [-128..127]. +;; +;; Splitting and joining combinations for side-effect modes are slightly +;; out of hand. They probably will not save the time they take typing in, +;; not to mention the bugs that creep in. FIXME: Get rid of as many of +;; the splits and peepholes as possible. +;; No stable test-case. + +(define_peephole2 ; mover2side (peephole casesi+37) + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand 3 "register_operand" "") + (match_operator 4 "cris_orthogonal_operator" + [(match_dup 3) + (match_operator + 5 "cris_mem_op" [(match_dup 0)])]))] + ;; FIXME: What about DFmode? + ;; Change to GET_MODE_SIZE (GET_MODE (operands[3])) <= UNITS_PER_WORD? + "GET_MODE (operands[3]) != DImode + && REGNO (operands[0]) != REGNO (operands[3]) + && !satisfies_constraint_J (operands[2]) + && !satisfies_constraint_N (operands[2]) + && INTVAL (operands[2]) >= -128 + && INTVAL (operands[2]) <= 127 + && TARGET_SIDE_EFFECT_PREFIXES" + [(parallel + [(set (match_dup 3) (match_op_dup 4 [(match_dup 3) (match_dup 6)])) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])] + "operands[6] + = replace_equiv_address (operands[5], + gen_rtx_PLUS (SImode, + operands[1], operands[2]));") + +;; Sometimes, for some reason the pattern +;; move x,rx +;; add y,rx +;; move [rx],rz +;; will occur. Solve this, and likewise for to-memory. +;; No stable test-case. + +(define_peephole2 ; moverside (peephole casesi+38) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "cris_bdap_biap_operand" "")) + (set (match_dup 0) + (plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "") + (match_operand:SI 3 "cris_bdap_biap_operand" ""))) + (set (match_operand 4 "register_operand" "") + (match_operator 5 "cris_mem_op" [(match_dup 0)]))] + "(rtx_equal_p (operands[2], operands[0]) + || rtx_equal_p (operands[3], operands[0])) + && cris_side_effect_mode_ok (PLUS, operands, 0, + (REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + (! REG_S_P (operands[1]) + ? 
1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + -1, 4)" + [(parallel + [(set (match_dup 4) (match_dup 6)) + (set (match_dup 0) (plus:SI (match_dup 7) (match_dup 8)))])] +{ + rtx otherop + = rtx_equal_p (operands[2], operands[0]) ? operands[3] : operands[2]; + + /* Make sure we have canonical RTX so we match the insn pattern - + not a constant in the first operand. We also require the order + (plus reg mem) to match the final pattern. */ + if (CONSTANT_P (otherop) || MEM_P (otherop)) + { + operands[7] = operands[1]; + operands[8] = otherop; + } + else + { + operands[7] = otherop; + operands[8] = operands[1]; + } + operands[6] + = replace_equiv_address (operands[5], + gen_rtx_PLUS (SImode, + operands[7], operands[8])); +}) + +;; As above but to memory. +;; FIXME: Split movemside and moverside into variants and prune +;; the ones that don't trig. +;; No stable test-case. + +(define_peephole2 ; movemside (peephole casesi+39) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "cris_bdap_biap_operand" "")) + (set (match_dup 0) + (plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "") + (match_operand:SI 3 "cris_bdap_biap_operand" ""))) + (set (match_operator 4 "cris_mem_op" [(match_dup 0)]) + (match_operand 5 "register_operand" ""))] + "(rtx_equal_p (operands[2], operands[0]) + || rtx_equal_p (operands[3], operands[0])) + && cris_side_effect_mode_ok (PLUS, operands, 0, + (REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + (! REG_S_P (operands[1]) + ? 1 + : (rtx_equal_p (operands[2], operands[0]) + ? 3 : 2)), + -1, 5)" + [(parallel + [(set (match_dup 6) (match_dup 5)) + (set (match_dup 0) (plus:SI (match_dup 7) (match_dup 8)))])] +{ + rtx otherop + = rtx_equal_p (operands[2], operands[0]) ? operands[3] : operands[2]; + + /* Make sure we have canonical RTX so we match the insn pattern - + not a constant in the first operand. We also require the order + (plus reg mem) to match the final pattern. */ + if (CONSTANT_P (otherop) || MEM_P (otherop)) + { + operands[7] = operands[1]; + operands[8] = otherop; + } + else + { + operands[7] = otherop; + operands[8] = operands[1]; + } + operands[6] + = replace_equiv_address (operands[4], + gen_rtx_PLUS (SImode, + operands[7], operands[8])); +}) + +;; Another spotted bad code: +;; move rx,ry +;; move [ry],ry +;; No stable test-case. + +(define_peephole2 ; movei (peephole casesi+42) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (set (match_operand 2 "register_operand" "") + (match_operator 3 "cris_mem_op" [(match_dup 0)]))] + "REGNO (operands[0]) == REGNO (operands[2]) + && (REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[1]))) + && GET_MODE_SIZE (GET_MODE (operands[2])) <= UNITS_PER_WORD" + [(set (match_dup 2) (match_dup 4))] + "operands[4] = replace_equiv_address (operands[3], operands[1]);") + +;; move.d [r10+16],r9 +;; and.d r12,r9 +;; change to +;; and.d [r10+16],r12,r9 +;; With generalization of the operation, the size and the addressing mode. +;; This seems to be the result of a quirk in register allocation +;; missing the three-operand cases when having different predicates. +;; Maybe that it matters that it is a commutative operation. +;; This pattern helps that situation, but there's still the increased +;; register pressure. +;; Note that adding the noncommutative variant did not show any matches +;; in ipps and cc1, so it's not here. +;; No stable test-case. 
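
Illustration only, not part of the patch: source of the kind that can produce the "move.d [r10+16],r9; and.d r12,r9" sequence discussed above; the op3 peephole that follows folds it into the three-operand "and.d [r10+16],r12,r9" form when register allocation allows. The register names are just the ones quoted in the comment, used here hypothetically.

#include <stdio.h>

/* Illustration only: a load at a small constant offset followed by an
   AND with another register, the shape the op3 peephole above targets.  */
static int mask_word (int *p, int mask)
{
  return p[4] & mask;   /* load from p+16 bytes, then AND with mask */
}

int main (void)
{
  int words[8] = { 0, 0, 0, 0, 0xff0f, 0, 0, 0 };
  printf ("0x%x\n", mask_word (words, 0x00ff));   /* prints 0xf */
  return 0;
}
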
+ +(define_peephole2 ; op3 (peephole casesi+44) + [(set (match_operand 0 "register_operand" "") + (match_operator + 6 "cris_mem_op" + [(plus:SI + (match_operand:SI 1 "cris_bdap_biap_operand" "") + (match_operand:SI 2 "cris_bdap_biap_operand" ""))])) + (set (match_dup 0) + (match_operator + 5 "cris_commutative_orth_op" + [(match_operand 3 "register_operand" "") + (match_operand 4 "register_operand" "")]))] + "(rtx_equal_p (operands[3], operands[0]) + || rtx_equal_p (operands[4], operands[0])) + && ! rtx_equal_p (operands[3], operands[4]) + && (REG_S_P (operands[1]) || REG_S_P (operands[2])) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD" + [(set (match_dup 0) (match_op_dup 5 [(match_dup 7) (match_dup 6)]))] + "operands[7] + = rtx_equal_p (operands[3], operands[0]) ? operands[4] : operands[3];") + +;; There seems to be no other way to make GCC (including 4.8/trunk at +;; r186932) optimally reload an instruction that looks like +;; and.d reg_or_mem,const_32__65535,other_reg +;; where other_reg is the destination. +;; It should be: +;; movu.[bw] reg_or_mem,reg_32 +;; and.[bw] trunc_int_for_mode([bw], const_32__65535),reg_32 ;; or andq +;; but it turns into: +;; move.d reg_or_mem,reg_32 +;; and.d const_32__65535,reg_32 +;; Fix it with these two peephole2's. +;; Testcases: gcc.dg/cris-peep2-andu1.c gcc.dg/cris-peep2-andu2.c + +(define_peephole2 ; andu (casesi+45) + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "nonimmediate_operand" "")) + (set (match_operand:SI 2 "register_operand" "") + (and:SI (match_dup 0) + (match_operand:SI 3 "const_int_operand" "")))] + ;; Since the size of the memory access could be made different here, + ;; don't do this for a mem-volatile access. + "REGNO (operands[2]) == REGNO (operands[0]) + && INTVAL (operands[3]) <= 65535 && INTVAL (operands[3]) >= 0 + && !satisfies_constraint_I (operands[3]) + && !side_effects_p (operands[1]) + && (!REG_P (operands[1]) + || REGNO (operands[1]) <= CRIS_LAST_GENERAL_REGISTER)" + ;; FIXME: CC0 valid except for M (i.e. CC_NOT_NEGATIVE). + [(set (match_dup 0) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] +{ + enum machine_mode zmode = INTVAL (operands[3]) <= 255 ? QImode : HImode; + enum machine_mode amode + = satisfies_constraint_O (operands[3]) ? SImode : zmode; + rtx op1 + = (REG_S_P (operands[1]) + ? gen_rtx_REG (zmode, REGNO (operands[1])) + : adjust_address (operands[1], zmode, 0)); + operands[4] + = gen_rtx_ZERO_EXTEND (SImode, op1); + operands[5] = gen_rtx_REG (amode, REGNO (operands[0])); + operands[6] + = gen_rtx_AND (amode, gen_rtx_REG (amode, REGNO (operands[0])), + GEN_INT (trunc_int_for_mode (INTVAL (operands[3]), + amode == SImode + ? QImode : amode))); +}) + +;; Since r186861, gcc.dg/cris-peep2-andu2.c trigs this pattern, with which +;; we fix up e.g.: +;; movu.b 254,$r9. +;; and.d $r10,$r9 +;; into: +;; movu.b $r10,$r9 +;; andq -2,$r9. +;; Only do this for values fitting the quick immediate operand. +(define_peephole2 ; andqu (casesi+46) + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "const_int_operand")) + (set (match_dup 0) + (and:SI (match_dup 0) (match_operand:SI 2 "nonimmediate_operand")))] + ;; Since the size of the memory access will be made different here, + ;; don't do this for a volatile access or a post-incremented address. 
+ "satisfies_constraint_O (operands[1]) + && !side_effects_p (operands[2]) + && !reg_overlap_mentioned_p (operands[0], operands[2])" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (and:SI (match_dup 0) (match_dup 4)))] +{ + enum machine_mode zmode = INTVAL (operands[2]) <= 255 ? QImode : HImode; + rtx op1 + = (REG_S_P (operands[2]) + ? gen_rtx_REG (zmode, REGNO (operands[2])) + : adjust_address (operands[2], zmode, 0)); + operands[3] = gen_rtx_ZERO_EXTEND (SImode, op1); + operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode)); +}) + +;; Try and avoid GOTPLT reads escaping a call: transform them into +;; PLT. Curiously (but thankfully), peepholes for instructions +;; *without side-effects* that just feed a call (or call_value) are +;; not matched neither in a build or test-suite, so those patterns are +;; omitted. + +;; A "normal" move where we don't check the consumer. + +(define_peephole2 ; gotplt-to-plt + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))]))] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && REGNO_REG_CLASS (REGNO (operands[0])) == REGNO_REG_CLASS (0)" + [(set (match_dup 0) (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLT_GOTREL))) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI CRIS_GOT_REGNUM)))] + "") + +;; And one set with a side-effect getting the PLTGOT offset. +;; First call and call_value variants. + +(define_peephole2 ; gotplt-to-plt-side-call + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))]) + (parallel [(call (mem:QI (match_dup 0)) + (match_operand 4 "" "")) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && peep2_reg_dead_p (2, operands[0])" + [(parallel [(call (mem:QI (match_dup 1)) + (match_dup 4)) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_dup 3) + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] + CRIS_UNSPEC_PLTGOTREAD))))])] + "") + +(define_peephole2 ; gotplt-to-plt-side-call-value + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))]) + (parallel [(set (match_operand 5 "" "") + (call (mem:QI (match_dup 0)) + (match_operand 4 "" ""))) + (clobber (reg:SI CRIS_SRP_REGNUM))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && peep2_reg_dead_p (2, operands[0])" + [(parallel [(set (match_dup 5) + (call (mem:QI (match_dup 1)) + (match_dup 4))) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_dup 3) + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] + CRIS_UNSPEC_PLTGOTREAD))))])] + "") + +(define_peephole2 ; 
gotplt-to-plt-side + [(parallel + [(set + (match_operand:SI 0 "register_operand" "") + (match_operator:SI + 1 "cris_mem_op" + [(plus:SI + (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_operand:SI + 2 "cris_general_operand_or_symbol" "")] + CRIS_UNSPEC_PLTGOTREAD)))])) + (set (match_operand:SI 3 "register_operand" "") + (plus:SI (reg:SI CRIS_GOT_REGNUM) + (const:SI + (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))))])] + "flag_pic + && cris_valid_pic_const (XEXP (XEXP (operands[1], 0), 1), true) + && REGNO_REG_CLASS (REGNO (operands[0])) == REGNO_REG_CLASS (0)" + [(set (match_dup 3) + (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLTGOTREAD))) + (set (match_dup 3) (plus:SI (match_dup 3) (reg:SI CRIS_GOT_REGNUM))) + (set (match_dup 0) + (const:SI (unspec:SI [(match_dup 2)] CRIS_UNSPEC_PLT_GOTREL))) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI CRIS_GOT_REGNUM)))] + "") + +;; Local variables: +;; mode:emacs-lisp +;; comment-start: ";; " +;; eval: (set-syntax-table (copy-sequence (syntax-table))) +;; eval: (modify-syntax-entry ?[ "(]") +;; eval: (modify-syntax-entry ?] ")[") +;; eval: (modify-syntax-entry ?{ "(}") +;; eval: (modify-syntax-entry ?} "){") +;; eval: (setq indent-tabs-mode t) +;; End: diff --git a/gcc-4.9/gcc/config/cris/cris.opt b/gcc-4.9/gcc/config/cris/cris.opt new file mode 100644 index 000000000..d359c8948 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/cris.opt @@ -0,0 +1,202 @@ +; Options for the CRIS port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; TARGET_MUL_BUG: Whether or not to work around multiplication +; instruction hardware bug when generating code for models where +; it may be present. From the trouble report for Etrax 100 LX: +; "A multiply operation may cause incorrect cache behaviour +; under some specific circumstances. The problem can occur if +; the instruction following the multiply instruction causes a +; cache miss, and multiply operand 1 (source operand) bits +; [31:27] matches the logical mapping of the mode register +; address (0xb0....), and bits [9:2] of operand 1 matches the +; TLB register address (0x258-0x25f). There is such a mapping +; in kernel mode or when the MMU is off. Normally there is no +; such mapping in user mode, and the problem will therefore +; probably not occur in Linux user mode programs." +; +; We have no sure-fire way to know from within GCC that we're +; compiling a user program. For example, -fpic/PIC is used in +; libgcc which is linked into the kernel. However, the +; workaround option -mno-mul-bug can be safely used per-package +; when compiling programs. The same goes for general user-only +; libraries such as glibc, since there's no user-space +; driver-like program that gets a mapping of I/O registers (all +; on the same page, including the TLB registers). 
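
Illustration only, not part of the patch: a rough, host-runnable reading of the operand-1 condition quoted from the trouble report above, just to make the bit arithmetic concrete. The field positions and I/O addresses are taken from that wording, not from hardware documentation, so treat it as a sketch of one interpretation.

#include <stdint.h>
#include <stdio.h>

/* Sketch only: one reading of the Etrax 100 LX trouble report quoted
   above.  Operand 1 of a multiply is suspect when its bits [31:27]
   match the mode-register mapping (0xb0000000) and its bits [9:2]
   fall in the TLB register range 0x258-0x25f.  */
static int mul_operand_matches_bug_pattern (uint32_t op1)
{
  int high_bits_match = ((op1 >> 27) & 0x1f) == (0xb0000000u >> 27);
  int tlb_bits_match  = ((op1 >> 2) & 0xff) >= (0x258u >> 2)
			&& ((op1 >> 2) & 0xff) <= (0x25fu >> 2);
  return high_bits_match && tlb_bits_match;
}

int main (void)
{
  printf ("%d\n", mul_operand_matches_bug_pattern (0xb0000258u));  /* 1 */
  printf ("%d\n", mul_operand_matches_bug_pattern (0x00000258u));  /* 0 */
  return 0;
}
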
+mmul-bug-workaround +Target Report Mask(MUL_BUG) +Work around bug in multiplication instruction + +; TARGET_ETRAX4_ADD: Instruction-set additions from Etrax 4 and up. +; (Just "lz".) +metrax4 +Target Report Mask(ETRAX4_ADD) +Compile for ETRAX 4 (CRIS v3) + +; See cris_handle_option. +metrax100 +Target Report RejectNegative +Compile for ETRAX 100 (CRIS v8) + +; See cris_handle_option. +mno-etrax100 +Target Report RejectNegative Undocumented + +mpdebug +Target Report Mask(PDEBUG) +Emit verbose debug information in assembly code + +; TARGET_CCINIT: Whether to use condition-codes generated by +; insns other than the immediately preceding compare/test insn. +; Used to check for errors in notice_update_cc. +mcc-init +Target Report Mask(CCINIT) +Do not use condition codes from normal instructions + +; TARGET_SIDE_EFFECT_PREFIXES: Whether to use side-effect +; patterns. Used to debug the [rx=ry+i] type patterns. +mside-effects +Target Report RejectNegative Mask(SIDE_EFFECT_PREFIXES) Undocumented + +mno-side-effects +Target Report RejectNegative InverseMask(SIDE_EFFECT_PREFIXES) +Do not emit addressing modes with side-effect assignment + +; TARGET_STACK_ALIGN: Whether to *keep* (not force) alignment of +; stack at 16 (or 32, depending on TARGET_ALIGN_BY_32) bits. +mstack-align +Target Report RejectNegative Mask(STACK_ALIGN) Undocumented + +mno-stack-align +Target Report RejectNegative InverseMask(STACK_ALIGN) +Do not tune stack alignment + +; TARGET_DATA_ALIGN: Whether to do alignment on individual +; modifiable objects. +mdata-align +Target Report RejectNegative Mask(DATA_ALIGN) Undocumented + +mno-data-align +Target Report RejectNegative InverseMask(DATA_ALIGN) +Do not tune writable data alignment + +; TARGET_CONST_ALIGN: Whether to do alignment on individual +; non-modifiable objects. +mconst-align +Target Report RejectNegative Mask(CONST_ALIGN) Undocumented + +mno-const-align +Target Report RejectNegative InverseMask(CONST_ALIGN) +Do not tune code and read-only data alignment + +; See cris_handle_option. +m32-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m32bit +Target Report RejectNegative +Align code and data to 32 bits + +; See cris_handle_option. +m16-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m16bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m8-bit +Target Report RejectNegative Undocumented + +; See cris_handle_option. +m8bit +Target Report RejectNegative +Don't align items in code or data + +; TARGET_PROLOGUE_EPILOGUE: Whether or not to omit function +; prologue and epilogue. +mprologue-epilogue +Target Report RejectNegative Mask(PROLOGUE_EPILOGUE) Undocumented + +mno-prologue-epilogue +Target Report RejectNegative InverseMask(PROLOGUE_EPILOGUE) +Do not emit function prologue or epilogue + +; We have to handle this m-option here since we can't wash it +; off in both CC1_SPEC and CC1PLUS_SPEC. + +mbest-lib-options +Target Report RejectNegative +Use the most feature-enabling options allowed by other options + +; FIXME: The following comment relates to gcc before cris.opt. +; Check if it's still valid: +; We must call it "override-" since calling it "no-" will cause +; gcc.c to forget it, if there's a "later" -mbest-lib-options. +; Kludgy, but needed for some multilibbed files. 
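
Illustration only, not part of the patch: a much-simplified stand-alone model of how the Mask()/InverseMask() option pairs above behave. Each pair just sets or clears one bit in target_flags, so the last -m option given on the command line wins. This is a sketch of the idea, not the real GCC option machinery.

#include <stdio.h>
#include <string.h>

/* Simplified model only: one bit per Mask(), cleared by the matching
   InverseMask() option.  */
enum { MASK_SIDE_EFFECT_PREFIXES = 1 << 0 };

static unsigned target_flags;

static void handle_option (const char *opt)
{
  if (!strcmp (opt, "-mside-effects"))
    target_flags |= MASK_SIDE_EFFECT_PREFIXES;    /* Mask(...)        */
  else if (!strcmp (opt, "-mno-side-effects"))
    target_flags &= ~MASK_SIDE_EFFECT_PREFIXES;   /* InverseMask(...) */
}

int main (void)
{
  handle_option ("-mside-effects");
  handle_option ("-mno-side-effects");   /* the later option overrides */
  printf ("side effects enabled: %d\n",
	  (target_flags & MASK_SIDE_EFFECT_PREFIXES) != 0);
  return 0;
}
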
+moverride-best-lib-options +Target Report RejectNegative +Override -mbest-lib-options + +mcpu= +Target Report RejectNegative Joined Undocumented Var(cris_cpu_str) + +march= +Target Report RejectNegative Joined Var(cris_cpu_str) +-march=ARCH Generate code for the specified chip or CPU version + +mtune= +Target Report RejectNegative Joined Var(cris_tune_str) +-mtune=ARCH Tune alignment for the specified chip or CPU version + +mmax-stackframe= +Target Report RejectNegative Joined Var(cris_max_stackframe_str) +-mmax-stackframe=SIZE Warn when a stackframe is larger than the specified size + +max-stackframe= +Target Report RejectNegative Joined Undocumented Var(cris_max_stackframe_str) + +mtrap-using-break8 +Target Report Var(cris_trap_using_break8) Init(2) +Emit traps as \"break 8\", default for CRIS v3 and up. If disabled, calls to abort() are used. + +mtrap-unaligned-atomic +Target Report Var(cris_trap_unaligned_atomic) Init(2) +Emit checks causing \"break 8\" instructions to execute when applying atomic builtins on misaligned memory + +munaligned-atomic-may-use-library +Target Report Var(cris_atomics_calling_libfunc) Init(2) +Handle atomic builtins that may be applied to unaligned data by calling library functions. Overrides -mtrap-unaligned-atomic. + +; TARGET_SVINTO: Currently this just affects alignment. FIXME: +; Redundant with TARGET_ALIGN_BY_32, or put machine stuff here? +; This and the others below could just as well be variables and +; TARGET_* defines in cris.h. +Mask(SVINTO) + +; TARGET_ALIGN_BY_32: Say that all alignment specifications say +; to prefer 32 rather than 16 bits. +Mask(ALIGN_BY_32) + +; TARGET_AVOID_GOTPLT is referred to in the .c and the .md so we +; need to allocate the flag and macros here. +Mask(AVOID_GOTPLT) diff --git a/gcc-4.9/gcc/config/cris/elf.opt b/gcc-4.9/gcc/config/cris/elf.opt new file mode 100644 index 000000000..f759d0173 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/elf.opt @@ -0,0 +1,25 @@ +; ELF-specific options for the CRIS port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +melf +Target Report RejectNegative Undocumented + +sim +Driver JoinedOrMissing diff --git a/gcc-4.9/gcc/config/cris/linux.h b/gcc-4.9/gcc/config/cris/linux.h new file mode 100644 index 000000000..af27e1089 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/linux.h @@ -0,0 +1,150 @@ +/* Definitions for GCC. Part of the machine description for CRIS. + Copyright (C) 2001-2014 Free Software Foundation, Inc. + Contributed by Axis Communications. Written by Hans-Peter Nilsson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +/* After the first "Node:" comment comes all preprocessor directives and + attached declarations described in the info files, the "Using and + Porting GCC" manual (uapgcc), in the same order as found in the "Target + macros" section in the gcc-2.9x CVS edition of 2000-03-17. FIXME: Not + really, but needs an update anyway. + + There is no generic copy-of-uapgcc comment, you'll have to see uapgcc + for that. If applicable, there is a CRIS-specific comment. The order + of macro definitions follow the order in the manual. Every section in + the manual (node in the info pages) has an introductory `Node: + ' comment. If no macros are defined for a section, only + the section-comment is present. */ + +/* This file defines the macros for cris-axis-linux-gnu that are not + covered by cris.h, elfos.h and (config/)linux.h. */ + +/* Make sure we have a valid TARGET_CPU_DEFAULT, so we can assume it + and take shortcuts below. */ +#ifndef TARGET_CPU_DEFAULT +#error "TARGET_CPU_DEFAULT not defined" +#elif (TARGET_CPU_DEFAULT+0) != 10 && (TARGET_CPU_DEFAULT+0) != 32 +#error "TARGET_CPU_DEFAULT must be 10 or 32, or this file be updated" +#endif + +/* Node: Instruction Output */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* Node: Driver */ +/* These macros are CRIS-specific, but used in target driver macros. */ + +#undef CRIS_CPP_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_CPP_SUBTARGET_SPEC \ + "%{pthread:-D_REENTRANT}\ + %{!march=*:%{!mcpu=*:-D__arch_v32 -D__CRIS_arch_version=32}}" +#else +# define CRIS_CPP_SUBTARGET_SPEC \ + "%{pthread:-D_REENTRANT}\ + %{!march=*:%{!mcpu=*:-D__arch_v10 -D__CRIS_arch_version=10}}" +#endif + +#undef CRIS_CC1_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_CC1_SUBTARGET_SPEC \ + "%{!march=*:%{!mcpu=*:-march=v32}}" +#define CRIS_SUBTARGET_DEFAULT_ARCH MASK_AVOID_GOTPLT +#else +# define CRIS_CC1_SUBTARGET_SPEC \ + "%{!march=*:%{!mcpu=*:-march=v10}}" +#define CRIS_SUBTARGET_DEFAULT_ARCH 0 +#endif + +#undef CRIS_ASM_SUBTARGET_SPEC +#if TARGET_CPU_DEFAULT == 32 +# define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself \ + %{!march=*:%{!mcpu=*:--march=v32}} \ + %{!fleading-underscore:--no-underscore}\ + %{fPIC|fpic|fPIE|fpie: --pic}" +#else +# define CRIS_ASM_SUBTARGET_SPEC \ + "--em=criself \ + %{!march=*:%{!mcpu=*:--march=v10}} \ + %{!fleading-underscore:--no-underscore}\ + %{fPIC|fpic|fPIE|fpie: --pic}" +#endif + +/* Previously controlled by target_flags. */ +#undef TARGET_LINUX +#define TARGET_LINUX 1 + +#undef CRIS_SUBTARGET_DEFAULT +#define CRIS_SUBTARGET_DEFAULT \ + (MASK_SVINTO \ + + MASK_ETRAX4_ADD \ + + MASK_ALIGN_BY_32 \ + + CRIS_SUBTARGET_DEFAULT_ARCH) + +#undef CRIS_DEFAULT_CPU_VERSION +#define CRIS_DEFAULT_CPU_VERSION CRIS_CPU_NG + +#define GLIBC_DYNAMIC_LINKER "/lib/ld.so.1" + +#undef CRIS_LINK_SUBTARGET_SPEC +#define CRIS_LINK_SUBTARGET_SPEC \ + "-mcrislinux\ + %{shared} %{static}\ + %{symbolic:-Bdynamic} %{static:-Bstatic}\ + %{!shared:%{!static:\ + %{rdynamic:-export-dynamic}\ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}\ + %{!r:%{O2|O3: --gc-sections}}" + + +/* Node: Run-time Target */ + +/* For the cris-*-linux* subtarget. 
*/ +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + GNU_USER_TARGET_OS_CPP_BUILTINS(); \ + if (flag_leading_underscore <= 0) \ + builtin_define ("__NO_UNDERSCORES__"); \ + } \ + while (0) + +/* Node: Type Layout */ + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* Node: Sections */ + +/* GNU/Linux has crti and crtn and does not need the + CRT_CALL_STATIC_FUNCTION trick in cris.h. */ +#undef CRT_CALL_STATIC_FUNCTION + +/* + * Local variables: + * eval: (c-set-style "gnu") + * indent-tabs-mode: t + * End: + */ diff --git a/gcc-4.9/gcc/config/cris/linux.opt b/gcc-4.9/gcc/config/cris/linux.opt new file mode 100644 index 000000000..b5a19e9ad --- /dev/null +++ b/gcc-4.9/gcc/config/cris/linux.opt @@ -0,0 +1,33 @@ +; GNU/Linux-specific options for the CRIS port of the compiler. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; Provide a legacy -mlinux option. +mlinux +Target Report RejectNegative Undocumented + +mno-gotplt +Target Report RejectNegative Mask(AVOID_GOTPLT) +Together with -fpic and -fPIC, do not use GOTPLT references + +; There's a small added setup cost with using GOTPLT references +; for the first (resolving) call, but should in total be a win +; both in code-size and execution-time. +mgotplt +Target Report RejectNegative InverseMask(AVOID_GOTPLT) Undocumented diff --git a/gcc-4.9/gcc/config/cris/predicates.md b/gcc-4.9/gcc/config/cris/predicates.md new file mode 100644 index 000000000..0169b0b71 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/predicates.md @@ -0,0 +1,178 @@ +;; Operand and operator predicates for the GCC CRIS port. +;; Copyright (C) 2005-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + + +;; Operator predicates. + +(define_predicate "cris_orthogonal_operator" + (match_code "plus, minus, ior, and, umin")) + +(define_predicate "cris_commutative_orth_op" + (match_code "plus, ior, and, umin")) + +;; By the name, you might think we should include MULT. We don't because +;; it doesn't accept the same addressing modes as the others (only +;; registers) and there's also the problem of handling TARGET_MUL_BUG. 
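To see that distinction at the source level, here is a hedged C sketch (the functions accumulate and scale are invented for illustration): the operators listed as "orthogonal" above can take one operand straight from memory, while a multiply needs both operands in registers first.

   int
   accumulate (int acc, const int *p)
   {
     /* plus with a memory source: the load can fold into the add,
        which is why plus belongs in cris_orthogonal_operator.  */
     return acc + *p;
   }

   int
   scale (int acc, const int *p)
   {
     /* mult accepts only register operands, so *p is loaded first;
        the TARGET_MUL_BUG handling noted above is the other complication.  */
     return acc * *p;
   }
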
+ +(define_predicate "cris_operand_extend_operator" + (match_code "plus, minus, umin")) + +(define_predicate "cris_additive_operand_extend_operator" + (match_code "plus, minus")) + +(define_predicate "cris_extend_operator" + (match_code "zero_extend, sign_extend")) + +(define_predicate "cris_plus_or_bound_operator" + (match_code "plus, umin")) + +;; Used as an operator to get a handle on a already-known-valid MEM rtx:es +;; (no need to validate the address), where some address expression parts +;; have their own match_operand. + +(define_predicate "cris_mem_op" + (match_code "mem")) + +(define_predicate "cris_load_multiple_op" + (and (match_code "parallel") + (match_test "cris_movem_load_rest_p (op, 0)"))) + +(define_predicate "cris_store_multiple_op" + (and (match_code "parallel") + (match_test "cris_store_multiple_op_p (op)"))) + + +;; Operand helper predicates. + +(define_predicate "cris_bdap_const_operand" + (and (match_code "label_ref, symbol_ref, const_int, const_double, const") + (ior (not (match_test "flag_pic")) + (match_test "cris_valid_pic_const (op, true)")))) + +(define_predicate "cris_simple_address_operand" + (ior (match_operand:SI 0 "register_operand") + (and (match_code "post_inc") + (match_test "register_operand (XEXP (op, 0), Pmode)")))) + +(define_predicate "cris_simple_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "mem") + (match_test "cris_simple_address_operand (XEXP (op, 0), + Pmode)")))) + +(define_predicate "cris_nonsp_register_operand" + (and (match_operand 0 "register_operand") + (match_test "op != stack_pointer_rtx"))) + +;; The caller needs to use :SI. +(define_predicate "cris_bdap_sign_extend_operand" +; Disabled until +; or is committed. + (match_test "0")) +; (and (match_code "sign_extend") +; (and (match_test "MEM_P (XEXP (op, 0))") +; (match_test "cris_simple_address_operand (XEXP (XEXP (op, 0), 0), +; Pmode)")))) + +;; FIXME: Should not have to test for 1. +(define_predicate "cris_scale_int_operand" + (and (match_code "const_int") + (ior (ior (match_test "op == GEN_INT (4)") + (match_test "op == const2_rtx")) + (match_test "op == const1_rtx")))) + +;; FIXME: Should be able to assume (reg int). +(define_predicate "cris_biap_mult_operand" + (and (match_code "mult") + (ior (and (match_test "register_operand (XEXP (op, 0), Pmode)") + (match_test "cris_scale_int_operand (XEXP (op, 1), Pmode)")) + (and (match_test "cris_scale_int_operand (XEXP (op, 0), Pmode)") + (match_test "register_operand (XEXP (op, 1), Pmode)"))))) + + +;; Operand predicates. + +;; This checks a part of an address, the one that is not a plain register +;; for an addressing mode using BDAP. +;; Allowed operands are either: +;; a) a register +;; b) a CONST operand (but not a symbol when generating PIC) +;; c) a [r] or [r+] in SImode, or sign-extend from HI or QI. + +(define_predicate "cris_bdap_operand" + (ior (match_operand 0 "cris_bdap_const_operand") + (ior (match_operand:SI 0 "cris_simple_operand") + (match_operand:SI 0 "cris_bdap_sign_extend_operand")))) + +;; This is similar to cris_bdap_operand: +;; It checks a part of an address, the one that is not a plain register +;; for an addressing mode using BDAP or BIAP. +;; Allowed operands are either: +;; a) a register +;; b) a CONST operand (but not a symbol when generating PIC) +;; c) a mult of (1, 2 or 4) and a register +;; d) a [r] or [r+] in SImode, or sign-extend from HI or QI. 
*/ + +(define_predicate "cris_bdap_biap_operand" + (ior (match_operand 0 "cris_bdap_operand") + (match_operand 0 "cris_biap_mult_operand"))) + +;; Since with -fPIC, not all symbols are valid PIC symbols or indeed +;; general_operands, we have to have a predicate that matches it for the +;; "movsi" expander. +;; FIXME: Can s/special_// when PR 20413 is fixed. + +(define_special_predicate "cris_general_operand_or_symbol" + (ior (match_operand 0 "general_operand") + (and (match_code "const, symbol_ref, label_ref") + ; The following test is actually just an assertion. + (match_test "cris_pic_symbol_type_of (op) != cris_no_symbol")))) + +;; A predicate for the anon movsi expansion, one that fits a PCREL +;; operand as well as general_operand. + +(define_special_predicate "cris_general_operand_or_pic_source" + (ior (match_operand 0 "general_operand") + (and (match_test "flag_pic") + (match_test "cris_valid_pic_const (op, false)")))) + +;; Since a PLT symbol is not a general_operand, we have to have a +;; predicate that matches it when we need it. We use this in the expanded +;; "call" and "call_value" anonymous patterns. + +(define_predicate "cris_nonmemory_operand_or_callable_symbol" + (ior (match_operand 0 "nonmemory_operand") + (and (match_code "const") + (and + (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC") + (ior + (match_test "XINT (XEXP (op, 0), 1) == CRIS_UNSPEC_PLT_PCREL") + (match_test "XINT (XEXP (op, 0), 1) == CRIS_UNSPEC_PCREL")))))) + +;; This matches a (MEM (general_operand)) or +;; (MEM (cris_general_operand_or_symbol)). The second one isn't a valid +;; memory_operand, so we need this predicate to recognize call +;; destinations before we change them to a PLT operand (by wrapping in +;; UNSPEC CRIS_UNSPEC_PLT). + +(define_predicate "cris_mem_call_operand" + (and (match_code "mem") + (ior (match_operand 0 "memory_operand") + (match_test "cris_general_operand_or_symbol (XEXP (op, 0), + Pmode)")))) diff --git a/gcc-4.9/gcc/config/cris/sync.md b/gcc-4.9/gcc/config/cris/sync.md new file mode 100644 index 000000000..7f10aa43d --- /dev/null +++ b/gcc-4.9/gcc/config/cris/sync.md @@ -0,0 +1,314 @@ +;; GCC machine description for CRIS atomic memory sequences. +;; Copyright (C) 2012-2014 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; The CRIS atomic support yields code in three flavors, depending on +;; the CPU for which code is generated: +;; +;; - Plain old CRIS v0 (..v8) +;; - CRIS v10 (as used in ETRAX 100 LX) +;; - CRIS v32 (as used in ETRAX FS) +;; +;; The last two alternatives are similar, of LL/SC type. They may +;; fail for other reasons; an exception, a cache miss or a bus request +;; from other parts of the system. The difference between them is +;; just in what condition-codes are used to track LL and success or +;; failure for the store. 
See the chapter on integral read-write +;; operations, chapter 1.13 in "ETRAX 100LX Programmers Manual", +;; +;; and chapter 2.1 in "ETRAX FS Designer's reference", +;; . +;; Note that the datum being stored has to be contained fully within a +;; cache-line to be integral. A failure to store the data integrally +;; will be flagged, but the store may still have happened in part, +;; which translates most usefully into the data having to be +;; "naturally aligned" to work. Natural alignment is verified in the +;; generated code and will by default cause for unaligned pointers a +;; "break 8" to be executed or optionally a call to abort(). Beware +;; that options -m16bit and -m8bit may cause data to be unaligned +;; where it was otherwise aligned. Data has a better chance of being +;; aligned if it is declared with e.g. __attribute__ ((__align__ (4))). +;; +;; The "plain old v0..v8 flavor" just assumes there's a single CPU in +;; the system, that no other parts of the system have access to memory +;; used for atomic accesses and since there's no user mode without +;; access to interrupt flags (another assumption), it just turns off +;; interrupts while doing the access. Here, alignment is neither +;; required nor asserted. + +(define_c_enum "" + [ + CRIS_UNSPEC_ATOMIC_OP + CRIS_UNSPEC_ATOMIC_SWAP_MEM + CRIS_UNSPEC_ATOMIC_SWAP_BOOL + ]) + +(define_constants [(CRIS_CCR_INTERRUPT_BIT 5)]) + +;; We use "mult" as a placeholder for "nand" (which does not have a +;; separate binary rtx operation) so we can use an iterator in the +;; define_expand and define_insn and avoid having a separate +;; mostly-identical copy. You will see the "mult" operator in rtl +;; dumps, but it shouldn't matter as its use has one of its operands +;; inside an unspec_volatile. + +(define_code_iterator atomic_op [plus minus ior and xor mult]) + +(define_code_attr atomic_op_name + [(plus "add") (minus "sub") (and "and") (ior "or") (xor "xor") (mult "nand")]) + +;; The operator nonatomic-operand can be memory, constant or register +;; for all but xor. We can't use memory or addressing modes with +;; side-effects though, so just use registers and literal constants. +(define_code_attr atomic_op_op_cnstr + [(plus "ri") (minus "ri") (and "ri") (ior "ri") (xor "r") (mult "ri")]) + +(define_code_attr atomic_op_op_pred + [(plus "nonmemory_operand") (minus "nonmemory_operand") + (and "nonmemory_operand") (ior "nonmemory_operand") + (xor "register_operand") (mult "nonmemory_operand")]) + +;; Pairs of these are used to insert the "not" after the "and" for nand. +(define_code_attr atomic_op_mnem_pre_op2 ;; Upper-case only to simplify testing. + [(plus "%P2") (minus "Sub.d %2") (and "And%q2 %2") (ior "Or%q2 %2") (xor "Xor %2") + (mult "aNd%q2 %2")]) + +(define_code_attr atomic_op_mnem_post_op3 + [(plus "") (minus "") (and "") (ior "") (xor "") (mult "not %3\;")]) + +;; For SImode, emit "q" for operands -31..31. 
+(define_mode_attr qm3 [(SI "%q3") (HI ".w") (QI ".b")]) + +(define_expand "atomic_fetch_" + [(match_operand:BWD 0 "register_operand") + (match_operand:BWD 1 "memory_operand") + (match_operand:BWD 2 "") + (match_operand 3) + (atomic_op:BWD (match_dup 0) (match_dup 1))] + "mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS" +{ + enum memmodel mmodel = (enum memmodel) INTVAL (operands[3]); + + if (mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC) + cris_emit_trap_for_misalignment (operands[1]); + + if (need_atomic_barrier_p (mmodel, true)) + expand_mem_thread_fence (mmodel); + + emit_insn (gen_cris_atomic_fetch__1 (operands[0], + operands[1], + operands[2])); + if (need_atomic_barrier_p (mmodel, false)) + expand_mem_thread_fence (mmodel); + + DONE; +}) + +(define_insn "cris_atomic_fetch__1" + [(set (match_operand:BWD 1 "memory_operand" "+Q") + (atomic_op:BWD + (unspec_volatile:BWD [(match_dup 1)] CRIS_UNSPEC_ATOMIC_OP) + ;; FIXME: improve constants more for plus, minus, and, ior. + ;; FIXME: handle memory operands without side-effects. + (match_operand:BWD 2 "" ""))) + (set (match_operand:BWD 0 "register_operand" "=&r") + (match_dup 1)) + (clobber (match_scratch:SI 3 "=&r"))] + "mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS" +{ + /* Can't be too sure; better ICE if this happens. */ + gcc_assert (!reg_overlap_mentioned_p (operands[2], operands[1])); + + if (TARGET_V32) + return + "clearf p\n" + ".Lsync.%=:\;" + "move %1,%0\;" + "move.d %0,%3\;" + ",%3\;" + "ax\;" + "move %3,%1\;" + "bcs .Lsync.%=\;" + "clearf p"; + else if (cris_cpu_version == 10) + return + "clearf\n" + ".Lsync.%=:\;" + "move %1,%0\;" + "move.d %0,%3\;" + ",%3\;" + "ax\;" + "move %3,%1\;" + "bwf .Lsync.%=\;" + "clearf"; + else + { + /* This one is for CRIS versions without load-locked-store-conditional + machinery; assume single-core-non-shared-memory without user + mode/supervisor mode distinction, and just disable interrupts + while performing the operation. + Rather than making this pattern more complex by freeing another + register or stack position to save condition codes (the value + of the interrupt-enabled bit), we check whether interrupts were + enabled before we disabled them and branch to a version + with/without afterwards re-enabling them. */ + rtx ops[5]; + + /* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT. */ + memcpy (ops, operands, sizeof(ops)); + ops[4] = GEN_INT (CRIS_CCR_INTERRUPT_BIT); + + output_asm_insn ("move $ccr,%3\;" + "di\;" + "move %1,%0\;" + "btstq %4,%3", + ops); + return + "bmi .Lsync.irqon.%=\;" + "move.d %0,%3\;" + + ",%3\;" + "ba .Lsync.irqoff.%=\;" + "move %3,%1\n" + + ".Lsync.irqon.%=:\;" + ",%3\;" + "move %3,%1\;" + "ei\n" + ".Lsync.irqoff.%=:"; + } +}) + +;; This pattern is more-or-less assumed to always exist if any of the +;; other atomic patterns exist (see e.g. comment at the +;; can_compare_and_swap_p call in omp-low.c, 4.8 era). We'd slightly +;; prefer atomic_exchange over this, but having both would be +;; redundant. +;; FIXME: handle memory without side-effects for operand[3]. 
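At the source level these patterns are reached through GCC's __atomic builtins. A minimal usage sketch follows (illustrative only; the names counter, bump and claim are invented), tying back to the alignment rules described at the top of this file:

   /* Keeping the object naturally aligned keeps it within one cache line,
      which the v10/v32 sequences require; with -mtrap-unaligned-atomic the
      emitted check would execute "break 8" on a misaligned address.  */
   static int counter __attribute__ ((aligned (4)));

   int
   bump (void)
   {
     /* Lowered through the atomic_fetch_add expander above.  */
     return __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);
   }

   int
   claim (int expected, int desired)
   {
     /* Lowered through the atomic_compare_and_swap expander below.  */
     return __atomic_compare_exchange_n (&counter, &expected, desired,
                                         0, __ATOMIC_SEQ_CST,
                                         __ATOMIC_SEQ_CST);
   }
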
+(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand") + (match_operand:BWD 1 "register_operand") + (match_operand:BWD 2 "memory_operand") + (match_operand:BWD 3 "nonmemory_operand") + (match_operand:BWD 4 "register_operand") + (match_operand 5) + (match_operand 6) + (match_operand 7)] + "mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS" +{ + enum memmodel mmodel = (enum memmodel) INTVAL (operands[6]); + + if (mode != QImode && TARGET_TRAP_UNALIGNED_ATOMIC) + cris_emit_trap_for_misalignment (operands[2]); + + if (need_atomic_barrier_p (mmodel, true)) + expand_mem_thread_fence (mmodel); + + emit_insn (gen_cris_atomic_compare_and_swap_1 (operands[0], + operands[1], + operands[2], + operands[3], + operands[4])); + if (need_atomic_barrier_p (mmodel, false)) + expand_mem_thread_fence (mmodel); + + DONE; +}) + +(define_insn "cris_atomic_compare_and_swap_1" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec_volatile:SI + [(match_operand:BWD 2 "memory_operand" "+Q") + (match_operand:BWD 3 "nonmemory_operand" "ri")] + CRIS_UNSPEC_ATOMIC_SWAP_BOOL)) + (set (match_operand:BWD 1 "register_operand" "=&r") (match_dup 2)) + (set (match_dup 2) + (unspec_volatile:BWD + [(match_dup 2) + (match_dup 3) + (match_operand:BWD 4 "register_operand" "r")] + CRIS_UNSPEC_ATOMIC_SWAP_MEM))] + "mode == QImode || !TARGET_ATOMICS_MAY_CALL_LIBFUNCS" +{ + if (TARGET_V32) + return + "\n.Lsync.repeat.%=:\;" + "clearf p\;" + "move %2,%1\;" + "cmp %3,%1\;" + "bne .Lsync.after.%=\;" + "ax\;" + + "move %4,%2\;" + "bcs .Lsync.repeat.%=\n" + ".Lsync.after.%=:\;" + "seq %0"; + else if (cris_cpu_version == 10) + return + "\n.Lsync.repeat.%=:\;" + "clearf\;" + "move %2,%1\;" + "cmp %3,%1\;" + "bne .Lsync.after.%=\;" + "ax\;" + + "move %4,%2\;" + "bwf .Lsync.repeat.%=\n" + ".Lsync.after.%=:\;" + "seq %0"; + else + { + /* This one is for CRIS versions without load-locked-store-conditional + machinery; assume single-core-non-shared-memory without user + mode/supervisor mode distinction, and just disable interrupts + while performing the operation. + Rather than making this pattern more complex by freeing another + register or stack position to save condition codes (the value + of the interrupt-enabled bit), we check whether interrupts were + enabled before we disabled them and branch to a version + with/without afterwards re-enabling them. */ + rtx ops[4]; + + /* We have no available macro to stringify CRIS_CCR_INTERRUPT_BIT. */ + memcpy (ops, operands, sizeof(ops)); + ops[3] = GEN_INT (CRIS_CCR_INTERRUPT_BIT); + + output_asm_insn ("move $ccr,%0\;" + "di\;" + "move %2,%1\;" + "btstq %3,%0", + ops); + return + "bmi .Lsync.irqon.%=\;" + "nop\;" + + "cmp %3,%1\;" + "bne .Lsync.after.%=\;" + "seq %0\;" + "ba .Lsync.after.%=\;" + "move %4,%2\n" + + ".Lsync.irqon.%=:\;" + "cmp %3,%1\;" + "bne .Lsync.after.%=\;" + "seq %0\;" + "move %4,%2\;" + "ei\n" + ".Lsync.after.%=:"; + } +}) diff --git a/gcc-4.9/gcc/config/cris/t-cris b/gcc-4.9/gcc/config/cris/t-cris new file mode 100644 index 000000000..a58566525 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/t-cris @@ -0,0 +1,29 @@ +# +# t-cris +# +# The Makefile fragment to include when compiling gcc et al for CRIS. +# +# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . +# +# The makefile macros etc. are included in the order found in the +# section "Target Fragment" in the gcc info-files (or the paper copy) of +# "Using and Porting GCC" + +$(out_object_file): gt-cris.h +gt-cris.h : s-gtype ; @true diff --git a/gcc-4.9/gcc/config/cris/t-elfmulti b/gcc-4.9/gcc/config/cris/t-elfmulti new file mode 100644 index 000000000..1e9cf72d0 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/t-elfmulti @@ -0,0 +1,31 @@ +# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +MULTILIB_OPTIONS = march=v8/march=v10/march=v32 +MULTILIB_DIRNAMES = v8 v10 v32 +MULTILIB_MATCHES = \ + march?v8=mcpu?v8 \ + march?v10=mcpu?etrax100lx \ + march?v10=mcpu?ng \ + march?v10=march?etrax100lx \ + march?v10=march?ng \ + march?v10=march?v11 \ + march?v10=mcpu?v11 \ + march?v10=mcpu?v10 \ + march?v32=mcpu?v32 +MULTILIB_EXTRA_OPTS = mbest-lib-options diff --git a/gcc-4.9/gcc/config/cris/t-linux b/gcc-4.9/gcc/config/cris/t-linux new file mode 100644 index 000000000..71a964936 --- /dev/null +++ b/gcc-4.9/gcc/config/cris/t-linux @@ -0,0 +1,5 @@ +# We *know* we have a limits.h in the glibc library, with extra +# definitions needed for e.g. libgfortran. +ifneq ($(inhibit_libc),true) +LIMITS_H_TEST = : +endif diff --git a/gcc-4.9/gcc/config/darwin-c.c b/gcc-4.9/gcc/config/darwin-c.c new file mode 100644 index 000000000..892ba3547 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-c.c @@ -0,0 +1,775 @@ +/* Darwin support needed only by C/C++ frontends. + Copyright (C) 2001-2014 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "cpplib.h" +#include "tree.h" +#include "target.h" +#include "incpath.h" +#include "c-family/c-common.h" +#include "c-family/c-pragma.h" +#include "c-family/c-format.h" +#include "diagnostic-core.h" +#include "flags.h" +#include "tm_p.h" +#include "cppdefault.h" +#include "prefix.h" +#include "c-family/c-target.h" +#include "c-family/c-target-def.h" +#include "cgraph.h" +#include "../../libcpp/internal.h" + +/* Pragmas. */ + +#define BAD(gmsgid) do { warning (OPT_Wpragmas, gmsgid); return; } while (0) +#define BAD2(msgid, arg) do { warning (OPT_Wpragmas, msgid, arg); return; } while (0) + +static bool using_frameworks = false; + +static const char *find_subframework_header (cpp_reader *pfile, const char *header, + cpp_dir **dirp); + +typedef struct align_stack +{ + int alignment; + struct align_stack * prev; +} align_stack; + +static struct align_stack * field_align_stack = NULL; + +/* Maintain a small stack of alignments. This is similar to pragma + pack's stack, but simpler. */ + +static void +push_field_alignment (int bit_alignment) +{ + align_stack *entry = XNEW (align_stack); + + entry->alignment = maximum_field_alignment; + entry->prev = field_align_stack; + field_align_stack = entry; + + maximum_field_alignment = bit_alignment; +} + +static void +pop_field_alignment (void) +{ + if (field_align_stack) + { + align_stack *entry = field_align_stack; + + maximum_field_alignment = entry->alignment; + field_align_stack = entry->prev; + free (entry); + } + else + error ("too many #pragma options align=reset"); +} + +/* Handlers for Darwin-specific pragmas. */ + +void +darwin_pragma_ignore (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + /* Do nothing. */ +} + +/* #pragma options align={mac68k|power|reset} */ + +void +darwin_pragma_options (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + const char *arg; + tree t, x; + + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma options', ignoring"); + arg = IDENTIFIER_POINTER (t); + if (strcmp (arg, "align")) + BAD ("malformed '#pragma options', ignoring"); + if (pragma_lex (&t) != CPP_EQ) + BAD ("malformed '#pragma options', ignoring"); + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma options', ignoring"); + + if (pragma_lex (&x) != CPP_EOF) + warning (OPT_Wpragmas, "junk at end of '#pragma options'"); + + arg = IDENTIFIER_POINTER (t); + if (!strcmp (arg, "mac68k")) + push_field_alignment (16); + else if (!strcmp (arg, "power")) + push_field_alignment (0); + else if (!strcmp (arg, "reset")) + pop_field_alignment (); + else + BAD ("malformed '#pragma options align={mac68k|power|reset}', ignoring"); +} + +/* #pragma unused ([var {, var}*]) */ + +void +darwin_pragma_unused (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + tree decl, x; + int tok; + + if (pragma_lex (&x) != CPP_OPEN_PAREN) + BAD ("missing '(' after '#pragma unused', ignoring"); + + while (1) + { + tok = pragma_lex (&decl); + if (tok == CPP_NAME && decl) + { + tree local = lookup_name (decl); + if (local && (TREE_CODE (local) == PARM_DECL + || TREE_CODE (local) == VAR_DECL)) + { + TREE_USED (local) = 1; + DECL_READ_P (local) = 1; + } + tok = pragma_lex (&x); + if (tok != CPP_COMMA) + break; + } + } + + if (tok != CPP_CLOSE_PAREN) + BAD ("missing ')' after '#pragma unused', ignoring"); + + if (pragma_lex (&x) != CPP_EOF) + BAD ("junk at end of '#pragma unused'"); +} + +/* Parse the ms_struct pragma. 
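For reference, the forms accepted by the handlers above and by the ms_struct handler that follows look like this in user code (a hypothetical snippet; struct legacy and f are invented):

   #pragma options align=mac68k   /* push 16-bit maximum field alignment */
   struct legacy { char tag; long value; };
   #pragma options align=reset    /* pop back to the previous setting */

   #pragma ms_struct on           /* following records use ms_struct layout */

   void
   f (int unused_arg)
   {
   #pragma unused (unused_arg)    /* mark as used so -Wunused stays quiet */
   }
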
*/ +void +darwin_pragma_ms_struct (cpp_reader *pfile ATTRIBUTE_UNUSED) +{ + const char *arg; + tree t; + + if (pragma_lex (&t) != CPP_NAME) + BAD ("malformed '#pragma ms_struct', ignoring"); + arg = IDENTIFIER_POINTER (t); + + if (!strcmp (arg, "on")) + darwin_ms_struct = true; + else if (!strcmp (arg, "off") || !strcmp (arg, "reset")) + darwin_ms_struct = false; + else + BAD ("malformed '#pragma ms_struct {on|off|reset}', ignoring"); + + if (pragma_lex (&t) != CPP_EOF) + BAD ("junk at end of '#pragma ms_struct'"); +} + +static struct frameworks_in_use { + size_t len; + const char *name; + cpp_dir* dir; +} *frameworks_in_use; +static int num_frameworks = 0; +static int max_frameworks = 0; + + +/* Remember which frameworks have been seen, so that we can ensure + that all uses of that framework come from the same framework. DIR + is the place where the named framework NAME, which is of length + LEN, was found. We copy the directory name from NAME, as it will be + freed by others. */ + +static void +add_framework (const char *name, size_t len, cpp_dir *dir) +{ + char *dir_name; + int i; + for (i = 0; i < num_frameworks; ++i) + { + if (len == frameworks_in_use[i].len + && strncmp (name, frameworks_in_use[i].name, len) == 0) + { + return; + } + } + if (i >= max_frameworks) + { + max_frameworks = i*2; + max_frameworks += i == 0; + frameworks_in_use = XRESIZEVEC (struct frameworks_in_use, + frameworks_in_use, max_frameworks); + } + dir_name = XNEWVEC (char, len + 1); + memcpy (dir_name, name, len); + dir_name[len] = '\0'; + frameworks_in_use[num_frameworks].name = dir_name; + frameworks_in_use[num_frameworks].len = len; + frameworks_in_use[num_frameworks].dir = dir; + ++num_frameworks; +} + +/* Recall if we have seen the named framework NAME, before, and where + we saw it. NAME is LEN bytes long. The return value is the place + where it was seen before. */ + +static struct cpp_dir* +find_framework (const char *name, size_t len) +{ + int i; + for (i = 0; i < num_frameworks; ++i) + { + if (len == frameworks_in_use[i].len + && strncmp (name, frameworks_in_use[i].name, len) == 0) + { + return frameworks_in_use[i].dir; + } + } + return 0; +} + +/* There are two directories in a framework that contain header files, + Headers and PrivateHeaders. We search Headers first as it is more + common to upgrade a header from PrivateHeaders to Headers and when + that is done, the old one might hang around and be out of data, + causing grief. */ + +struct framework_header {const char * dirName; int dirNameLen; }; +static struct framework_header framework_header_dirs[] = { + { "Headers", 7 }, + { "PrivateHeaders", 14 }, + { NULL, 0 } +}; + +/* Returns a pointer to a malloced string that contains the real pathname + to the file, given the base name and the name. */ + +static char * +framework_construct_pathname (const char *fname, cpp_dir *dir) +{ + const char *buf; + size_t fname_len, frname_len; + cpp_dir *fast_dir; + char *frname; + struct stat st; + int i; + + /* Framework names must have a / in them. */ + buf = strchr (fname, '/'); + if (buf) + fname_len = buf - fname; + else + return 0; + + fast_dir = find_framework (fname, fname_len); + + /* Framework includes must all come from one framework. 
*/ + if (fast_dir && dir != fast_dir) + return 0; + + frname = XNEWVEC (char, strlen (fname) + dir->len + 2 + + strlen(".framework/") + strlen("PrivateHeaders")); + strncpy (&frname[0], dir->name, dir->len); + frname_len = dir->len; + if (frname_len && frname[frname_len-1] != '/') + frname[frname_len++] = '/'; + strncpy (&frname[frname_len], fname, fname_len); + frname_len += fname_len; + strncpy (&frname[frname_len], ".framework/", strlen (".framework/")); + frname_len += strlen (".framework/"); + + if (fast_dir == 0) + { + frname[frname_len-1] = 0; + if (stat (frname, &st) == 0) + { + /* As soon as we find the first instance of the framework, + we stop and never use any later instance of that + framework. */ + add_framework (fname, fname_len, dir); + } + else + { + /* If we can't find the parent directory, no point looking + further. */ + free (frname); + return 0; + } + frname[frname_len-1] = '/'; + } + + /* Append framework_header_dirs and header file name */ + for (i = 0; framework_header_dirs[i].dirName; i++) + { + strncpy (&frname[frname_len], + framework_header_dirs[i].dirName, + framework_header_dirs[i].dirNameLen); + strcpy (&frname[frname_len + framework_header_dirs[i].dirNameLen], + &fname[fname_len]); + + if (stat (frname, &st) == 0) + return frname; + } + + free (frname); + return 0; +} + +/* Search for FNAME in sub-frameworks. pname is the context that we + wish to search in. Return the path the file was found at, + otherwise return 0. */ + +static const char* +find_subframework_file (const char *fname, const char *pname) +{ + char *sfrname; + const char *dot_framework = ".framework/"; + const char *bufptr; + int sfrname_len, i, fname_len; + struct cpp_dir *fast_dir; + static struct cpp_dir subframe_dir; + struct stat st; + + bufptr = strchr (fname, '/'); + + /* Subframework files must have / in the name. */ + if (bufptr == 0) + return 0; + + fname_len = bufptr - fname; + fast_dir = find_framework (fname, fname_len); + + /* Sub framework header filename includes parent framework name and + header name in the "CarbonCore/OSUtils.h" form. If it does not + include slash it is not a sub framework include. */ + bufptr = strstr (pname, dot_framework); + + /* If the parent header is not of any framework, then this header + cannot be part of any subframework. */ + if (!bufptr) + return 0; + + /* Now translate. 
For example, +- bufptr + fname = CarbonCore/OSUtils.h | + pname = /System/Library/Frameworks/Foundation.framework/Headers/Foundation.h + into + sfrname = /System/Library/Frameworks/Foundation.framework/Frameworks/CarbonCore.framework/Headers/OSUtils.h */ + + sfrname = XNEWVEC (char, strlen (pname) + strlen (fname) + 2 + + strlen ("Frameworks/") + strlen (".framework/") + + strlen ("PrivateHeaders")); + + bufptr += strlen (dot_framework); + + sfrname_len = bufptr - pname; + + strncpy (&sfrname[0], pname, sfrname_len); + + strncpy (&sfrname[sfrname_len], "Frameworks/", strlen ("Frameworks/")); + sfrname_len += strlen("Frameworks/"); + + strncpy (&sfrname[sfrname_len], fname, fname_len); + sfrname_len += fname_len; + + strncpy (&sfrname[sfrname_len], ".framework/", strlen (".framework/")); + sfrname_len += strlen (".framework/"); + + /* Append framework_header_dirs and header file name */ + for (i = 0; framework_header_dirs[i].dirName; i++) + { + strncpy (&sfrname[sfrname_len], + framework_header_dirs[i].dirName, + framework_header_dirs[i].dirNameLen); + strcpy (&sfrname[sfrname_len + framework_header_dirs[i].dirNameLen], + &fname[fname_len]); + + if (stat (sfrname, &st) == 0) + { + if (fast_dir != &subframe_dir) + { + if (fast_dir) + warning (0, "subframework include %s conflicts with framework include", + fname); + else + add_framework (fname, fname_len, &subframe_dir); + } + + return sfrname; + } + } + free (sfrname); + + return 0; +} + +/* Add PATH to the system includes. PATH must be malloc-ed and + NUL-terminated. System framework paths are C++ aware. */ + +static void +add_system_framework_path (char *path) +{ + int cxx_aware = 1; + cpp_dir *p; + + p = XNEW (cpp_dir); + p->next = NULL; + p->name = path; + p->sysp = 1 + !cxx_aware; + p->construct = framework_construct_pathname; + using_frameworks = 1; + + add_cpp_dir_path (p, SYSTEM); +} + +/* Add PATH to the bracket includes. PATH must be malloc-ed and + NUL-terminated. */ + +void +add_framework_path (char *path) +{ + cpp_dir *p; + + p = XNEW (cpp_dir); + p->next = NULL; + p->name = path; + p->sysp = 0; + p->construct = framework_construct_pathname; + using_frameworks = 1; + + add_cpp_dir_path (p, BRACKET); +} + +static const char *framework_defaults [] = + { + "/System/Library/Frameworks", + "/Library/Frameworks", + }; + +/* Register the GNU objective-C runtime include path if STDINC. */ + +void +darwin_register_objc_includes (const char *sysroot, const char *iprefix, + int stdinc) +{ + const char *fname; + size_t len; + /* We do not do anything if we do not want the standard includes. */ + if (!stdinc) + return; + + fname = GCC_INCLUDE_DIR "-gnu-runtime"; + + /* Register the GNU OBJC runtime include path if we are compiling OBJC + with GNU-runtime. */ + + if (c_dialect_objc () && !flag_next_runtime) + { + char *str; + /* See if our directory starts with the standard prefix. + "Translate" them, i.e. replace /usr/local/lib/gcc... with + IPREFIX and search them first. */ + if (iprefix && (len = cpp_GCC_INCLUDE_DIR_len) != 0 && !sysroot + && !strncmp (fname, cpp_GCC_INCLUDE_DIR, len)) + { + str = concat (iprefix, fname + len, NULL); + /* FIXME: wrap the headers for C++awareness. */ + add_path (str, SYSTEM, /*c++aware=*/false, false); + } + + /* Should this directory start with the sysroot? 
*/ + if (sysroot) + str = concat (sysroot, fname, NULL); + else + str = update_path (fname, ""); + + add_path (str, SYSTEM, /*c++aware=*/false, false); + } +} + + +/* Register all the system framework paths if STDINC is true and setup + the missing_header callback for subframework searching if any + frameworks had been registered. */ + +void +darwin_register_frameworks (const char *sysroot, + const char *iprefix ATTRIBUTE_UNUSED, int stdinc) +{ + if (stdinc) + { + size_t i; + + /* Setup default search path for frameworks. */ + for (i=0; imissing_header = find_subframework_header; +} + +/* Search for HEADER in context dependent way. The return value is + the malloced name of a header to try and open, if any, or NULL + otherwise. This is called after normal header lookup processing + fails to find a header. We search each file in the include stack, + using FUNC, starting from the most deeply nested include and + finishing with the main input file. We stop searching when FUNC + returns nonzero. */ + +static const char* +find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp) +{ + const char *fname = header; + struct cpp_buffer *b; + const char *n; + + for (b = cpp_get_buffer (pfile); + b && cpp_get_file (b) && cpp_get_path (cpp_get_file (b)); + b = cpp_get_prev (b)) + { + n = find_subframework_file (fname, cpp_get_path (cpp_get_file (b))); + if (n) + { + /* Logically, the place where we found the subframework is + the place where we found the Framework that contains the + subframework. This is useful for tracking wether or not + we are in a system header. */ + *dirp = cpp_get_dir (cpp_get_file (b)); + return n; + } + } + + return 0; +} + +/* Return the value of darwin_macosx_version_min suitable for the + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ macro, + so '10.4.2' becomes 1040. The lowest digit is always zero. + Print a warning if the version number can't be understood. */ +static const char * +version_as_macro (void) +{ + static char result[] = "1000"; + + if (strncmp (darwin_macosx_version_min, "10.", 3) != 0) + goto fail; + if (! ISDIGIT (darwin_macosx_version_min[3])) + goto fail; + result[2] = darwin_macosx_version_min[3]; + if (darwin_macosx_version_min[4] != '\0' + && darwin_macosx_version_min[4] != '.') + goto fail; + + return result; + + fail: + error ("unknown value %qs of -mmacosx-version-min", + darwin_macosx_version_min); + return "1000"; +} + +/* Define additional CPP flags for Darwin. */ + +#define builtin_define(TXT) cpp_define (pfile, TXT) + +void +darwin_cpp_builtins (cpp_reader *pfile) +{ + builtin_define ("__MACH__"); + builtin_define ("__APPLE__"); + + /* __APPLE_CC__ is defined as some old Apple include files expect it + to be defined and won't work if it isn't. */ + builtin_define_with_value ("__APPLE_CC__", "1", false); + + if (darwin_constant_cfstrings) + builtin_define ("__CONSTANT_CFSTRINGS__"); + + builtin_define_with_value ("__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__", + version_as_macro(), false); + + /* Since we do not (at 4.6) support ObjC gc for the NeXT runtime, the + following will cause a syntax error if one tries to compile gc attributed + items. However, without this, NeXT system headers cannot be parsed + properly (on systems >= darwin 9). 
*/ + if (flag_objc_gc) + { + builtin_define ("__strong=__attribute__((objc_gc(strong)))"); + builtin_define ("__weak=__attribute__((objc_gc(weak)))"); + builtin_define ("__OBJC_GC__"); + } + else + { + builtin_define ("__strong="); + builtin_define ("__weak="); + } + + if (CPP_OPTION (pfile, objc) && flag_objc_abi == 2) + builtin_define ("__OBJC2__"); +} + +/* Handle C family front-end options. */ + +static bool +handle_c_option (size_t code, + const char *arg, + int value ATTRIBUTE_UNUSED) +{ + switch (code) + { + default: + /* Unrecognized options that we said we'd handle turn into + errors if not listed here. */ + return false; + + case OPT_iframework: + add_system_framework_path (xstrdup (arg)); + break; + + case OPT_fapple_kext: + ; + } + + /* We recognized the option. */ + return true; +} + +/* Allow ObjC* access to CFStrings. */ +static tree +darwin_objc_construct_string (tree str) +{ + if (!darwin_constant_cfstrings) + { + /* Even though we are not using CFStrings, place our literal + into the cfstring_htab hash table, so that the + darwin_constant_cfstring_p() function will see it. */ + darwin_enter_string_into_cfstring_table (str); + /* Fall back to NSConstantString. */ + return NULL_TREE; + } + + return darwin_build_constant_cfstring (str); +} + +/* The string ref type is created as CFStringRef by therefore, we + must match for it explicitly, since it's outside the gcc code. */ + +static bool +darwin_cfstring_ref_p (const_tree strp) +{ + tree tn; + if (!strp || TREE_CODE (strp) != POINTER_TYPE) + return false; + + tn = TYPE_NAME (strp); + if (tn) + tn = DECL_NAME (tn); + return (tn + && IDENTIFIER_POINTER (tn) + && !strncmp (IDENTIFIER_POINTER (tn), "CFStringRef", 8)); +} + +/* At present the behavior of this is undefined and it does nothing. */ +static void +darwin_check_cfstring_format_arg (tree ARG_UNUSED (format_arg), + tree ARG_UNUSED (args_list)) +{ +} + +/* The extra format types we recognize. */ +EXPORTED_CONST format_kind_info darwin_additional_format_types[] = { + { "CFString", NULL, NULL, NULL, NULL, + NULL, NULL, + FMT_FLAG_ARG_CONVERT|FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL, 0, 0, 0, 0, 0, 0, + NULL, NULL + } +}; + + +/* Support routines to dump the class references for NeXT ABI v1, aka + 32-bits ObjC-2.0, as top-level asms. + The following two functions should only be called from + objc/objc-next-runtime-abi-01.c. */ + +static void +darwin_objc_declare_unresolved_class_reference (const char *name) +{ + const char *lazy_reference = ".lazy_reference\t"; + const char *hard_reference = ".reference\t"; + const char *reference = MACHOPIC_INDIRECT ? lazy_reference : hard_reference; + size_t len = strlen (reference) + strlen(name) + 2; + char *buf = (char *) alloca (len); + + gcc_checking_assert (!strncmp (name, ".objc_class_name_", 17)); + + snprintf (buf, len, "%s%s", reference, name); + add_asm_node (build_string (strlen (buf), buf)); +} + +static void +darwin_objc_declare_class_definition (const char *name) +{ + const char *xname = targetm.strip_name_encoding (name); + size_t len = strlen (xname) + 7 + 5; + char *buf = (char *) alloca (len); + + gcc_checking_assert (!strncmp (name, ".objc_class_name_", 17) + || !strncmp (name, "*.objc_category_name_", 21)); + + /* Mimic default_globalize_label. 
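For a hypothetical class Foo, these two hooks amount to emitting ".lazy_reference .objc_class_name_Foo" (or ".reference ..." when MACHOPIC_INDIRECT is false) for an unresolved reference, and ".globl .objc_class_name_Foo" followed by ".objc_class_name_Foo = 0" for a definition; the NeXT v1 ABI resolves class references against these link-time symbols.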
*/ + snprintf (buf, len, ".globl\t%s", xname); + add_asm_node (build_string (strlen (buf), buf)); + + snprintf (buf, len, "%s = 0", xname); + add_asm_node (build_string (strlen (buf), buf)); +} + +#undef TARGET_HANDLE_C_OPTION +#define TARGET_HANDLE_C_OPTION handle_c_option + +#undef TARGET_OBJC_CONSTRUCT_STRING_OBJECT +#define TARGET_OBJC_CONSTRUCT_STRING_OBJECT darwin_objc_construct_string + +#undef TARGET_OBJC_DECLARE_UNRESOLVED_CLASS_REFERENCE +#define TARGET_OBJC_DECLARE_UNRESOLVED_CLASS_REFERENCE \ + darwin_objc_declare_unresolved_class_reference + +#undef TARGET_OBJC_DECLARE_CLASS_DEFINITION +#define TARGET_OBJC_DECLARE_CLASS_DEFINITION \ + darwin_objc_declare_class_definition + +#undef TARGET_STRING_OBJECT_REF_TYPE_P +#define TARGET_STRING_OBJECT_REF_TYPE_P darwin_cfstring_ref_p + +#undef TARGET_CHECK_STRING_OBJECT_FORMAT_ARG +#define TARGET_CHECK_STRING_OBJECT_FORMAT_ARG darwin_check_cfstring_format_arg + +struct gcc_targetcm targetcm = TARGETCM_INITIALIZER; diff --git a/gcc-4.9/gcc/config/darwin-driver.c b/gcc-4.9/gcc/config/darwin-driver.c new file mode 100644 index 000000000..8b6ae9391 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-driver.c @@ -0,0 +1,224 @@ +/* Additional functions for the GCC driver on Darwin native. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "gcc.h" +#include "opts.h" + +#ifndef CROSS_DIRECTORY_STRUCTURE +#include +#include "xregex.h" + +static bool +darwin_find_version_from_kernel (char *new_flag) +{ + char osversion[32]; + size_t osversion_len = sizeof (osversion) - 1; + static int osversion_name[2] = { CTL_KERN, KERN_OSRELEASE }; + int major_vers; + char minor_vers[6]; + char * version_p; + char * version_pend; + + /* Determine the version of the running OS. If we can't, warn user, + and do nothing. */ + if (sysctl (osversion_name, ARRAY_SIZE (osversion_name), osversion, + &osversion_len, NULL, 0) == -1) + { + warning (0, "sysctl for kern.osversion failed: %m"); + return false; + } + + /* Try to parse the first two parts of the OS version number. Warn + user and return if it doesn't make sense. */ + if (! ISDIGIT (osversion[0])) + goto parse_failed; + major_vers = osversion[0] - '0'; + version_p = osversion + 1; + if (ISDIGIT (*version_p)) + major_vers = major_vers * 10 + (*version_p++ - '0'); + if (major_vers > 4 + 9) + goto parse_failed; + if (*version_p++ != '.') + goto parse_failed; + version_pend = strchr(version_p, '.'); + if (!version_pend) + goto parse_failed; + if (! ISDIGIT (*version_p)) + goto parse_failed; + strncpy(minor_vers, version_p, version_pend - version_p); + minor_vers[version_pend - version_p] = '\0'; + + /* The major kernel version number is 4 plus the second OS version + component. 
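Concretely (example kern.osrelease values, not taken from the patch): Darwin 8.11.0 parses to major_vers 8, which falls at or below the old-linker cutoff just below, so the default becomes -mmacosx-version-min=10.4 with no third component; Darwin 9.8.0 parses to major_vers 9 and minor_vers "8", giving 10.5.8.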
*/ + if (major_vers - 4 <= 4) + /* On 10.4 and earlier, the old linker is used which does not + support three-component system versions. */ + sprintf (new_flag, "10.%d", major_vers - 4); + else + sprintf (new_flag, "10.%d.%s", major_vers - 4, + minor_vers); + + return true; + + parse_failed: + warning (0, "couldn%'t understand kern.osversion %q.*s", + (int) osversion_len, osversion); + return false; +} + +#endif + +/* When running on a Darwin system and using that system's headers and + libraries, default the -mmacosx-version-min flag to be the version + of the system on which the compiler is running. + + When building cross or native cross compilers, default to the OSX + version of the target (as provided by the most specific target header + included in tm.h). This may be overidden by setting the flag explicitly + (or by the MACOSX_DEPLOYMENT_TARGET environment). */ + +static void +darwin_default_min_version (unsigned int *decoded_options_count, + struct cl_decoded_option **decoded_options) +{ + const unsigned int argc = *decoded_options_count; + struct cl_decoded_option *const argv = *decoded_options; + unsigned int i; + static char new_flag[sizeof ("10.0.0") + 6]; + + /* If the command-line is empty, just return. */ + if (argc <= 1) + return; + + /* Don't do this if the user specified -mmacosx-version-min= or + -mno-macosx-version-min. */ + for (i = 1; i < argc; i++) + if (argv[i].opt_index == OPT_mmacosx_version_min_) + return; + + /* Retrieve the deployment target from the environment and insert + it as a flag. */ + { + const char * macosx_deployment_target; + macosx_deployment_target = getenv ("MACOSX_DEPLOYMENT_TARGET"); + if (macosx_deployment_target + /* Apparently, an empty string for MACOSX_DEPLOYMENT_TARGET means + "use the default". Or, possibly "use 10.1". We choose + to ignore the environment variable, as if it was never set. */ + && macosx_deployment_target[0]) + { + ++*decoded_options_count; + *decoded_options = XNEWVEC (struct cl_decoded_option, + *decoded_options_count); + (*decoded_options)[0] = argv[0]; + generate_option (OPT_mmacosx_version_min_, macosx_deployment_target, + 1, CL_DRIVER, &(*decoded_options)[1]); + memcpy (*decoded_options + 2, argv + 1, + (argc - 1) * sizeof (struct cl_decoded_option)); + return; + } + } + +#ifndef CROSS_DIRECTORY_STRUCTURE + + /* Try to find the version from the kernel, if we fail - we print a message + and give up. */ + if (!darwin_find_version_from_kernel (new_flag)) + return; + +#else + + /* For cross-compilers, default to the target OS version. */ + + strncpy (new_flag, DEF_MIN_OSX_VERSION, sizeof (new_flag)); + +#endif /* CROSS_DIRECTORY_STRUCTURE */ + + /* Add the new flag. */ + ++*decoded_options_count; + *decoded_options = XNEWVEC (struct cl_decoded_option, + *decoded_options_count); + (*decoded_options)[0] = argv[0]; + generate_option (OPT_mmacosx_version_min_, new_flag, + 1, CL_DRIVER, &(*decoded_options)[1]); + memcpy (*decoded_options + 2, argv + 1, + (argc - 1) * sizeof (struct cl_decoded_option)); + return; + +} + +/* Translate -filelist and -framework options in *DECODED_OPTIONS + (size *DECODED_OPTIONS_COUNT) to use -Xlinker so that they are + considered to be linker inputs in the case that no other inputs are + specified. Handling these options in DRIVER_SELF_SPECS does not + suffice because specs are too late to add linker inputs, and + handling them in LINK_SPEC does not suffice because the linker will + not be called if there are no other inputs. When native, also + default the -mmacosx-version-min flag. 
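As a concrete illustration of the rewriting described here (hypothetical command line): on a native x86 system, "gcc -arch x86_64 -framework Foundation main.c" has -arch x86_64 turned into -m64, and -framework Foundation re-spelled as -Xlinker -framework -Xlinker Foundation, so the framework still reaches the linker even when nothing else on the command line counts as a linker input.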
*/ + +void +darwin_driver_init (unsigned int *decoded_options_count, + struct cl_decoded_option **decoded_options) +{ + unsigned int i; + + for (i = 1; i < *decoded_options_count; i++) + { + if ((*decoded_options)[i].errors & CL_ERR_MISSING_ARG) + continue; + switch ((*decoded_options)[i].opt_index) + { +#if DARWIN_X86 + case OPT_arch: + if (!strcmp ((*decoded_options)[i].arg, "i386")) + generate_option (OPT_m32, NULL, 1, CL_DRIVER, &(*decoded_options)[i]); + else if (!strcmp ((*decoded_options)[i].arg, "x86_64")) + generate_option (OPT_m64, NULL, 1, CL_DRIVER, &(*decoded_options)[i]); + break; +#endif + + case OPT_filelist: + case OPT_framework: + ++*decoded_options_count; + *decoded_options = XRESIZEVEC (struct cl_decoded_option, + *decoded_options, + *decoded_options_count); + memmove (*decoded_options + i + 2, + *decoded_options + i + 1, + ((*decoded_options_count - i - 2) + * sizeof (struct cl_decoded_option))); + generate_option (OPT_Xlinker, (*decoded_options)[i].arg, 1, + CL_DRIVER, &(*decoded_options)[i + 1]); + generate_option (OPT_Xlinker, + (*decoded_options)[i].canonical_option[0], 1, + CL_DRIVER, &(*decoded_options)[i]); + break; + + default: + break; + } + } + + darwin_default_min_version (decoded_options_count, decoded_options); +} diff --git a/gcc-4.9/gcc/config/darwin-f.c b/gcc-4.9/gcc/config/darwin-f.c new file mode 100644 index 000000000..736df5b08 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-f.c @@ -0,0 +1,60 @@ +/* Darwin support needed only by Fortran frontends. + Copyright (C) 2008-2014 Free Software Foundation, Inc. + Contributed by Daniel Franke. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + + +/* Provide stubs for the hooks defined by darwin.h + TARGET_EXTRA_PRE_INCLUDES, TARGET_EXTRA_INCLUDES + + As both, gcc and gfortran link in incpath.o, we can not + conditionally undefine said hooks if fortran is build. + However, we can define do-nothing stubs of said hooks as + we are not interested in objc include files in Fortran. + + The hooks original purpose (see also darwin-c.c): + * darwin_register_objc_includes + Register the GNU objective-C runtime include path if STDINC. + + * darwin_register_frameworks + Register all the system framework paths if STDINC is true and setup + the missing_header callback for subframework searching if any + frameworks had been registered. */ + + +#include "ansidecl.h" + +/* Prototypes for functions below to avoid a lengthy list of includes + to achieve the same. 
*/ +void darwin_register_objc_includes (const char *, const char *, int); +void darwin_register_frameworks (const char *, const char *, int); + + +void +darwin_register_objc_includes (const char *sysroot ATTRIBUTE_UNUSED, + const char *iprefix ATTRIBUTE_UNUSED, + int stdinc ATTRIBUTE_UNUSED) +{ +} + +void +darwin_register_frameworks (const char *sysroot ATTRIBUTE_UNUSED, + const char *iprefix ATTRIBUTE_UNUSED, + int stdinc ATTRIBUTE_UNUSED) +{ +} diff --git a/gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def b/gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def new file mode 100644 index 000000000..dc55bb674 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-ppc-ldouble-patch.def @@ -0,0 +1,113 @@ +/* Copyright (C) 2008-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +PATCH_BUILTIN (BUILT_IN_ACOSHL) +PATCH_BUILTIN (BUILT_IN_ACOSL) +PATCH_BUILTIN (BUILT_IN_ASINHL) +PATCH_BUILTIN (BUILT_IN_ASINL) +PATCH_BUILTIN (BUILT_IN_ATAN2L) +PATCH_BUILTIN (BUILT_IN_ATANHL) +PATCH_BUILTIN (BUILT_IN_ATANL) +PATCH_BUILTIN (BUILT_IN_CABSL) +PATCH_BUILTIN (BUILT_IN_CACOSHL) +PATCH_BUILTIN (BUILT_IN_CACOSL) +PATCH_BUILTIN (BUILT_IN_CARGL) +PATCH_BUILTIN (BUILT_IN_CASINHL) +PATCH_BUILTIN (BUILT_IN_CASINL) +PATCH_BUILTIN (BUILT_IN_CATANHL) +PATCH_BUILTIN (BUILT_IN_CATANL) +PATCH_BUILTIN (BUILT_IN_CBRTL) +PATCH_BUILTIN (BUILT_IN_CCOSHL) +PATCH_BUILTIN (BUILT_IN_CCOSL) +PATCH_BUILTIN (BUILT_IN_CEILL) +PATCH_BUILTIN (BUILT_IN_CEXPL) +PATCH_BUILTIN (BUILT_IN_CIMAGL) +PATCH_BUILTIN (BUILT_IN_CLOGL) +PATCH_BUILTIN (BUILT_IN_CONJL) +PATCH_BUILTIN (BUILT_IN_COPYSIGNL) +PATCH_BUILTIN (BUILT_IN_COSHL) +PATCH_BUILTIN (BUILT_IN_COSL) +PATCH_BUILTIN (BUILT_IN_CPOWL) +PATCH_BUILTIN (BUILT_IN_CPROJL) +PATCH_BUILTIN (BUILT_IN_CREALL) +PATCH_BUILTIN (BUILT_IN_CSINHL) +PATCH_BUILTIN (BUILT_IN_CSINL) +PATCH_BUILTIN (BUILT_IN_CSQRTL) +PATCH_BUILTIN (BUILT_IN_CTANHL) +PATCH_BUILTIN (BUILT_IN_CTANL) +PATCH_BUILTIN (BUILT_IN_ERFCL) +PATCH_BUILTIN (BUILT_IN_ERFL) +PATCH_BUILTIN (BUILT_IN_EXP2L) +PATCH_BUILTIN (BUILT_IN_EXPL) +PATCH_BUILTIN (BUILT_IN_EXPM1L) +PATCH_BUILTIN (BUILT_IN_FABSL) +PATCH_BUILTIN (BUILT_IN_FDIML) +PATCH_BUILTIN (BUILT_IN_FLOORL) +PATCH_BUILTIN (BUILT_IN_FMAL) +PATCH_BUILTIN (BUILT_IN_FMAXL) +PATCH_BUILTIN (BUILT_IN_FMINL) +PATCH_BUILTIN (BUILT_IN_FMODL) +PATCH_BUILTIN (BUILT_IN_FREXPL) +PATCH_BUILTIN (BUILT_IN_HYPOTL) +PATCH_BUILTIN (BUILT_IN_ILOGBL) +PATCH_BUILTIN (BUILT_IN_LDEXPL) +PATCH_BUILTIN (BUILT_IN_LGAMMAL) +PATCH_BUILTIN (BUILT_IN_LLRINTL) +PATCH_BUILTIN (BUILT_IN_LLROUNDL) +PATCH_BUILTIN (BUILT_IN_LOG10L) +PATCH_BUILTIN (BUILT_IN_LOG1PL) +PATCH_BUILTIN (BUILT_IN_LOG2L) +PATCH_BUILTIN (BUILT_IN_LOGBL) +PATCH_BUILTIN (BUILT_IN_LOGL) +PATCH_BUILTIN (BUILT_IN_LRINTL) +PATCH_BUILTIN (BUILT_IN_LROUNDL) +PATCH_BUILTIN (BUILT_IN_MODFL) +PATCH_BUILTIN (BUILT_IN_NANL) +PATCH_BUILTIN (BUILT_IN_NEARBYINTL) +PATCH_BUILTIN (BUILT_IN_NEXTAFTERL) +PATCH_BUILTIN (BUILT_IN_NEXTTOWARDL) +PATCH_BUILTIN (BUILT_IN_POWL) +PATCH_BUILTIN 
(BUILT_IN_REMAINDERL) +PATCH_BUILTIN (BUILT_IN_REMQUOL) +PATCH_BUILTIN (BUILT_IN_RINTL) +PATCH_BUILTIN (BUILT_IN_ROUNDL) +PATCH_BUILTIN (BUILT_IN_SCALBLNL) +PATCH_BUILTIN (BUILT_IN_SCALBNL) +PATCH_BUILTIN (BUILT_IN_SINHL) +PATCH_BUILTIN (BUILT_IN_SINL) +PATCH_BUILTIN (BUILT_IN_SQRTL) +PATCH_BUILTIN (BUILT_IN_TANHL) +PATCH_BUILTIN (BUILT_IN_TANL) +PATCH_BUILTIN (BUILT_IN_TGAMMAL) +PATCH_BUILTIN (BUILT_IN_TRUNCL) + +PATCH_BUILTIN_NO64 (BUILT_IN_VFPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VFSCANF) +PATCH_BUILTIN_NO64 (BUILT_IN_VPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSCANF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSNPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSPRINTF) +PATCH_BUILTIN_NO64 (BUILT_IN_VSSCANF) + +PATCH_BUILTIN_VARIADIC (BUILT_IN_FPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_FSCANF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_PRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SCANF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SNPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SPRINTF) +PATCH_BUILTIN_VARIADIC (BUILT_IN_SSCANF) diff --git a/gcc-4.9/gcc/config/darwin-protos.h b/gcc-4.9/gcc/config/darwin-protos.h new file mode 100644 index 000000000..20974c19e --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-protos.h @@ -0,0 +1,127 @@ +/* Prototypes. + Copyright (C) 2001-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +extern void darwin_init_sections (void); +extern int name_needs_quotes (const char *); + +extern void machopic_validate_stub_or_non_lazy_ptr (const char *); + +extern void machopic_output_function_base_name (FILE *); +extern const char *machopic_indirection_name (rtx, bool); +extern const char *machopic_mcount_stub_name (void); +extern bool machopic_should_output_picbase_label (void); +extern const char *machopic_get_function_picbase (void); + +#ifdef RTX_CODE + +extern rtx machopic_gen_offset (rtx); +extern int machopic_operand_p (rtx); +extern int machopic_symbol_defined_p (rtx sym_ref); +extern enum machopic_addr_class machopic_classify_symbol (rtx); + +extern rtx machopic_indirect_data_reference (rtx, rtx); +extern rtx machopic_indirect_call_target (rtx); +extern rtx machopic_legitimize_pic_address (rtx, enum machine_mode, rtx); + +extern void machopic_asm_out_constructor (rtx, int); +extern void machopic_asm_out_destructor (rtx, int); +extern section *machopic_select_rtx_section (enum machine_mode, rtx, + unsigned HOST_WIDE_INT); +#endif /* RTX_CODE */ + +#ifdef TREE_CODE + +extern void machopic_define_symbol (rtx); +extern void darwin_encode_section_info (tree, rtx, int); +extern void darwin_set_default_type_attributes (tree); + +#endif /* TREE_CODE */ + +extern void machopic_finish (FILE *); + +extern int machopic_reloc_rw_mask (void); +extern section *machopic_select_section (tree, int, unsigned HOST_WIDE_INT); + +extern section *darwin_function_section (tree, enum node_frequency, bool, bool); +extern section *darwin_tm_clone_table_section (void); +extern void darwin_function_switched_text_sections (FILE *, tree, bool); + +extern void darwin_unique_section (tree decl, int reloc); +extern void darwin_asm_named_section (const char *, unsigned int, tree); +extern void darwin_non_lazy_pcrel (FILE *, rtx); + +extern void darwin_emit_unwind_label (FILE *, tree, int, int); +extern void darwin_emit_except_table_label (FILE *); + +extern void darwin_pragma_ignore (struct cpp_reader *); +extern void darwin_pragma_options (struct cpp_reader *); +extern void darwin_pragma_unused (struct cpp_reader *); +extern void darwin_pragma_ms_struct (struct cpp_reader *); + +extern void darwin_file_start (void); +extern void darwin_file_end (void); + +extern void darwin_asm_lto_start (void); +extern void darwin_asm_lto_end (void); + +extern void darwin_mark_decl_preserved (const char *); + +extern tree darwin_handle_kext_attribute (tree *, tree, tree, int, bool *); +extern tree darwin_handle_weak_import_attribute (tree *node, tree name, + tree args, int flags, + bool * no_add_attrs); +extern void machopic_output_stub (FILE *, const char *, const char *); +extern void darwin_globalize_label (FILE *, const char *); +extern void darwin_assemble_visibility (tree, int); + +extern void darwin_asm_output_dwarf_delta (FILE *, int, const char *, + const char *); +extern void darwin_asm_output_dwarf_offset (FILE *, int, const char *, + section *); + +extern void darwin_asm_declare_object_name (FILE *, const char *, tree); +extern void darwin_asm_declare_constant_name (FILE *, const char *, + const_tree, HOST_WIDE_INT); + +extern void darwin_output_aligned_bss (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, unsigned int); + +extern void darwin_asm_output_aligned_decl_local (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); +extern void darwin_asm_output_aligned_decl_common (FILE *, tree, const char *, + unsigned HOST_WIDE_INT, + unsigned int); + +extern bool darwin_binds_local_p 
(const_tree); +extern void darwin_cpp_builtins (struct cpp_reader *); + +extern tree darwin_init_cfstring_builtins (unsigned); +extern tree darwin_fold_builtin (tree, int, tree *, bool); +extern bool darwin_cfstring_p (tree); +extern tree darwin_build_constant_cfstring (tree); +extern void darwin_enter_string_into_cfstring_table (tree); + +extern void darwin_asm_output_anchor (rtx symbol); +extern bool darwin_use_anchors_for_symbol_p (const_rtx symbol); +extern bool darwin_kextabi_p (void); +extern void darwin_override_options (void); +extern void darwin_patch_builtins (void); +extern void darwin_rename_builtins (void); +extern bool darwin_libc_has_function (enum function_class fn_class); diff --git a/gcc-4.9/gcc/config/darwin-sections.def b/gcc-4.9/gcc/config/darwin-sections.def new file mode 100644 index 000000000..23474e125 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin-sections.def @@ -0,0 +1,195 @@ +/* Copyright (C) 2005-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Since Darwin's ld will not allow zero-sized objects, and gcc wants them, + we emit one byte (in darwin.c) when such an object is encountered. + + This messes up section anchoring because the emitted byte is not counted + outside the port. To cope with this, we set aside sections for zero-sized + objects and disallow those sections from participating in section anchors + ("zobj_" sections, below). + + Items that might be coalesced by the linker are prevented from participating, + (and those in mergeable sections are disallowed in varasm.c). */ + +/* The .text section is generated in varasm.c */ +DEF_SECTION (text_coal_section, SECTION_CODE|SECTION_NO_ANCHOR, + ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", 0) + +DEF_SECTION (text_hot_section, SECTION_CODE, + ".section __TEXT,__text_hot,regular,pure_instructions", 0) +DEF_SECTION (text_cold_section, SECTION_CODE, + ".section __TEXT,__text_cold,regular,pure_instructions", 0) +DEF_SECTION (text_startup_section, SECTION_CODE, + ".section __TEXT,__text_startup,regular,pure_instructions", 0) +DEF_SECTION (text_exit_section, SECTION_CODE, + ".section __TEXT,__text_exit,regular,pure_instructions", 0) + +DEF_SECTION (text_hot_coal_section, SECTION_CODE, + ".section __TEXT,__text_hot_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_cold_coal_section, SECTION_CODE, + ".section __TEXT,__text_cold_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_startup_coal_section, SECTION_CODE, + ".section __TEXT,__text_stt_coal,coalesced,pure_instructions", 0) +DEF_SECTION (text_exit_coal_section, SECTION_CODE, + ".section __TEXT,__text_exit_coal,coalesced,pure_instructions", 0) + +/* const */ +DEF_SECTION (const_section, 0, ".const", 0) +DEF_SECTION (const_coal_section, SECTION_NO_ANCHOR, + ".section __TEXT,__const_coal,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. 
*/ +DEF_SECTION (zobj_const_section, SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_const", 0) + +/* Write-able data. '.data' handled in varasm.c */ +DEF_SECTION (static_data_section, SECTION_WRITE, ".static_data", 0) +DEF_SECTION (data_coal_section, SECTION_WRITE|SECTION_NO_ANCHOR, + ".section __DATA,__datacoal_nt,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. */ +DEF_SECTION (zobj_data_section, SECTION_WRITE|SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_data", 0) + +/* BSS - .lcomm / .zerofill __DATA,__bss sections cannot be switched to + explicitly (will create an assembler error). */ +DEF_SECTION (zobj_bss_section, SECTION_WRITE|SECTION_BSS|SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_bss", 0) + +/* const data */ +DEF_SECTION (const_data_section, 0, ".const_data", 0) +DEF_SECTION (const_data_coal_section, SECTION_NO_ANCHOR, + ".section __DATA,__const_coal,coalesced", 0) +/* Place to put zero-sized to avoid issues with section anchors. */ +DEF_SECTION (zobj_const_data_section, SECTION_NO_ANCHOR, + ".section\t__DATA,__zobj_cnst_data", 0) + +/* Strings and other literals. */ +DEF_SECTION (cstring_section, SECTION_MERGE | SECTION_STRINGS, ".cstring", 0) +DEF_SECTION (literal4_section, SECTION_MERGE, ".literal4", 0) +DEF_SECTION (literal8_section, SECTION_MERGE, ".literal8", 0) +DEF_SECTION (literal16_section, SECTION_MERGE, ".literal16", 0) +/* Unlike constant NSStrings, constant CFStrings do not live in the + __OBJC segment since they may also occur in pure C or C++ programs. */ +DEF_SECTION (cfstring_constant_object_section, 0, + ".section __DATA, __cfstring", 0) + +/* Module init, term, constructors & destructors. */ +DEF_SECTION (mod_init_section, 0, ".mod_init_func", 0) +DEF_SECTION (mod_term_section, 0, ".mod_term_func", 0) +DEF_SECTION (constructor_section, 0, ".constructor", 0) +DEF_SECTION (destructor_section, 0, ".destructor", 0) + +/* Objective-C ABI=0 (Original version) sections. */ +DEF_SECTION (objc_class_section, 0, ".objc_class", 1) +DEF_SECTION (objc_meta_class_section, 0, ".objc_meta_class", 1) +DEF_SECTION (objc_category_section, 0, ".objc_category", 1) +DEF_SECTION (objc_class_vars_section, 0, ".objc_class_vars", 1) +DEF_SECTION (objc_instance_vars_section, 0, ".objc_instance_vars", 1) +DEF_SECTION (objc_cls_meth_section, 0, ".objc_cls_meth", 1) +DEF_SECTION (objc_inst_meth_section, 0, ".objc_inst_meth", 1) +DEF_SECTION (objc_cat_cls_meth_section, 0, ".objc_cat_cls_meth", 1) +DEF_SECTION (objc_cat_inst_meth_section, 0, ".objc_cat_inst_meth", 1) +DEF_SECTION (objc_selector_refs_section, SECTION_MERGE, ".objc_message_refs", 1) +DEF_SECTION (objc_selector_fixup_section, 0, + ".section __OBJC, __sel_fixup, regular, no_dead_strip", 1) +DEF_SECTION (objc_symbols_section, 0, ".objc_symbols", 1) +DEF_SECTION (objc_module_info_section, 0, ".objc_module_info", 1) +DEF_SECTION (objc_protocol_section, 0, ".objc_protocol", 1) +DEF_SECTION (objc_string_object_section, 0, ".objc_string_object", 1) +DEF_SECTION (objc_constant_string_object_section, 0, + ".section __OBJC, __cstring_object, regular, no_dead_strip", 0) + +/* Fix-and-Continue image marker. 
*/ +DEF_SECTION (objc_image_info_section, 0, + ".section __OBJC, __image_info, regular, no_dead_strip", 1) +DEF_SECTION (objc_class_names_section, 0, ".objc_class_names", 1) +DEF_SECTION (objc_meth_var_names_section, 0, ".objc_meth_var_names", 1) +DEF_SECTION (objc_meth_var_types_section, 0, ".objc_meth_var_types", 1) +DEF_SECTION (objc_cls_refs_section, SECTION_MERGE, ".objc_cls_refs", 1) + +/* Stubs and symbol indirection sections. */ +/* lazy symbol pointers. */ +DEF_SECTION (machopic_lazy_symbol_ptr_section, SECTION_NO_ANCHOR, + ".lazy_symbol_pointer", 0) +DEF_SECTION (machopic_lazy_symbol_ptr2_section, SECTION_NO_ANCHOR, + ".section __DATA, __la_sym_ptr2,lazy_symbol_pointers", 0) +DEF_SECTION (machopic_lazy_symbol_ptr3_section, SECTION_NO_ANCHOR, + ".section __DATA, __la_sym_ptr3,lazy_symbol_pointers", 0) +/* non-lazy symbol pointers. */ +DEF_SECTION (machopic_nl_symbol_ptr_section, SECTION_NO_ANCHOR, + MACHOPIC_NL_SYMBOL_PTR_SECTION, 0) +/* Symbol stubs. */ +DEF_SECTION (machopic_symbol_stub_section, SECTION_NO_ANCHOR, + ".symbol_stub", 0) +DEF_SECTION (machopic_symbol_stub1_section, SECTION_NO_ANCHOR, + ".section __TEXT,__symbol_stub1,symbol_stubs," + "pure_instructions,16", 0) +/* PIC symbol stubs. */ +DEF_SECTION (machopic_picsymbol_stub_section, SECTION_NO_ANCHOR, + ".picsymbol_stub", 0) +DEF_SECTION (machopic_picsymbol_stub1_section, SECTION_NO_ANCHOR, + ".section __TEXT,__picsymbolstub1,symbol_stubs," + "pure_instructions,32", 0) +DEF_SECTION (machopic_picsymbol_stub2_section, SECTION_NO_ANCHOR, + ".section __TEXT,__picsymbolstub2,symbol_stubs,pure_instructions,25", 0) +DEF_SECTION (machopic_picsymbol_stub3_section, SECTION_NO_ANCHOR, + ".section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5", 0) + +/* Exception-related. */ +DEF_SECTION (darwin_exception_section, SECTION_NO_ANCHOR, + ".section __DATA,__gcc_except_tab", 0) +DEF_SECTION (darwin_eh_frame_section, SECTION_NO_ANCHOR, + ".section " EH_FRAME_SECTION_NAME ",__eh_frame" + EH_FRAME_SECTION_ATTR, 0) + +/* Sections for ObjC ABI=1 (ObjC 'V1' extensions) */ +DEF_SECTION (objc1_class_ext_section, 0, + ".section __OBJC, __class_ext, regular, no_dead_strip", 1) +DEF_SECTION (objc1_prop_list_section, 0, + ".section __OBJC, __property, regular, no_dead_strip", 1) +DEF_SECTION (objc1_protocol_ext_section, 0, + ".section __OBJC, __protocol_ext, regular, no_dead_strip", 1) + +/* Sections for ObjC ABI=2 (m64). 
*/ +DEF_SECTION (objc2_message_refs_section, 0, + ".section __DATA, __objc_msgrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_classdefs_section, 0, ".section __DATA, __objc_data", 1) +DEF_SECTION (objc2_metadata_section, 0, ".section __DATA, __objc_const", 1) + +DEF_SECTION (objc2_classrefs_section, 0, + ".section __DATA, __objc_classrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_classlist_section, 0, + ".section __DATA, __objc_classlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_categorylist_section, 0, + ".section __DATA, __objc_catlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_selector_refs_section, 0, + ".section __DATA, __objc_selrefs, literal_pointers, no_dead_strip", 1) +DEF_SECTION (objc2_nonlazy_class_section, 0, + ".section __DATA, __objc_nlclslist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_nonlazy_category_section, 0, + ".section __DATA, __objc_nlcatlist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_protocollist_section, 0, + ".section __DATA, __objc_protolist, regular, no_dead_strip", 1) +DEF_SECTION (objc2_protocolrefs_section, 0, + ".section __DATA, __objc_protorefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_super_classrefs_section, 0, + ".section __DATA, __objc_superrefs, regular, no_dead_strip", 1) +DEF_SECTION (objc2_image_info_section, 0, + ".section __DATA, __objc_imageinfo, regular, no_dead_strip", 1) +DEF_SECTION (objc2_constant_string_object_section, 0, + ".section __DATA, __objc_stringobj, regular, no_dead_strip", 1) diff --git a/gcc-4.9/gcc/config/darwin.c b/gcc-4.9/gcc/config/darwin.c new file mode 100644 index 000000000..adf370d4d --- /dev/null +++ b/gcc-4.9/gcc/config/darwin.c @@ -0,0 +1,3663 @@ +/* Functions for generic Darwin as target machine for GNU C compiler. + Copyright (C) 1989-2014 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "tree.h" +#include "stringpool.h" +#include "varasm.h" +#include "stor-layout.h" +#include "expr.h" +#include "reload.h" +#include "function.h" +#include "ggc.h" +#include "langhooks.h" +#include "target.h" +#include "tm_p.h" +#include "diagnostic-core.h" +#include "toplev.h" +#include "hashtab.h" +#include "df.h" +#include "debug.h" +#include "obstack.h" +#include "pointer-set.h" +#include "hash-table.h" +#include "vec.h" +#include "basic-block.h" +#include "tree-ssa-alias.h" +#include "internal-fn.h" +#include "gimple-fold.h" +#include "tree-eh.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "gimplify.h" +#include "lto-streamer.h" + +/* Darwin supports a feature called fix-and-continue, which is used + for rapid turn around debugging. 
When code is compiled with the + -mfix-and-continue flag, two changes are made to the generated code + that allow the system to do things that it would normally not be + able to do easily. These changes allow gdb to load in + recompilation of a translation unit that has been changed into a + running program and replace existing functions and methods of that + translation unit with versions of those functions and methods + from the newly compiled translation unit. The new functions access + the existing static symbols from the old translation unit, if the + symbol existed in the unit to be replaced, and from the new + translation unit, otherwise. + + The changes are to insert 5 nops at the beginning of all functions + and to use indirection to get at static symbols. The 5 nops + are required by consumers of the generated code. Currently, gdb + uses this to patch in a jump to the overriding function, this + allows all uses of the old name to forward to the replacement, + including existing function pointers and virtual methods. See + rs6000_emit_prologue for the code that handles the nop insertions. + + The added indirection allows gdb to redirect accesses to static + symbols from the newly loaded translation unit to the existing + symbol, if any. @code{static} symbols are special and are handled by + setting the second word in the .non_lazy_symbol_pointer data + structure to symbol. See indirect_data for the code that handles + the extra indirection, and machopic_output_indirection and its use + of MACHO_SYMBOL_STATIC for the code that handles @code{static} + symbol indirection. */ + +/* For darwin >= 9 (OSX 10.5) the linker is capable of making the necessary + branch islands and we no longer need to emit darwin stubs. + However, if we are generating code for earlier systems (or for use in the + kernel) the stubs might still be required, and this will be set true. */ +int darwin_emit_branch_islands = false; + +typedef struct GTY(()) cdtor_record { + rtx symbol; + int priority; /* [con/de]structor priority */ + int position; /* original position */ +} cdtor_record; + +static GTY(()) vec *ctors = NULL; +static GTY(()) vec *dtors = NULL; + +/* A flag to determine whether we are running c++ or obj-c++. This has to be + settable from non-c-family contexts too (i.e. we can't use the c_dialect_ + functions). */ +int darwin_running_cxx; + +/* Some code-gen now depends on OS major version numbers (at least). */ +int generating_for_darwin_version ; + +/* Section names. */ +section * darwin_sections[NUM_DARWIN_SECTIONS]; + +/* While we transition to using in-tests instead of ifdef'd code. */ +#ifndef HAVE_lo_sum +#define HAVE_lo_sum 0 +#define gen_macho_high(a,b) (a) +#define gen_macho_low(a,b,c) (a) +#endif + +/* True if we're setting __attribute__ ((ms_struct)). */ +int darwin_ms_struct = false; + +/* Earlier versions of Darwin as do not recognize an alignment field in + .comm directives, this should be set for versions that allow it. */ +int emit_aligned_common = false; + +/* A get_unnamed_section callback used to switch to an ObjC section. + DIRECTIVE is as for output_section_asm_op. */ + +static void +output_objc_section_asm_op (const void *directive) +{ + static bool been_here = false; + + /* The NeXT ObjC Runtime requires these sections to be present and in + order in the object. The code below implements this by emitting + a section header for each ObjC section the first time that an ObjC + section is requested. */ + if (! 
been_here) + { + section *saved_in_section = in_section; + static const enum darwin_section_enum tomark[] = + { + /* written, cold -> hot */ + objc_cat_cls_meth_section, + objc_cat_inst_meth_section, + objc_string_object_section, + objc_constant_string_object_section, + objc_selector_refs_section, + objc_selector_fixup_section, + objc_cls_refs_section, + objc_class_section, + objc_meta_class_section, + /* shared, hot -> cold */ + objc_cls_meth_section, + objc_inst_meth_section, + objc_protocol_section, + objc_class_names_section, + objc_meth_var_types_section, + objc_meth_var_names_section, + objc_category_section, + objc_class_vars_section, + objc_instance_vars_section, + objc_module_info_section, + objc_symbols_section, + }; + /* ABI=1 */ + static const enum darwin_section_enum tomarkv1[] = + { + objc1_protocol_ext_section, + objc1_class_ext_section, + objc1_prop_list_section + } ; + /* ABI=2 */ + static const enum darwin_section_enum tomarkv2[] = + { + objc2_message_refs_section, + objc2_classdefs_section, + objc2_metadata_section, + objc2_classrefs_section, + objc2_classlist_section, + objc2_categorylist_section, + objc2_selector_refs_section, + objc2_nonlazy_class_section, + objc2_nonlazy_category_section, + objc2_protocollist_section, + objc2_protocolrefs_section, + objc2_super_classrefs_section, + objc2_image_info_section, + objc2_constant_string_object_section + } ; + size_t i; + + been_here = true; + if (flag_objc_abi < 2) + { + for (i = 0; i < ARRAY_SIZE (tomark); i++) + switch_to_section (darwin_sections[tomark[i]]); + if (flag_objc_abi == 1) + for (i = 0; i < ARRAY_SIZE (tomarkv1); i++) + switch_to_section (darwin_sections[tomarkv1[i]]); + } + else + for (i = 0; i < ARRAY_SIZE (tomarkv2); i++) + switch_to_section (darwin_sections[tomarkv2[i]]); + /* Make sure we don't get varasm.c out of sync with us. */ + switch_to_section (saved_in_section); + } + output_section_asm_op (directive); +} + + +/* Private flag applied to disable section-anchors in a particular section. */ +#define SECTION_NO_ANCHOR SECTION_MACH_DEP + + +/* Implement TARGET_ASM_INIT_SECTIONS. */ + +void +darwin_init_sections (void) +{ +#define DEF_SECTION(NAME, FLAGS, DIRECTIVE, OBJC) \ + darwin_sections[NAME] = \ + get_unnamed_section (FLAGS, (OBJC \ + ? output_objc_section_asm_op \ + : output_section_asm_op), \ + "\t" DIRECTIVE); +#include "config/darwin-sections.def" +#undef DEF_SECTION + + readonly_data_section = darwin_sections[const_section]; + exception_section = darwin_sections[darwin_exception_section]; + eh_frame_section = darwin_sections[darwin_eh_frame_section]; +} + +int +name_needs_quotes (const char *name) +{ + int c; + while ((c = *name++) != '\0') + if (! ISIDNUM (c) + && c != '.' && c != '$' && c != '_' ) + return 1; + return 0; +} + +/* Return true if SYM_REF can be used without an indirection. */ +int +machopic_symbol_defined_p (rtx sym_ref) +{ + if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_DEFINED) + return true; + + /* If a symbol references local and is not an extern to this + file, then the symbol might be able to declared as defined. */ + if (SYMBOL_REF_LOCAL_P (sym_ref) && ! SYMBOL_REF_EXTERNAL_P (sym_ref)) + { + /* If the symbol references a variable and the variable is a + common symbol, then this symbol is not defined. 
*/ + if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_VARIABLE) + { + tree decl = SYMBOL_REF_DECL (sym_ref); + if (!decl) + return true; + if (DECL_COMMON (decl)) + return false; + } + return true; + } + return false; +} + +/* This module assumes that (const (symbol_ref "foo")) is a legal pic + reference, which will not be changed. */ + +enum machopic_addr_class +machopic_classify_symbol (rtx sym_ref) +{ + bool function_p; + + function_p = SYMBOL_REF_FUNCTION_P (sym_ref); + if (machopic_symbol_defined_p (sym_ref)) + return (function_p + ? MACHOPIC_DEFINED_FUNCTION : MACHOPIC_DEFINED_DATA); + else + return (function_p + ? MACHOPIC_UNDEFINED_FUNCTION : MACHOPIC_UNDEFINED_DATA); +} + +#ifndef TARGET_FIX_AND_CONTINUE +#define TARGET_FIX_AND_CONTINUE 0 +#endif + +/* Indicate when fix-and-continue style code generation is being used + and when a reference to data should be indirected so that it can be + rebound in a new translation unit to reference the original instance + of that data. Symbol names that are for code generation local to + the translation unit are bound to the new translation unit; + currently this means symbols that begin with L or _OBJC_; + otherwise, we indicate that an indirect reference should be made to + permit the runtime to rebind new instances of the translation unit + to the original instance of the data. */ + +static int +indirect_data (rtx sym_ref) +{ + int lprefix; + const char *name; + + /* If we aren't generating fix-and-continue code, don't do anything + special. */ + if (TARGET_FIX_AND_CONTINUE == 0) + return 0; + + /* Otherwise, all symbol except symbols that begin with L or _OBJC_ + are indirected. Symbols that begin with L and _OBJC_ are always + bound to the current translation unit as they are used for + generated local data of the translation unit. */ + + name = XSTR (sym_ref, 0); + + lprefix = (((name[0] == '*' || name[0] == '&') + && (name[1] == 'L' || (name[1] == '"' && name[2] == 'L'))) + || (strncmp (name, "_OBJC_", 6) == 0)); + + return ! lprefix; +} + +static int +machopic_data_defined_p (rtx sym_ref) +{ + if (indirect_data (sym_ref)) + return 0; + + switch (machopic_classify_symbol (sym_ref)) + { + case MACHOPIC_DEFINED_DATA: + case MACHOPIC_DEFINED_FUNCTION: + return 1; + default: + return 0; + } +} + +void +machopic_define_symbol (rtx mem) +{ + rtx sym_ref; + + gcc_assert (GET_CODE (mem) == MEM); + sym_ref = XEXP (mem, 0); + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED; +} + +/* Return either ORIG or: + + (const:P (unspec:P [ORIG] UNSPEC_MACHOPIC_OFFSET)) + + depending on MACHO_DYNAMIC_NO_PIC_P. */ +rtx +machopic_gen_offset (rtx orig) +{ + if (MACHO_DYNAMIC_NO_PIC_P) + return orig; + else + { + /* Play games to avoid marking the function as needing pic if we + are being called as part of the cost-estimation process. */ + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) + crtl->uses_pic_offset_table = 1; + orig = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), + UNSPEC_MACHOPIC_OFFSET); + return gen_rtx_CONST (Pmode, orig); + } +} + +static GTY(()) const char * function_base_func_name; +static GTY(()) int current_pic_label_num; +static GTY(()) int emitted_pic_label_num; + +static void +update_pic_label_number_if_needed (void) +{ + const char *current_name; + + /* When we are generating _get_pc thunks within stubs, there is no current + function. 
*/ + if (current_function_decl) + { + current_name = + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl)); + if (function_base_func_name != current_name) + { + ++current_pic_label_num; + function_base_func_name = current_name; + } + } + else + { + ++current_pic_label_num; + function_base_func_name = "L_machopic_stub_dummy"; + } +} + +void +machopic_output_function_base_name (FILE *file) +{ + /* If dynamic-no-pic is on, we should not get here. */ + gcc_assert (!MACHO_DYNAMIC_NO_PIC_P); + + update_pic_label_number_if_needed (); + fprintf (file, "L%d$pb", current_pic_label_num); +} + +char curr_picbasename[32]; + +const char * +machopic_get_function_picbase (void) +{ + /* If dynamic-no-pic is on, we should not get here. */ + gcc_assert (!MACHO_DYNAMIC_NO_PIC_P); + + update_pic_label_number_if_needed (); + snprintf (curr_picbasename, 32, "L%d$pb", current_pic_label_num); + return (const char *) curr_picbasename; +} + +bool +machopic_should_output_picbase_label (void) +{ + update_pic_label_number_if_needed (); + + if (current_pic_label_num == emitted_pic_label_num) + return false; + + emitted_pic_label_num = current_pic_label_num; + return true; +} + +/* The suffix attached to non-lazy pointer symbols. */ +#define NON_LAZY_POINTER_SUFFIX "$non_lazy_ptr" +/* The suffix attached to stub symbols. */ +#define STUB_SUFFIX "$stub" + +typedef struct GTY (()) machopic_indirection +{ + /* The SYMBOL_REF for the entity referenced. */ + rtx symbol; + /* The name of the stub or non-lazy pointer. */ + const char * ptr_name; + /* True iff this entry is for a stub (as opposed to a non-lazy + pointer). */ + bool stub_p; + /* True iff this stub or pointer pointer has been referenced. */ + bool used; +} machopic_indirection; + +/* A table mapping stub names and non-lazy pointer names to + SYMBOL_REFs for the stubbed-to and pointed-to entities. */ + +static GTY ((param_is (struct machopic_indirection))) htab_t + machopic_indirections; + +/* Return a hash value for a SLOT in the indirections hash table. */ + +static hashval_t +machopic_indirection_hash (const void *slot) +{ + const machopic_indirection *p = (const machopic_indirection *) slot; + return htab_hash_string (p->ptr_name); +} + +/* Returns true if the KEY is the same as that associated with + SLOT. */ + +static int +machopic_indirection_eq (const void *slot, const void *key) +{ + return strcmp (((const machopic_indirection *) slot)->ptr_name, + (const char *) key) == 0; +} + +/* Return the name of the non-lazy pointer (if STUB_P is false) or + stub (if STUB_B is true) corresponding to the given name. 
*/ + +const char * +machopic_indirection_name (rtx sym_ref, bool stub_p) +{ + char *buffer; + const char *name = XSTR (sym_ref, 0); + size_t namelen = strlen (name); + machopic_indirection *p; + void ** slot; + bool needs_quotes; + const char *suffix; + const char *prefix = user_label_prefix; + const char *quote = ""; + tree id; + + id = maybe_get_identifier (name); + if (id) + { + tree id_orig = id; + + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + if (id != id_orig) + { + name = IDENTIFIER_POINTER (id); + namelen = strlen (name); + } + } + + if (name[0] == '*') + { + prefix = ""; + ++name; + --namelen; + } + + needs_quotes = name_needs_quotes (name); + if (needs_quotes) + { + quote = "\""; + } + + if (stub_p) + suffix = STUB_SUFFIX; + else + suffix = NON_LAZY_POINTER_SUFFIX; + + buffer = XALLOCAVEC (char, strlen ("&L") + + strlen (prefix) + + namelen + + strlen (suffix) + + 2 * strlen (quote) + + 1 /* '\0' */); + + /* Construct the name of the non-lazy pointer or stub. */ + sprintf (buffer, "&%sL%s%s%s%s", quote, prefix, name, suffix, quote); + + if (!machopic_indirections) + machopic_indirections = htab_create_ggc (37, + machopic_indirection_hash, + machopic_indirection_eq, + /*htab_del=*/NULL); + + slot = htab_find_slot_with_hash (machopic_indirections, buffer, + htab_hash_string (buffer), INSERT); + if (*slot) + { + p = (machopic_indirection *) *slot; + } + else + { + p = ggc_alloc_machopic_indirection (); + p->symbol = sym_ref; + p->ptr_name = xstrdup (buffer); + p->stub_p = stub_p; + p->used = false; + *slot = p; + } + + return p->ptr_name; +} + +/* Return the name of the stub for the mcount function. */ + +const char* +machopic_mcount_stub_name (void) +{ + rtx symbol = gen_rtx_SYMBOL_REF (Pmode, "*mcount"); + return machopic_indirection_name (symbol, /*stub_p=*/true); +} + +/* If NAME is the name of a stub or a non-lazy pointer , mark the stub + or non-lazy pointer as used -- and mark the object to which the + pointer/stub refers as used as well, since the pointer/stub will + emit a reference to it. */ + +void +machopic_validate_stub_or_non_lazy_ptr (const char *name) +{ + machopic_indirection *p; + + p = ((machopic_indirection *) + (htab_find_with_hash (machopic_indirections, name, + htab_hash_string (name)))); + if (p && ! p->used) + { + const char *real_name; + tree id; + + p->used = true; + + /* Do what output_addr_const will do when we actually call it. */ + if (SYMBOL_REF_DECL (p->symbol)) + mark_decl_referenced (SYMBOL_REF_DECL (p->symbol)); + + real_name = targetm.strip_name_encoding (XSTR (p->symbol, 0)); + + id = maybe_get_identifier (real_name); + if (id) + mark_referenced (id); + } +} + +/* Transform ORIG, which may be any data source, to the corresponding + source using indirections. */ + +rtx +machopic_indirect_data_reference (rtx orig, rtx reg) +{ + rtx ptr_ref = orig; + + if (! MACHOPIC_INDIRECT) + return orig; + + if (GET_CODE (orig) == SYMBOL_REF) + { + int defined = machopic_data_defined_p (orig); + + if (defined && MACHO_DYNAMIC_NO_PIC_P) + { + if (DARWIN_PPC) + { + /* Create a new register for CSE opportunities. */ + rtx hi_reg = (!can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode)); + emit_insn (gen_macho_high (hi_reg, orig)); + emit_insn (gen_macho_low (reg, hi_reg, orig)); + return reg; + } + else if (DARWIN_X86) + return orig; + else + /* some other cpu -- writeme! 
*/ + gcc_unreachable (); + } + else if (defined) + { + rtx offset = NULL; + if (DARWIN_PPC || HAVE_lo_sum) + offset = machopic_gen_offset (orig); + + if (DARWIN_PPC) + { + rtx hi_sum_reg = (!can_create_pseudo_p () + ? reg + : gen_reg_rtx (Pmode)); + + gcc_assert (reg); + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, + gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + gen_rtx_HIGH (Pmode, offset)))); + emit_insn (gen_rtx_SET (Pmode, reg, + gen_rtx_LO_SUM (Pmode, hi_sum_reg, + copy_rtx (offset)))); + + orig = reg; + } + else if (HAVE_lo_sum) + { + gcc_assert (reg); + + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, offset))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + copy_rtx (offset)))); + emit_use (pic_offset_table_rtx); + + orig = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, reg); + } + return orig; + } + + ptr_ref = (gen_rtx_SYMBOL_REF + (Pmode, + machopic_indirection_name (orig, /*stub_p=*/false))); + + SYMBOL_REF_DATA (ptr_ref) = SYMBOL_REF_DATA (orig); + + ptr_ref = gen_const_mem (Pmode, ptr_ref); + machopic_define_symbol (ptr_ref); + + if (DARWIN_X86 + && reg + && MACHO_DYNAMIC_NO_PIC_P) + { + emit_insn (gen_rtx_SET (Pmode, reg, ptr_ref)); + ptr_ref = reg; + } + + return ptr_ref; + } + else if (GET_CODE (orig) == CONST) + { + /* If "(const (plus ...", walk the PLUS and return that result. + PLUS processing (below) will restore the "(const ..." if + appropriate. */ + if (GET_CODE (XEXP (orig, 0)) == PLUS) + return machopic_indirect_data_reference (XEXP (orig, 0), reg); + else + return orig; + } + else if (GET_CODE (orig) == MEM) + { + XEXP (ptr_ref, 0) = + machopic_indirect_data_reference (XEXP (orig, 0), reg); + return ptr_ref; + } + else if (GET_CODE (orig) == PLUS) + { + rtx base, result; + /* When the target is i386, this code prevents crashes due to the + compiler's ignorance on how to move the PIC base register to + other registers. (The reload phase sometimes introduces such + insns.) */ + if (GET_CODE (XEXP (orig, 0)) == REG + && REGNO (XEXP (orig, 0)) == PIC_OFFSET_TABLE_REGNUM + /* Prevent the same register from being erroneously used + as both the base and index registers. */ + && (DARWIN_X86 && (GET_CODE (XEXP (orig, 1)) == CONST)) + && reg) + { + emit_move_insn (reg, XEXP (orig, 0)); + XEXP (ptr_ref, 0) = reg; + return ptr_ref; + } + + /* Legitimize both operands of the PLUS. */ + base = machopic_indirect_data_reference (XEXP (orig, 0), reg); + orig = machopic_indirect_data_reference (XEXP (orig, 1), + (base == reg ? 0 : reg)); + if (MACHOPIC_INDIRECT && (GET_CODE (orig) == CONST_INT)) + result = plus_constant (Pmode, base, INTVAL (orig)); + else + result = gen_rtx_PLUS (Pmode, base, orig); + + if (MACHOPIC_JUST_INDIRECT && GET_CODE (base) == MEM) + { + if (reg) + { + emit_move_insn (reg, result); + result = reg; + } + else + { + result = force_reg (GET_MODE (result), result); + } + } + + return result; + } + return ptr_ref; +} + +/* Transform TARGET (a MEM), which is a function call target, to the + corresponding symbol_stub if necessary. Return a new MEM. */ + +rtx +machopic_indirect_call_target (rtx target) +{ + if (! 
darwin_emit_branch_islands) + return target; + + if (GET_CODE (target) != MEM) + return target; + + if (MACHOPIC_INDIRECT + && GET_CODE (XEXP (target, 0)) == SYMBOL_REF + && !(SYMBOL_REF_FLAGS (XEXP (target, 0)) + & MACHO_SYMBOL_FLAG_DEFINED)) + { + rtx sym_ref = XEXP (target, 0); + const char *stub_name = machopic_indirection_name (sym_ref, + /*stub_p=*/true); + enum machine_mode mode = GET_MODE (sym_ref); + + XEXP (target, 0) = gen_rtx_SYMBOL_REF (mode, stub_name); + SYMBOL_REF_DATA (XEXP (target, 0)) = SYMBOL_REF_DATA (sym_ref); + MEM_READONLY_P (target) = 1; + MEM_NOTRAP_P (target) = 1; + } + + return target; +} + +rtx +machopic_legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) +{ + rtx pic_ref = orig; + + if (! MACHOPIC_INDIRECT) + return orig; + + /* First handle a simple SYMBOL_REF or LABEL_REF */ + if (GET_CODE (orig) == LABEL_REF + || (GET_CODE (orig) == SYMBOL_REF + )) + { + /* addr(foo) = &func+(foo-func) */ + orig = machopic_indirect_data_reference (orig, reg); + + if (GET_CODE (orig) == PLUS + && GET_CODE (XEXP (orig, 0)) == REG) + { + if (reg == 0) + return force_reg (mode, orig); + + emit_move_insn (reg, orig); + return reg; + } + + if (GET_CODE (orig) == MEM) + { + if (reg == 0) + { + gcc_assert (!reload_in_progress); + reg = gen_reg_rtx (Pmode); + } + +#if HAVE_lo_sum + if (MACHO_DYNAMIC_NO_PIC_P + && (GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF)) + { +#if defined (TARGET_TOC) /* ppc */ + rtx temp_reg = (!can_create_pseudo_p () + ? reg : + gen_reg_rtx (Pmode)); + rtx asym = XEXP (orig, 0); + rtx mem; + + emit_insn (gen_macho_high (temp_reg, asym)); + mem = gen_const_mem (GET_MODE (orig), + gen_rtx_LO_SUM (Pmode, temp_reg, + copy_rtx (asym))); + emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); +#else + /* Some other CPU -- WriteMe! but right now there are no other + platforms that can use dynamic-no-pic */ + gcc_unreachable (); +#endif + pic_ref = reg; + } + else + if (GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF) + { + rtx offset = machopic_gen_offset (XEXP (orig, 0)); +#if defined (TARGET_TOC) /* i.e., PowerPC */ + /* Generating a new reg may expose opportunities for + common subexpression elimination. */ + rtx hi_sum_reg = (!can_create_pseudo_p () + ? reg + : gen_reg_rtx (Pmode)); + rtx mem; + rtx insn; + rtx sum; + + sum = gen_rtx_HIGH (Pmode, offset); + if (! 
MACHO_DYNAMIC_NO_PIC_P) + sum = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, sum); + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, sum)); + + mem = gen_const_mem (GET_MODE (orig), + gen_rtx_LO_SUM (Pmode, + hi_sum_reg, + copy_rtx (offset))); + insn = emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); + set_unique_reg_note (insn, REG_EQUAL, pic_ref); + + pic_ref = reg; +#else + emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); + + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, + gen_rtx_CONST (Pmode, + offset)))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + gen_rtx_CONST (Pmode, + copy_rtx (offset))))); + pic_ref = gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, reg); +#endif + } + else +#endif /* HAVE_lo_sum */ + { + rtx pic = pic_offset_table_rtx; + if (GET_CODE (pic) != REG) + { + emit_move_insn (reg, pic); + pic = reg; + } +#if 0 + emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); +#endif + + if (reload_in_progress) + df_set_regs_ever_live (REGNO (pic), true); + pic_ref = gen_rtx_PLUS (Pmode, pic, + machopic_gen_offset (XEXP (orig, 0))); + } + +#if !defined (TARGET_TOC) + emit_move_insn (reg, pic_ref); + pic_ref = gen_const_mem (GET_MODE (orig), reg); +#endif + } + else + { + +#if HAVE_lo_sum + if (GET_CODE (orig) == SYMBOL_REF + || GET_CODE (orig) == LABEL_REF) + { + rtx offset = machopic_gen_offset (orig); +#if defined (TARGET_TOC) /* i.e., PowerPC */ + rtx hi_sum_reg; + + if (reg == 0) + { + gcc_assert (!reload_in_progress); + reg = gen_reg_rtx (Pmode); + } + + hi_sum_reg = reg; + + emit_insn (gen_rtx_SET (Pmode, hi_sum_reg, + (MACHO_DYNAMIC_NO_PIC_P) + ? gen_rtx_HIGH (Pmode, offset) + : gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + gen_rtx_HIGH (Pmode, + offset)))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, + hi_sum_reg, + copy_rtx (offset)))); + pic_ref = reg; +#else + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_HIGH (Pmode, offset))); + emit_insn (gen_rtx_SET (VOIDmode, reg, + gen_rtx_LO_SUM (Pmode, reg, + copy_rtx (offset)))); + pic_ref = gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, reg); +#endif + } + else +#endif /* HAVE_lo_sum */ + { + if (REG_P (orig) + || GET_CODE (orig) == SUBREG) + { + return orig; + } + else + { + rtx pic = pic_offset_table_rtx; + if (GET_CODE (pic) != REG) + { + emit_move_insn (reg, pic); + pic = reg; + } +#if 0 + emit_use (pic_offset_table_rtx); +#endif + if (reload_in_progress) + df_set_regs_ever_live (REGNO (pic), true); + pic_ref = gen_rtx_PLUS (Pmode, + pic, + machopic_gen_offset (orig)); + } + } + } + + if (GET_CODE (pic_ref) != REG) + { + if (reg != 0) + { + emit_move_insn (reg, pic_ref); + return reg; + } + else + { + return force_reg (mode, pic_ref); + } + } + else + { + return pic_ref; + } + } + + else if (GET_CODE (orig) == SYMBOL_REF) + return orig; + + else if (GET_CODE (orig) == PLUS + && (GET_CODE (XEXP (orig, 0)) == MEM + || GET_CODE (XEXP (orig, 0)) == SYMBOL_REF + || GET_CODE (XEXP (orig, 0)) == LABEL_REF) + && XEXP (orig, 0) != pic_offset_table_rtx + && GET_CODE (XEXP (orig, 1)) != REG) + + { + rtx base; + int is_complex = (GET_CODE (XEXP (orig, 0)) == MEM); + + base = machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + orig = machopic_legitimize_pic_address (XEXP (orig, 1), + Pmode, (base == reg ? 
0 : reg)); + if (GET_CODE (orig) == CONST_INT) + { + pic_ref = plus_constant (Pmode, base, INTVAL (orig)); + is_complex = 1; + } + else + pic_ref = gen_rtx_PLUS (Pmode, base, orig); + + if (reg && is_complex) + { + emit_move_insn (reg, pic_ref); + pic_ref = reg; + } + /* Likewise, should we set special REG_NOTEs here? */ + } + + else if (GET_CODE (orig) == CONST) + { + return machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + } + + else if (GET_CODE (orig) == MEM + && GET_CODE (XEXP (orig, 0)) == SYMBOL_REF) + { + rtx addr = machopic_legitimize_pic_address (XEXP (orig, 0), Pmode, reg); + addr = replace_equiv_address (orig, addr); + emit_move_insn (reg, addr); + pic_ref = reg; + } + + return pic_ref; +} + +/* Output the stub or non-lazy pointer in *SLOT, if it has been used. + DATA is the FILE* for assembly output. Called from + htab_traverse. */ + +static int +machopic_output_indirection (void **slot, void *data) +{ + machopic_indirection *p = *((machopic_indirection **) slot); + FILE *asm_out_file = (FILE *) data; + rtx symbol; + const char *sym_name; + const char *ptr_name; + + if (!p->used) + return 1; + + symbol = p->symbol; + sym_name = XSTR (symbol, 0); + ptr_name = p->ptr_name; + + if (p->stub_p) + { + char *sym; + char *stub; + tree id; + + id = maybe_get_identifier (sym_name); + if (id) + { + tree id_orig = id; + + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + if (id != id_orig) + sym_name = IDENTIFIER_POINTER (id); + } + + sym = XALLOCAVEC (char, strlen (sym_name) + 2); + if (sym_name[0] == '*' || sym_name[0] == '&') + strcpy (sym, sym_name + 1); + else if (sym_name[0] == '-' || sym_name[0] == '+') + strcpy (sym, sym_name); + else + sprintf (sym, "%s%s", user_label_prefix, sym_name); + + stub = XALLOCAVEC (char, strlen (ptr_name) + 2); + if (ptr_name[0] == '*' || ptr_name[0] == '&') + strcpy (stub, ptr_name + 1); + else + sprintf (stub, "%s%s", user_label_prefix, ptr_name); + + machopic_output_stub (asm_out_file, sym, stub); + } + else if (! indirect_data (symbol) + && (machopic_symbol_defined_p (symbol) + || SYMBOL_REF_LOCAL_P (symbol))) + { + switch_to_section (data_section); + assemble_align (GET_MODE_ALIGNMENT (Pmode)); + assemble_label (asm_out_file, ptr_name); + assemble_integer (gen_rtx_SYMBOL_REF (Pmode, sym_name), + GET_MODE_SIZE (Pmode), + GET_MODE_ALIGNMENT (Pmode), 1); + } + else + { + rtx init = const0_rtx; + + switch_to_section (darwin_sections[machopic_nl_symbol_ptr_section]); + + /* Mach-O symbols are passed around in code through indirect + references and the original symbol_ref hasn't passed through + the generic handling and reference-catching in + output_operand, so we need to manually mark weak references + as such. */ + if (SYMBOL_REF_WEAK (symbol)) + { + tree decl = SYMBOL_REF_DECL (symbol); + gcc_assert (DECL_P (decl)); + + if (decl != NULL_TREE + && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) + /* Handle only actual external-only definitions, not + e.g. extern inline code or variables for which + storage has been allocated. 
*/ + && !TREE_STATIC (decl)) + { + fputs ("\t.weak_reference ", asm_out_file); + assemble_name (asm_out_file, sym_name); + fputc ('\n', asm_out_file); + } + } + + assemble_name (asm_out_file, ptr_name); + fprintf (asm_out_file, ":\n"); + + fprintf (asm_out_file, "\t.indirect_symbol "); + assemble_name (asm_out_file, sym_name); + fprintf (asm_out_file, "\n"); + + /* Variables that are marked with MACHO_SYMBOL_STATIC need to + have their symbol name instead of 0 in the second entry of + the non-lazy symbol pointer data structure when they are + defined. This allows the runtime to rebind newer instances + of the translation unit with the original instance of the + symbol. */ + + if ((SYMBOL_REF_FLAGS (symbol) & MACHO_SYMBOL_STATIC) + && machopic_symbol_defined_p (symbol)) + init = gen_rtx_SYMBOL_REF (Pmode, sym_name); + + assemble_integer (init, GET_MODE_SIZE (Pmode), + GET_MODE_ALIGNMENT (Pmode), 1); + } + + return 1; +} + +void +machopic_finish (FILE *asm_out_file) +{ + if (machopic_indirections) + htab_traverse_noresize (machopic_indirections, + machopic_output_indirection, + asm_out_file); +} + +int +machopic_operand_p (rtx op) +{ + if (MACHOPIC_JUST_INDIRECT) + return (GET_CODE (op) == SYMBOL_REF + && machopic_symbol_defined_p (op)); + else + return (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == UNSPEC + && XINT (XEXP (op, 0), 1) == UNSPEC_MACHOPIC_OFFSET); +} + +/* This function records whether a given name corresponds to a defined + or undefined function or variable, for machopic_classify_ident to + use later. */ + +void +darwin_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED) +{ + rtx sym_ref; + + /* Do the standard encoding things first. */ + default_encode_section_info (decl, rtl, first); + + if (TREE_CODE (decl) != FUNCTION_DECL && TREE_CODE (decl) != VAR_DECL) + return; + + sym_ref = XEXP (rtl, 0); + if (TREE_CODE (decl) == VAR_DECL) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_VARIABLE; + + if (!DECL_EXTERNAL (decl) + && (!TREE_PUBLIC (decl) || !DECL_WEAK (decl)) + && ! lookup_attribute ("weakref", DECL_ATTRIBUTES (decl)) + && ((TREE_STATIC (decl) + && (!DECL_COMMON (decl) || !TREE_PUBLIC (decl))) + || (!DECL_COMMON (decl) && DECL_INITIAL (decl) + && DECL_INITIAL (decl) != error_mark_node))) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED; + + if (! TREE_PUBLIC (decl)) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_STATIC; +} + +void +darwin_mark_decl_preserved (const char *name) +{ + fprintf (asm_out_file, "\t.no_dead_strip "); + assemble_name (asm_out_file, name); + fputc ('\n', asm_out_file); +} + +static section * +darwin_rodata_section (int weak, bool zsize) +{ + return (weak + ? darwin_sections[const_coal_section] + : (zsize ? darwin_sections[zobj_const_section] + : darwin_sections[const_section])); +} + +static section * +darwin_mergeable_string_section (tree exp, + unsigned HOST_WIDE_INT align) +{ + /* Darwin's ld expects to see non-writable string literals in the .cstring + section. Later versions of ld check and complain when CFStrings are + enabled. Therefore we shall force the strings into .cstring since we + don't support writable ones anyway. 
*/ + if ((darwin_constant_cfstrings || flag_merge_constants) + && TREE_CODE (exp) == STRING_CST + && TREE_CODE (TREE_TYPE (exp)) == ARRAY_TYPE + && align <= 256 + && (int_size_in_bytes (TREE_TYPE (exp)) + == TREE_STRING_LENGTH (exp)) + && ((size_t) TREE_STRING_LENGTH (exp) + == strlen (TREE_STRING_POINTER (exp)) + 1)) + return darwin_sections[cstring_section]; + + if (DARWIN_SECTION_ANCHORS && flag_section_anchors + && TREE_CODE (exp) == STRING_CST + && TREE_STRING_LENGTH (exp) == 0) + return darwin_sections[zobj_const_section]; + + return readonly_data_section; +} + +#ifndef HAVE_GAS_LITERAL16 +#define HAVE_GAS_LITERAL16 0 +#endif + +static section * +darwin_mergeable_constant_section (tree exp, + unsigned HOST_WIDE_INT align, + bool zsize) +{ + enum machine_mode mode = DECL_MODE (exp); + unsigned int modesize = GET_MODE_BITSIZE (mode); + + if (DARWIN_SECTION_ANCHORS + && flag_section_anchors + && zsize) + return darwin_sections[zobj_const_section]; + + if (flag_merge_constants + && mode != VOIDmode + && mode != BLKmode + && modesize <= align + && align >= 8 + && align <= 256 + && (align & (align -1)) == 0) + { + tree size = TYPE_SIZE_UNIT (TREE_TYPE (exp)); + + if (TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 4 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal4_section]; + else if (TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 8 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal8_section]; + else if (HAVE_GAS_LITERAL16 + && TARGET_64BIT + && TREE_CODE (size) == INTEGER_CST + && TREE_INT_CST_LOW (size) == 16 + && TREE_INT_CST_HIGH (size) == 0) + return darwin_sections[literal16_section]; + else + return readonly_data_section; + } + + return readonly_data_section; +} + +section * +darwin_tm_clone_table_section (void) +{ + return get_named_section (NULL, + "__DATA,__tm_clone_table,regular,no_dead_strip", + 3); +} + +int +machopic_reloc_rw_mask (void) +{ + return MACHOPIC_INDIRECT ? 3 : 0; +} + +/* We have to deal with ObjC/C++ metadata section placement in the common + code, since it will also be called from LTO. + + Return metadata attributes, if present (searching for ABI=2 first) + Return NULL_TREE if no such attributes are found. */ + +static tree +is_objc_metadata (tree decl) +{ + if (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) + && DECL_ATTRIBUTES (decl)) + { + tree meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl)); + if (meta) + return meta; + meta = lookup_attribute ("OBJC1META", DECL_ATTRIBUTES (decl)); + if (meta) + return meta; + } + return NULL_TREE; +} + +static int classes_seen; +static int objc_metadata_seen; + +/* Return the section required for Objective C ABI 2 metadata. */ +static section * +darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) +{ + const char *p; + tree ident = TREE_VALUE (meta); + gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); + p = IDENTIFIER_POINTER (ident); + + gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi == 2); + + objc_metadata_seen = 1; + + if (base == data_section) + base = darwin_sections[objc2_metadata_section]; + + /* Most of the OBJC2 META-data end up in the base section, so check it + first. 
*/ + if (!strncmp (p, "V2_BASE", 7)) + return base; + else if (!strncmp (p, "V2_STRG", 7)) + return darwin_sections[cstring_section]; + + else if (!strncmp (p, "G2_META", 7) || !strncmp (p, "G2_CLAS", 7)) + return darwin_sections[objc2_classdefs_section]; + else if (!strncmp (p, "V2_MREF", 7)) + return darwin_sections[objc2_message_refs_section]; + else if (!strncmp (p, "V2_CLRF", 7)) + return darwin_sections[objc2_classrefs_section]; + else if (!strncmp (p, "V2_SURF", 7)) + return darwin_sections[objc2_super_classrefs_section]; + else if (!strncmp (p, "V2_NLCL", 7)) + return darwin_sections[objc2_nonlazy_class_section]; + else if (!strncmp (p, "V2_CLAB", 7)) + { + classes_seen = 1; + return darwin_sections[objc2_classlist_section]; + } + else if (!strncmp (p, "V2_SRFS", 7)) + return darwin_sections[objc2_selector_refs_section]; + else if (!strncmp (p, "V2_NLCA", 7)) + return darwin_sections[objc2_nonlazy_category_section]; + else if (!strncmp (p, "V2_CALA", 7)) + return darwin_sections[objc2_categorylist_section]; + + else if (!strncmp (p, "V2_PLST", 7)) + return darwin_sections[objc2_protocollist_section]; + else if (!strncmp (p, "V2_PRFS", 7)) + return darwin_sections[objc2_protocolrefs_section]; + + else if (!strncmp (p, "V2_INFO", 7)) + return darwin_sections[objc2_image_info_section]; + + else if (!strncmp (p, "V2_EHTY", 7)) + return darwin_sections[data_coal_section]; + + else if (!strncmp (p, "V2_CSTR", 7)) + return darwin_sections[objc2_constant_string_object_section]; + + /* Not recognized, default. */ + return base; +} + +/* Return the section required for Objective C ABI 0/1 metadata. */ +static section * +darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) +{ + const char *p; + tree ident = TREE_VALUE (meta); + gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); + p = IDENTIFIER_POINTER (ident); + + gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi < 2); + + objc_metadata_seen = 1; + + /* String sections first, cos there are lots of strings. 
*/ + if (!strncmp (p, "V1_STRG", 7)) + return darwin_sections[cstring_section]; + else if (!strncmp (p, "V1_CLSN", 7)) + return darwin_sections[objc_class_names_section]; + else if (!strncmp (p, "V1_METN", 7)) + return darwin_sections[objc_meth_var_names_section]; + else if (!strncmp (p, "V1_METT", 7)) + return darwin_sections[objc_meth_var_types_section]; + + else if (!strncmp (p, "V1_CLAS", 7)) + { + classes_seen = 1; + return darwin_sections[objc_class_section]; + } + else if (!strncmp (p, "V1_META", 7)) + return darwin_sections[objc_meta_class_section]; + else if (!strncmp (p, "V1_CATG", 7)) + return darwin_sections[objc_category_section]; + else if (!strncmp (p, "V1_PROT", 7)) + return darwin_sections[objc_protocol_section]; + + else if (!strncmp (p, "V1_CLCV", 7)) + return darwin_sections[objc_class_vars_section]; + else if (!strncmp (p, "V1_CLIV", 7)) + return darwin_sections[objc_instance_vars_section]; + + else if (!strncmp (p, "V1_CLCM", 7)) + return darwin_sections[objc_cls_meth_section]; + else if (!strncmp (p, "V1_CLIM", 7)) + return darwin_sections[objc_inst_meth_section]; + else if (!strncmp (p, "V1_CACM", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CAIM", 7)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (p, "V1_PNSM", 7)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (p, "V1_PCLM", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + + else if (!strncmp (p, "V1_CLPR", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CAPR", 7)) + return darwin_sections[objc_category_section]; /* ??? CHECK me. */ + + else if (!strncmp (p, "V1_PRFS", 7)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (p, "V1_CLRF", 7)) + return darwin_sections[objc_cls_refs_section]; + else if (!strncmp (p, "V1_SRFS", 7)) + return darwin_sections[objc_selector_refs_section]; + + else if (!strncmp (p, "V1_MODU", 7)) + return darwin_sections[objc_module_info_section]; + else if (!strncmp (p, "V1_SYMT", 7)) + return darwin_sections[objc_symbols_section]; + else if (!strncmp (p, "V1_INFO", 7)) + return darwin_sections[objc_image_info_section]; + + else if (!strncmp (p, "V1_PLST", 7)) + return darwin_sections[objc1_prop_list_section]; + else if (!strncmp (p, "V1_PEXT", 7)) + return darwin_sections[objc1_protocol_ext_section]; + else if (!strncmp (p, "V1_CEXT", 7)) + return darwin_sections[objc1_class_ext_section]; + + else if (!strncmp (p, "V2_CSTR", 7)) + return darwin_sections[objc_constant_string_object_section]; + + return base; +} + +section * +machopic_select_section (tree decl, + int reloc, + unsigned HOST_WIDE_INT align) +{ + bool zsize, one, weak, ro; + section *base_section = NULL; + + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", DECL_ATTRIBUTES (decl))); + + zsize = (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == CONST_DECL) + && tree_to_uhwi (DECL_SIZE_UNIT (decl)) == 0); + + one = DECL_P (decl) + && TREE_CODE (decl) == VAR_DECL + && DECL_ONE_ONLY (decl); + + ro = TREE_READONLY (decl) || TREE_CONSTANT (decl) ; + + switch (categorize_decl_for_section (decl, reloc)) + { + case SECCAT_TEXT: + gcc_unreachable (); + break; + + case SECCAT_RODATA: + case SECCAT_SRODATA: + base_section = darwin_rodata_section (weak, zsize); + break; + + case SECCAT_RODATA_MERGE_STR: + base_section = darwin_mergeable_string_section (decl, align); + break; + + case SECCAT_RODATA_MERGE_STR_INIT: + 
base_section = darwin_mergeable_string_section (DECL_INITIAL (decl), align); + break; + + case SECCAT_RODATA_MERGE_CONST: + base_section = darwin_mergeable_constant_section (decl, align, zsize); + break; + + case SECCAT_DATA: + case SECCAT_DATA_REL: + case SECCAT_DATA_REL_LOCAL: + case SECCAT_DATA_REL_RO: + case SECCAT_DATA_REL_RO_LOCAL: + case SECCAT_SDATA: + case SECCAT_TDATA: + if (weak || one) + { + if (ro) + base_section = darwin_sections[const_data_coal_section]; + else + base_section = darwin_sections[data_coal_section]; + } + else if (DARWIN_SECTION_ANCHORS + && flag_section_anchors + && zsize) + { + /* If we're doing section anchors, then punt zero-sized objects into + their own sections so that they don't interfere with offset + computation for the remaining vars. This does not need to be done + for stuff in mergeable sections, since these are ineligible for + anchors. */ + if (ro) + base_section = darwin_sections[zobj_const_data_section]; + else + base_section = darwin_sections[zobj_data_section]; + } + else if (ro) + base_section = darwin_sections[const_data_section]; + else + base_section = data_section; + break; + case SECCAT_BSS: + case SECCAT_SBSS: + case SECCAT_TBSS: + if (weak || one) + base_section = darwin_sections[data_coal_section]; + else + { + if (!TREE_PUBLIC (decl)) + base_section = lcomm_section; + else if (bss_noswitch_section) + base_section = bss_noswitch_section; + else + base_section = data_section; + } + break; + + default: + gcc_unreachable (); + } + + /* Darwin weird special cases. + a) OBJC Meta-data. */ + if (DECL_P (decl) + && (TREE_CODE (decl) == VAR_DECL + || TREE_CODE (decl) == CONST_DECL) + && DECL_ATTRIBUTES (decl)) + { + tree meta = lookup_attribute ("OBJC2META", DECL_ATTRIBUTES (decl)); + if (meta) + return darwin_objc2_section (decl, meta, base_section); + meta = lookup_attribute ("OBJC1META", DECL_ATTRIBUTES (decl)); + if (meta) + return darwin_objc1_section (decl, meta, base_section); + meta = lookup_attribute ("OBJC1METG", DECL_ATTRIBUTES (decl)); + if (meta) + return base_section; /* GNU runtime is happy with it all in one pot. */ + } + + /* b) Constant string objects. */ + if (TREE_CODE (decl) == CONSTRUCTOR + && TREE_TYPE (decl) + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE + && TYPE_NAME (TREE_TYPE (decl))) + { + tree name = TYPE_NAME (TREE_TYPE (decl)); + if (TREE_CODE (name) == TYPE_DECL) + name = DECL_NAME (name); + + if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_ObjCString")) + { + if (flag_next_runtime) + { + if (flag_objc_abi == 2) + return darwin_sections[objc2_constant_string_object_section]; + else + return darwin_sections[objc_constant_string_object_section]; + } + else + return darwin_sections[objc_string_object_section]; + } + else if (!strcmp (IDENTIFIER_POINTER (name), "__builtin_CFString")) + return darwin_sections[cfstring_constant_object_section]; + else + return base_section; + } + /* c) legacy meta-data selection. */ + else if (TREE_CODE (decl) == VAR_DECL + && DECL_NAME (decl) + && TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE + && IDENTIFIER_POINTER (DECL_NAME (decl)) + && flag_next_runtime + && !strncmp (IDENTIFIER_POINTER (DECL_NAME (decl)), "_OBJC_", 6)) + { + const char *name = IDENTIFIER_POINTER (DECL_NAME (decl)); + static bool warned_objc_46 = false; + /* We shall assert that zero-sized objects are an error in ObjC + meta-data. */ + gcc_assert (tree_to_uhwi (DECL_SIZE_UNIT (decl)) != 0); + + /* ??? 
This mechanism for determining the metadata section is + broken when LTO is in use, since the frontend that generated + the data is not identified. We will keep the capability for + the short term - in case any non-Objective-C programs are using + it to place data in specified sections. */ + if (!warned_objc_46) + { + location_t loc = DECL_SOURCE_LOCATION (decl); + warning_at (loc, 0, "the use of _OBJC_-prefixed variable names" + " to select meta-data sections is deprecated at 4.6" + " and will be removed in 4.7"); + warned_objc_46 = true; + } + + if (!strncmp (name, "_OBJC_CLASS_METHODS_", 20)) + return darwin_sections[objc_cls_meth_section]; + else if (!strncmp (name, "_OBJC_INSTANCE_METHODS_", 23)) + return darwin_sections[objc_inst_meth_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_CLASS_METHODS_", 29)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_INSTANCE_METHODS_", 32)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (name, "_OBJC_CLASS_VARIABLES_", 22)) + return darwin_sections[objc_class_vars_section]; + else if (!strncmp (name, "_OBJC_INSTANCE_VARIABLES_", 25)) + return darwin_sections[objc_instance_vars_section]; + else if (!strncmp (name, "_OBJC_CLASS_PROTOCOLS_", 22)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_CLASS_NAME_", 17)) + return darwin_sections[objc_class_names_section]; + else if (!strncmp (name, "_OBJC_METH_VAR_NAME_", 20)) + return darwin_sections[objc_meth_var_names_section]; + else if (!strncmp (name, "_OBJC_METH_VAR_TYPE_", 20)) + return darwin_sections[objc_meth_var_types_section]; + else if (!strncmp (name, "_OBJC_CLASS_REFERENCES", 22)) + return darwin_sections[objc_cls_refs_section]; + else if (!strncmp (name, "_OBJC_CLASS_", 12)) + return darwin_sections[objc_class_section]; + else if (!strncmp (name, "_OBJC_METACLASS_", 16)) + return darwin_sections[objc_meta_class_section]; + else if (!strncmp (name, "_OBJC_CATEGORY_", 15)) + return darwin_sections[objc_category_section]; + else if (!strncmp (name, "_OBJC_SELECTOR_REFERENCES", 25)) + return darwin_sections[objc_selector_refs_section]; + else if (!strncmp (name, "_OBJC_SELECTOR_FIXUP", 20)) + return darwin_sections[objc_selector_fixup_section]; + else if (!strncmp (name, "_OBJC_SYMBOLS", 13)) + return darwin_sections[objc_symbols_section]; + else if (!strncmp (name, "_OBJC_MODULES", 13)) + return darwin_sections[objc_module_info_section]; + else if (!strncmp (name, "_OBJC_IMAGE_INFO", 16)) + return darwin_sections[objc_image_info_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_INSTANCE_METHODS_", 32)) + return darwin_sections[objc_cat_inst_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_CLASS_METHODS_", 29)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_REFS_", 20)) + return darwin_sections[objc_cat_cls_meth_section]; + else if (!strncmp (name, "_OBJC_PROTOCOL_", 15)) + return darwin_sections[objc_protocol_section]; + else + return base_section; + } + + return base_section; +} + +/* This can be called with address expressions as "rtx". + They must go in "const". 
*/ + +section * +machopic_select_rtx_section (enum machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) +{ + if (GET_MODE_SIZE (mode) == 8 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE)) + return darwin_sections[literal8_section]; + else if (GET_MODE_SIZE (mode) == 4 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE)) + return darwin_sections[literal4_section]; + else if (HAVE_GAS_LITERAL16 + && TARGET_64BIT + && GET_MODE_SIZE (mode) == 16 + && (GET_CODE (x) == CONST_INT + || GET_CODE (x) == CONST_DOUBLE + || GET_CODE (x) == CONST_VECTOR)) + return darwin_sections[literal16_section]; + else if (MACHOPIC_INDIRECT + && (GET_CODE (x) == SYMBOL_REF + || GET_CODE (x) == CONST + || GET_CODE (x) == LABEL_REF)) + return darwin_sections[const_data_section]; + else + return darwin_sections[const_section]; +} + +void +machopic_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + cdtor_record new_elt = {symbol, priority, vec_safe_length (ctors)}; + + vec_safe_push (ctors, new_elt); + + if (! MACHOPIC_INDIRECT) + fprintf (asm_out_file, ".reference .constructors_used\n"); +} + +void +machopic_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED) +{ + cdtor_record new_elt = {symbol, priority, vec_safe_length (dtors)}; + + vec_safe_push (dtors, new_elt); + + if (! MACHOPIC_INDIRECT) + fprintf (asm_out_file, ".reference .destructors_used\n"); +} + +static int +sort_cdtor_records (const void * a, const void * b) +{ + const cdtor_record *cda = (const cdtor_record *)a; + const cdtor_record *cdb = (const cdtor_record *)b; + if (cda->priority > cdb->priority) + return 1; + if (cda->priority < cdb->priority) + return -1; + if (cda->position > cdb->position) + return 1; + if (cda->position < cdb->position) + return -1; + return 0; +} + +static void +finalize_ctors () +{ + unsigned int i; + cdtor_record *elt; + + if (MACHOPIC_INDIRECT) + switch_to_section (darwin_sections[mod_init_section]); + else + switch_to_section (darwin_sections[constructor_section]); + + if (vec_safe_length (ctors) > 1) + ctors->qsort (sort_cdtor_records); + FOR_EACH_VEC_SAFE_ELT (ctors, i, elt) + { + assemble_align (POINTER_SIZE); + assemble_integer (elt->symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); + } +} + +static void +finalize_dtors () +{ + unsigned int i; + cdtor_record *elt; + + if (MACHOPIC_INDIRECT) + switch_to_section (darwin_sections[mod_term_section]); + else + switch_to_section (darwin_sections[destructor_section]); + + if (vec_safe_length (dtors) > 1) + dtors->qsort (sort_cdtor_records); + FOR_EACH_VEC_SAFE_ELT (dtors, i, elt) + { + assemble_align (POINTER_SIZE); + assemble_integer (elt->symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); + } +} + +void +darwin_globalize_label (FILE *stream, const char *name) +{ + if (!!strncmp (name, "_OBJC_", 6)) + default_globalize_label (stream, name); +} + +/* This routine returns non-zero if 'name' starts with the special objective-c + anonymous file-scope static name. It accommodates c++'s mangling of such + symbols (in this case the symbols will have form _ZL{d}*_OBJC_* d=digit). */ + +int +darwin_label_is_anonymous_local_objc_name (const char *name) +{ + const unsigned char *p = (const unsigned char *) name; + if (*p != '_') + return 0; + if (p[1] == 'Z' && p[2] == 'L') + { + p += 3; + while (*p >= '0' && *p <= '9') + p++; + } + return (!strncmp ((const char *)p, "_OBJC_", 6)); +} + +/* LTO support for Mach-O. 
+ + This version uses three mach-o sections to encapsulate the (unlimited + number of) lto sections. + + __GNU_LTO, __lto_sections contains the concatented GNU LTO section data. + __GNU_LTO, __section_names contains the GNU LTO section names. + __GNU_LTO, __section_index contains an array of values that index these. + + Indexed thus: +
<section offset>, <section length>, + <name offset>, <name length>
+ . + + At present, for both m32 and m64 mach-o files each of these fields is + represented by a uint32_t. This is because, AFAICT, a mach-o object + cannot exceed 4Gb because the section_64 offset field (see below) is 32bits. + + uint32_t offset; + "offset An integer specifying the offset to this section in the file." */ + +/* Count lto section numbers. */ +static unsigned int lto_section_num = 0; + +/* A vector of information about LTO sections, at present, we only have + the name. TODO: see if we can get the data length somehow. */ +typedef struct GTY (()) darwin_lto_section_e { + const char *sectname; +} darwin_lto_section_e ; + +static GTY (()) vec *lto_section_names; + +/* Segment for LTO data. */ +#define LTO_SEGMENT_NAME "__GNU_LTO" + +/* Section wrapper scheme (used here to wrap the unlimited number of LTO + sections into three Mach-O ones). + NOTE: These names MUST be kept in sync with those in + libiberty/simple-object-mach-o. */ +#define LTO_SECTS_SECTION "__wrapper_sects" +#define LTO_NAMES_SECTION "__wrapper_names" +#define LTO_INDEX_SECTION "__wrapper_index" + +/* File to temporarily store LTO data. This is appended to asm_out_file + in darwin_end_file. */ +static FILE *lto_asm_out_file, *saved_asm_out_file; +static char *lto_asm_out_name; + +/* Prepare asm_out_file for LTO output. For darwin, this means hiding + asm_out_file and switching to an alternative output file. */ +void +darwin_asm_lto_start (void) +{ + gcc_assert (! saved_asm_out_file); + saved_asm_out_file = asm_out_file; + if (! lto_asm_out_name) + lto_asm_out_name = make_temp_file (".lto.s"); + lto_asm_out_file = fopen (lto_asm_out_name, "a"); + if (lto_asm_out_file == NULL) + fatal_error ("failed to open temporary file %s for LTO output", + lto_asm_out_name); + asm_out_file = lto_asm_out_file; +} + +/* Restore asm_out_file. */ +void +darwin_asm_lto_end (void) +{ + gcc_assert (saved_asm_out_file); + fclose (lto_asm_out_file); + asm_out_file = saved_asm_out_file; + saved_asm_out_file = NULL; +} + +static void +darwin_asm_dwarf_section (const char *name, unsigned int flags, tree decl); + +/* Called for the TARGET_ASM_NAMED_SECTION hook. */ + +void +darwin_asm_named_section (const char *name, + unsigned int flags, + tree decl ATTRIBUTE_UNUSED) +{ + /* LTO sections go in a special section that encapsulates the (unlimited) + number of GNU LTO sections within a single mach-o one. */ + if (strncmp (name, LTO_SECTION_NAME_PREFIX, + strlen (LTO_SECTION_NAME_PREFIX)) == 0) + { + darwin_lto_section_e e; + /* We expect certain flags to be set... */ + gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED)) + == (SECTION_DEBUG | SECTION_NAMED)); + + /* Switch to our combined section. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_SECTS_SECTION); + /* Output a label for the start of this sub-section. */ + fprintf (asm_out_file, "L_GNU_LTO%d:\t;# %s\n", + lto_section_num, name); + /* We have to jump through hoops to get the values of the intra-section + offsets... */ + fprintf (asm_out_file, "\t.set L$gnu$lto$offs%d,L_GNU_LTO%d-L_GNU_LTO0\n", + lto_section_num, lto_section_num); + fprintf (asm_out_file, + "\t.set L$gnu$lto$size%d,L_GNU_LTO%d-L_GNU_LTO%d\n", + lto_section_num, lto_section_num+1, lto_section_num); + lto_section_num++; + e.sectname = xstrdup (name); + /* Keep the names, we'll need to make a table later. + TODO: check that we do not revisit sections, that would break + the assumption of how this is done. 
*/ + if (lto_section_names == NULL) + vec_alloc (lto_section_names, 16); + vec_safe_push (lto_section_names, e); + } + else if (strncmp (name, "__DWARF,", 8) == 0) + darwin_asm_dwarf_section (name, flags, decl); + else + fprintf (asm_out_file, "\t.section %s\n", name); +} + +void +darwin_unique_section (tree decl ATTRIBUTE_UNUSED, int reloc ATTRIBUTE_UNUSED) +{ + /* Darwin does not use unique sections. */ +} + +/* Handle __attribute__ ((apple_kext_compatibility)). + This only applies to darwin kexts for 2.95 compatibility -- it shrinks the + vtable for classes with this attribute (and their descendants) by not + outputting the new 3.0 nondeleting destructor. This means that such + objects CANNOT be allocated on the stack or as globals UNLESS they have + a completely empty `operator delete'. + Luckily, this fits in with the Darwin kext model. + + This attribute also disables gcc3's potential overlaying of derived + class data members on the padding at the end of the base class. */ + +tree +darwin_handle_kext_attribute (tree *node, tree name, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + /* APPLE KEXT stuff -- only applies with pure static C++ code. */ + if (! TARGET_KEXTABI) + { + warning (0, "%qE 2.95 vtable-compatibility attribute applies " + "only when compiling a kext", name); + + *no_add_attrs = true; + } + else if (TREE_CODE (*node) != RECORD_TYPE) + { + warning (0, "%qE 2.95 vtable-compatibility attribute applies " + "only to C++ classes", name); + + *no_add_attrs = true; + } + + return NULL_TREE; +} + +/* Handle a "weak_import" attribute; arguments as in + struct attribute_spec.handler. */ + +tree +darwin_handle_weak_import_attribute (tree *node, tree name, + tree ARG_UNUSED (args), + int ARG_UNUSED (flags), + bool * no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_DECL && TREE_CODE (*node) != VAR_DECL) + { + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + } + else + declare_weak (*node); + + return NULL_TREE; +} + +/* Emit a label for an FDE, making it global and/or weak if appropriate. + The third parameter is nonzero if this is for exception handling. + The fourth parameter is nonzero if this is just a placeholder for an + FDE that we are omitting. */ + +void +darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty) +{ + char *lab ; + char buf[32]; + static int invok_count = 0; + static tree last_fun_decl = NULL_TREE; + + /* We use the linker to emit the .eh labels for Darwin 9 and above. */ + if (! for_eh || generating_for_darwin_version >= 9) + return; + + /* FIXME: This only works when the eh for all sections of a function is + emitted at the same time. If that changes, we would need to use a lookup + table of some form to determine what to do. Also, we should emit the + unadorned label for the partition containing the public label for a + function. This is of limited use, probably, since we do not currently + enable partitioning. 
*/ + strcpy (buf, ".eh"); + if (decl && TREE_CODE (decl) == FUNCTION_DECL) + { + if (decl == last_fun_decl) + { + invok_count++; + snprintf (buf, 31, "$$part$$%d.eh", invok_count); + } + else + { + last_fun_decl = decl; + invok_count = 0; + } + } + + lab = concat (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), buf, NULL); + + if (TREE_PUBLIC (decl)) + { + targetm.asm_out.globalize_label (file, lab); + if (DECL_VISIBILITY (decl) == VISIBILITY_HIDDEN) + { + fputs ("\t.private_extern ", file); + assemble_name (file, lab); + fputc ('\n', file); + } + } + + if (DECL_WEAK (decl)) + { + fputs ("\t.weak_definition ", file); + assemble_name (file, lab); + fputc ('\n', file); + } + + assemble_name (file, lab); + if (empty) + { + fputs (" = 0\n", file); + + /* Mark the absolute .eh and .eh1 style labels as needed to + ensure that we don't dead code strip them and keep such + labels from another instantiation point until we can fix this + properly with group comdat support. */ + darwin_mark_decl_preserved (lab); + } + else + fputs (":\n", file); + + free (lab); +} + +static GTY(()) unsigned long except_table_label_num; + +void +darwin_emit_except_table_label (FILE *file) +{ + char section_start_label[30]; + + ASM_GENERATE_INTERNAL_LABEL (section_start_label, "GCC_except_table", + except_table_label_num++); + ASM_OUTPUT_LABEL (file, section_start_label); +} +/* Generate a PC-relative reference to a Mach-O non-lazy-symbol. */ + +void +darwin_non_lazy_pcrel (FILE *file, rtx addr) +{ + const char *nlp_name; + + gcc_assert (GET_CODE (addr) == SYMBOL_REF); + + nlp_name = machopic_indirection_name (addr, /*stub_p=*/false); + fputs ("\t.long\t", file); + ASM_OUTPUT_LABELREF (file, nlp_name); + fputs ("-.", file); +} + +/* If this is uncommented, details of each allocation will be printed + in the asm right before the actual code. WARNING - this will cause some + test-suite fails (since the printout will contain items that some tests + are not expecting) -- so don't leave it on by default (it bloats the + asm too). */ +/*#define DEBUG_DARWIN_MEM_ALLOCATORS*/ + +/* The first two of these routines are ostensibly just intended to put + names into the asm. However, they are both hijacked in order to ensure + that zero-sized items do not make their way into the output. Consequently, + we also need to make these participate in provisions for dealing with + such items in section anchors. */ + +/* The implementation of ASM_DECLARE_OBJECT_NAME. */ +/* The RTTI data (e.g., __ti4name) is common and public (and static), + but it does need to be referenced via indirect PIC data pointers. + The machopic_define_symbol calls are telling the machopic subsystem + that the name *is* defined in this module, so it doesn't need to + make them indirect. 
*/ +void +darwin_asm_declare_object_name (FILE *file, + const char *nam, tree decl) +{ + const char *xname = nam; + unsigned HOST_WIDE_INT size; + bool local_def, weak; + + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + + local_def = DECL_INITIAL (decl) || (TREE_STATIC (decl) + && (!DECL_COMMON (decl) + || !TREE_PUBLIC (decl))); + + if (GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF) + xname = IDENTIFIER_POINTER (DECL_NAME (decl)); + + if (local_def) + { + (* targetm.encode_section_info) (decl, DECL_RTL (decl), false); + if (!weak) + machopic_define_symbol (DECL_RTL (decl)); + } + + size = tree_to_uhwi (DECL_SIZE_UNIT (decl)); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (file, "# dadon: %s %s (%llu, %u) local %d weak %d" + " stat %d com %d pub %d t-const %d t-ro %d init %lx\n", + xname, (TREE_CODE (decl) == VAR_DECL?"var":"const"), + (unsigned long long)size, DECL_ALIGN (decl), local_def, + DECL_WEAK (decl), TREE_STATIC (decl), DECL_COMMON (decl), + TREE_PUBLIC (decl), TREE_CONSTANT (decl), TREE_READONLY (decl), + (unsigned long)DECL_INITIAL (decl)); +#endif + + /* Darwin needs help to support local zero-sized objects. + They must be made at least one byte, and the section containing must be + marked as unsuitable for section-anchors (see storage allocators below). + + For non-zero objects this output is handled by varasm.c. + */ + if (!size) + { + unsigned int l2align = 0; + + /* The align must be honored, even for zero-sized. */ + if (DECL_ALIGN (decl)) + { + l2align = floor_log2 (DECL_ALIGN (decl) / BITS_PER_UNIT); + fprintf (file, "\t.align\t%u\n", l2align); + } + + ASM_OUTPUT_LABEL (file, xname); + size = 1; + fprintf (file, "\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + + /* Check that we've correctly picked up the zero-sized item and placed it + properly. */ + gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + || (in_section + && (in_section->common.flags & SECTION_NO_ANCHOR))); + } + else + ASM_OUTPUT_LABEL (file, xname); +} + +/* The implementation of ASM_DECLARE_CONSTANT_NAME. */ +void +darwin_asm_declare_constant_name (FILE *file, const char *name, + const_tree exp ATTRIBUTE_UNUSED, + HOST_WIDE_INT size) +{ + assemble_label (file, name); + /* As for other items, we need at least one byte. */ + if (!size) + { + fputs ("\t.space\t1\n", file); + /* Check that we've correctly picked up the zero-sized item and placed it + properly. */ + gcc_assert ((!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + || (in_section + && (in_section->common.flags & SECTION_NO_ANCHOR))); + } +} + +/* Darwin storage allocators. + + Zerofill sections are desirable for large blank data since, otherwise, these + data bloat objects (PR33210). + + However, section anchors don't work in .zerofill sections (one cannot switch + to a zerofill section). Ergo, for Darwin targets using section anchors we need + to put (at least some) data into 'normal' switchable sections. + + Here we set a relatively arbitrary value for the size of an object to trigger + zerofill when section anchors are enabled (anything bigger than a page for + current Darwin implementations). FIXME: there ought to be some objective way + to make this choice. + + When section anchor are off this is ignored anyway. */ + +#define BYTES_ZFILL 4096 + +/* Emit a chunk of data for items coalesced by the linker. 
*/ +static void +darwin_emit_weak_or_comdat (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + /* Since the sections used here are coalesed, they will not be eligible + for section anchors, and therefore we don't need to break that out. */ + if (TREE_READONLY (decl) || TREE_CONSTANT (decl)) + switch_to_section (darwin_sections[const_data_coal_section]); + else + switch_to_section (darwin_sections[data_coal_section]); + + /* To be consistent, we'll allow darwin_asm_declare_object_name to assemble + the align info for zero-sized items... but do it here otherwise. */ + if (size && align) + fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT)); + + if (TREE_PUBLIC (decl)) + darwin_globalize_label (fp, name); + + /* ... and we let it deal with outputting one byte of zero for them too. */ + darwin_asm_declare_object_name (fp, name, decl); + if (size) + assemble_zeros (size); +} + +/* Emit a chunk of data for ObjC meta-data that got placed in BSS erroneously. */ +static void +darwin_emit_objc_zeroed (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align, tree meta) +{ + section *ocs = data_section; + + if (TREE_PURPOSE (meta) == get_identifier("OBJC2META")) + ocs = darwin_objc2_section (decl, meta, ocs); + else + ocs = darwin_objc1_section (decl, meta, ocs); + + switch_to_section (ocs); + + /* We shall declare that zero-sized meta-data are not valid (yet). */ + gcc_assert (size); + fprintf (fp, "\t.align\t%d\n", floor_log2 (align / BITS_PER_UNIT)); + + /* ... and we let it deal with outputting one byte of zero for them too. */ + darwin_asm_declare_object_name (fp, name, decl); + assemble_zeros (size); +} + +/* This routine emits 'local' storage: + + When Section Anchors are off this routine emits .zerofill commands in + sections named for their alignment. + + When Section Anchors are on, smaller (non-zero-sized) items are placed in + the .static_data section so that the section anchoring system can see them. + Larger items are still placed in .zerofill sections, addressing PR33210. + The routine has no checking - it is all assumed to be done by the caller. +*/ +static void +darwin_emit_local_bss (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int l2align) +{ + /* FIXME: We have a fudge to make this work with Java even when the target does + not use sections anchors -- Java seems to need at least one small item in a + non-zerofill segment. */ + if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) + || (size && size <= 2)) + { + /* Put smaller objects in _static_data, where the section anchors system + can get them. + However, if they are zero-sized punt them to yet a different section + (that is not allowed to participate in anchoring). */ + if (!size) + { + fputs ("\t.section\t__DATA,__zobj_bss\n", fp); + in_section = darwin_sections[zobj_bss_section]; + size = 1; + } + else + { + fputs ("\t.static_data\n", fp); + in_section = darwin_sections[static_data_section]; + } + + if (l2align) + fprintf (fp, "\t.align\t%u\n", l2align); + + assemble_name (fp, name); + fprintf (fp, ":\n\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + else + { + /* When we are on a non-section anchor target, we can get zero-sized + items here. However, all we need to do is to bump them to one byte + and the section alignment will take care of the rest. 
*/ + char secnam[64]; + unsigned int flags ; + snprintf (secnam, 64, "__DATA,__%sbss%u", ((size)?"":"zo_"), + (unsigned) l2align); + /* We can't anchor (yet, if ever) in zerofill sections, because we can't + switch to them and emit a label. */ + flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + in_section = get_section (secnam, flags, NULL); + fprintf (fp, "\t.zerofill %s,", secnam); + assemble_name (fp, name); + if (!size) + size = 1; + + if (l2align) + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", + size, (unsigned) l2align); + else + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + + (*targetm.encode_section_info) (decl, DECL_RTL (decl), false); + /* This is defined as a file-scope var, so we know to notify machopic. */ + machopic_define_symbol (DECL_RTL (decl)); +} + +/* Emit a chunk of common. */ +static void +darwin_emit_common (FILE *fp, const char *name, + unsigned HOST_WIDE_INT size, unsigned int align) +{ + unsigned HOST_WIDE_INT rounded; + unsigned int l2align; + + /* Earlier systems complain if the alignment exceeds the page size. + The magic number is 4096 * 8 - hard-coded for legacy systems. */ + if (!emit_aligned_common && (align > 32768UL)) + align = 4096UL; /* In units. */ + else + align /= BITS_PER_UNIT; + + /* Make sure we have a meaningful align. */ + if (!align) + align = 1; + + /* For earlier toolchains, we need to emit the var as a rounded size to + tell ld the alignment. */ + if (size < align) + rounded = align; + else + rounded = (size + (align-1)) & ~(align-1); + + l2align = floor_log2 (align); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + in_section = comm_section; + /* We mustn't allow multiple public symbols to share an address when using + the normal OSX toolchain. */ + if (!size) + { + /* Put at least one byte. */ + size = 1; + /* This section can no longer participate in section anchoring. */ + comm_section->common.flags |= SECTION_NO_ANCHOR; + } + + fputs ("\t.comm\t", fp); + assemble_name (fp, name); + fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED, + emit_aligned_common?size:rounded); + if (l2align && emit_aligned_common) + fprintf (fp, ",%u", l2align); + fputs ("\n", fp); +} + +/* Output a var which is all zero - into aligned BSS sections, common, lcomm + or coalescable data sections (for weak or comdat) as appropriate. */ + +void +darwin_output_aligned_bss (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, unsigned int align) +{ + unsigned int l2align; + bool one, pub, weak; + tree meta; + + pub = TREE_PUBLIC (decl); + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" + " pub %d weak %d one %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl), + pub, weak, one, (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* Check that any initializer is valid. 
*/ + gcc_assert ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl))); + + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + gcc_assert (!DECL_COMMON (decl)); + + /* Pick up the correct alignment. */ + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + last_assemble_variable_decl = decl; + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergeable sections. */ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* If this is not public, then emit according to local rules. */ + if (!pub) + { + darwin_emit_local_bss (fp, decl, name, size, l2align); + return; + } + + /* So we have a public symbol (small item fudge for Java, see above). */ + if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) + || (size && size <= 2)) + { + /* Put smaller objects in data, where the section anchors system can get + them. However, if they are zero-sized punt them to yet a different + section (that is not allowed to participate in anchoring). */ + if (!size) + { + fputs ("\t.section\t__DATA,__zobj_data\n", fp); + in_section = darwin_sections[zobj_data_section]; + size = 1; + } + else + { + fputs ("\t.data\n", fp); + in_section = data_section; + } + + if (l2align) + fprintf (fp, "\t.align\t%u\n", l2align); + + assemble_name (fp, name); + fprintf (fp, ":\n\t.space\t"HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + else + { + char secnam[64]; + unsigned int flags ; + /* When we are on a non-section anchor target, we can get zero-sized + items here. However, all we need to do is to bump them to one byte + and the section alignment will take care of the rest. */ + snprintf (secnam, 64, "__DATA,__%spu_bss%u", ((size)?"":"zo_"), l2align); + + /* We can't anchor in zerofill sections, because we can't switch + to them and emit a label. */ + flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + in_section = get_section (secnam, flags, NULL); + fprintf (fp, "\t.zerofill %s,", secnam); + assemble_name (fp, name); + if (!size) + size = 1; + + if (l2align) + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, l2align); + else + fprintf (fp, ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + } + (* targetm.encode_section_info) (decl, DECL_RTL (decl), false); +} + +/* Output a chunk of common, with alignment specified (where the target + supports this). */ +void +darwin_asm_output_aligned_decl_common (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + unsigned int l2align; + bool one, weak; + tree meta; + + /* No corresponding var. 
*/ + if (decl==NULL) + { +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adcom: %s (%d,%d) decl=0x0\n", name, (int)size, (int)align); +#endif + darwin_emit_common (fp, name, size, align); + return; + } + + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adcom: %s (%lld,%d) ro %d cst %d stat %d com %d pub %d" + " weak %d one %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), DECL_COMMON (decl), + TREE_PUBLIC (decl), weak, one, (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* We shouldn't be messing with this if the decl has a section name. */ + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergable sections. */ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* We should only get here for DECL_COMMON, with a zero init (and, in + principle, only for public symbols too - although we deal with local + ones below). */ + + /* Check the initializer is OK. */ + gcc_assert (DECL_COMMON (decl) + && ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl)))); + + last_assemble_variable_decl = decl; + + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + /* Check we aren't asking for more aligment than the platform allows. */ + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + if (TREE_PUBLIC (decl) != 0) + darwin_emit_common (fp, name, size, align); + else + darwin_emit_local_bss (fp, decl, name, size, l2align); +} + +/* Output a chunk of BSS with alignment specfied. */ +void +darwin_asm_output_aligned_decl_local (FILE *fp, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) +{ + unsigned long l2align; + bool one, weak; + tree meta; + + one = DECL_ONE_ONLY (decl); + weak = (DECL_P (decl) + && DECL_WEAK (decl) + && !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl))); + +#ifdef DEBUG_DARWIN_MEM_ALLOCATORS +fprintf (fp, "# adloc: %s (%lld,%d) ro %d cst %d stat %d one %d pub %d" + " weak %d init %lx\n", + name, (long long)size, (int)align, TREE_READONLY (decl), + TREE_CONSTANT (decl), TREE_STATIC (decl), one, TREE_PUBLIC (decl), + weak , (unsigned long)DECL_INITIAL (decl)); +#endif + + /* ObjC metadata can get put in BSS because varasm.c decides it's BSS + before the target has a chance to comment. */ + if ((meta = is_objc_metadata (decl))) + { + darwin_emit_objc_zeroed (fp, decl, name, size, DECL_ALIGN (decl), meta); + return; + } + + /* We shouldn't be messing with this if the decl has a section name. */ + gcc_assert (DECL_SECTION_NAME (decl) == NULL); + + /* We would rather not have to check this here - but it seems that we might + be passed a decl that should be in coalesced space. */ + if (one || weak) + { + /* Weak or COMDAT objects are put in mergable sections. 
*/ + darwin_emit_weak_or_comdat (fp, decl, name, size, + DECL_ALIGN (decl)); + return; + } + + /* .. and it should be suitable for placement in local mem. */ + gcc_assert(!TREE_PUBLIC (decl) && !DECL_COMMON (decl)); + /* .. and any initializer must be all-zero. */ + gcc_assert ((DECL_INITIAL (decl) == NULL) + || (DECL_INITIAL (decl) == error_mark_node) + || initializer_zerop (DECL_INITIAL (decl))); + + last_assemble_variable_decl = decl; + + if (!size || !align) + align = DECL_ALIGN (decl); + + l2align = floor_log2 (align / BITS_PER_UNIT); + gcc_assert (l2align <= L2_MAX_OFILE_ALIGNMENT); + + darwin_emit_local_bss (fp, decl, name, size, l2align); +} + +/* Emit an assembler directive to set visibility for a symbol. The + only supported visibilities are VISIBILITY_DEFAULT and + VISIBILITY_HIDDEN; the latter corresponds to Darwin's "private + extern". There is no MACH-O equivalent of ELF's + VISIBILITY_INTERNAL or VISIBILITY_PROTECTED. */ + +void +darwin_assemble_visibility (tree decl, int vis) +{ + if (vis == VISIBILITY_DEFAULT) + ; + else if (vis == VISIBILITY_HIDDEN || vis == VISIBILITY_INTERNAL) + { + fputs ("\t.private_extern ", asm_out_file); + assemble_name (asm_out_file, + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)))); + fputs ("\n", asm_out_file); + } + else + warning (OPT_Wattributes, "protected visibility attribute " + "not supported in this configuration; ignored"); +} + +/* vec used by darwin_asm_dwarf_section. + Maybe a hash tab would be better here - but the intention is that this is + a very short list (fewer than 16 items) and each entry should (ideally, + eventually) only be presented once. + + A structure to hold a dwarf debug section used entry. */ + +typedef struct GTY(()) dwarf_sect_used_entry { + const char *name; + unsigned count; +} +dwarf_sect_used_entry; + + +/* A list of used __DWARF sections. */ +static GTY (()) vec *dwarf_sect_names_table; + +/* This is called when we are asked to assemble a named section and the + name begins with __DWARF,. We keep a list of the section names (without + the __DWARF, prefix) and use this to emit our required start label on the + first switch to each section. */ + +static void +darwin_asm_dwarf_section (const char *name, unsigned int flags, + tree ARG_UNUSED (decl)) +{ + unsigned i; + int namelen; + const char * sname; + dwarf_sect_used_entry *ref; + bool found = false; + gcc_assert ((flags & (SECTION_DEBUG | SECTION_NAMED)) + == (SECTION_DEBUG | SECTION_NAMED)); + /* We know that the name starts with __DWARF, */ + sname = name + 8; + namelen = strchr (sname, ',') - sname; + gcc_assert (namelen); + if (dwarf_sect_names_table == NULL) + vec_alloc (dwarf_sect_names_table, 16); + else + for (i = 0; + dwarf_sect_names_table->iterate (i, &ref); + i++) + { + if (!ref) + break; + if (!strcmp (ref->name, sname)) + { + found = true; + ref->count++; + break; + } + } + + fprintf (asm_out_file, "\t.section %s\n", name); + if (!found) + { + dwarf_sect_used_entry e; + fprintf (asm_out_file, "Lsection%.*s:\n", namelen, sname); + e.count = 1; + e.name = xstrdup (sname); + vec_safe_push (dwarf_sect_names_table, e); + } +} + +/* Output a difference of two labels that will be an assembly time + constant if the two labels are local. (.long lab1-lab2 will be + very different if lab1 is at the boundary between two sections; it + will be relocated according to the second section, not the first, + so one ends up with a difference between labels in different + sections, which is bad in the dwarf2 eh context for instance.) 
*/ + +static int darwin_dwarf_label_counter; + +void +darwin_asm_output_dwarf_delta (FILE *file, int size, + const char *lab1, const char *lab2) +{ + int islocaldiff = (lab1[0] == '*' && lab1[1] == 'L' + && lab2[0] == '*' && lab2[1] == 'L'); + const char *directive = (size == 8 ? ".quad" : ".long"); + + if (islocaldiff) + fprintf (file, "\t.set L$set$%d,", darwin_dwarf_label_counter); + else + fprintf (file, "\t%s\t", directive); + + assemble_name_raw (file, lab1); + fprintf (file, "-"); + assemble_name_raw (file, lab2); + if (islocaldiff) + fprintf (file, "\n\t%s L$set$%d", directive, darwin_dwarf_label_counter++); +} + +/* Output an offset in a DWARF section on Darwin. On Darwin, DWARF section + offsets are not represented using relocs in .o files; either the + section never leaves the .o file, or the linker or other tool is + responsible for parsing the DWARF and updating the offsets. */ + +void +darwin_asm_output_dwarf_offset (FILE *file, int size, const char * lab, + section *base) +{ + char sname[64]; + int namelen; + + gcc_assert (base->common.flags & SECTION_NAMED); + gcc_assert (strncmp (base->named.name, "__DWARF,", 8) == 0); + gcc_assert (strchr (base->named.name + 8, ',')); + + namelen = strchr (base->named.name + 8, ',') - (base->named.name + 8); + sprintf (sname, "*Lsection%.*s", namelen, base->named.name + 8); + darwin_asm_output_dwarf_delta (file, size, lab, sname); +} + +/* Called from the within the TARGET_ASM_FILE_START for each target. */ + +void +darwin_file_start (void) +{ + /* Nothing to do. */ +} + +/* Called for the TARGET_ASM_FILE_END hook. + Emit the mach-o pic indirection data, the lto data and, finally a flag + to tell the linker that it can break the file object into sections and + move those around for efficiency. */ + +void +darwin_file_end (void) +{ + if (!vec_safe_is_empty (ctors)) + finalize_ctors (); + if (!vec_safe_is_empty (dtors)) + finalize_dtors (); + + /* If we are expecting to output NeXT ObjC meta-data, (and we actually see + some) then we output the fix-and-continue marker (Image Info). + This applies to Objective C, Objective C++ and LTO with either language + as part of the input. */ + if (flag_next_runtime && objc_metadata_seen) + { + unsigned int flags = 0; + if (flag_objc_abi >= 2) + { + flags = 16; + output_section_asm_op + (darwin_sections[objc2_image_info_section]->unnamed.data); + } + else + output_section_asm_op + (darwin_sections[objc_image_info_section]->unnamed.data); + + ASM_OUTPUT_ALIGN (asm_out_file, 2); + fputs ("L_OBJC_ImageInfo:\n", asm_out_file); + + flags |= (flag_replace_objc_classes && classes_seen) ? 1 : 0; + flags |= flag_objc_gc ? 2 : 0; + + fprintf (asm_out_file, "\t.long\t0\n\t.long\t%u\n", flags); + } + + machopic_finish (asm_out_file); + if (strcmp (lang_hooks.name, "GNU C++") == 0) + { + switch_to_section (darwin_sections[constructor_section]); + switch_to_section (darwin_sections[destructor_section]); + ASM_OUTPUT_ALIGN (asm_out_file, 1); + } + + /* If there was LTO assembler output, append it to asm_out_file. */ + if (lto_asm_out_name) + { + int n; + char *buf, *lto_asm_txt; + + /* Shouldn't be here if we failed to switch back. */ + gcc_assert (! 
saved_asm_out_file); + + lto_asm_out_file = fopen (lto_asm_out_name, "r"); + if (lto_asm_out_file == NULL) + fatal_error ("failed to open temporary file %s with LTO output", + lto_asm_out_name); + fseek (lto_asm_out_file, 0, SEEK_END); + n = ftell (lto_asm_out_file); + if (n > 0) + { + fseek (lto_asm_out_file, 0, SEEK_SET); + lto_asm_txt = buf = (char *) xmalloc (n + 1); + while (fgets (lto_asm_txt, n, lto_asm_out_file)) + fputs (lto_asm_txt, asm_out_file); + /* Put a termination label. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_SECTS_SECTION); + fprintf (asm_out_file, "L_GNU_LTO%d:\t;# end of lto\n", + lto_section_num); + /* Make sure our termination label stays in this section. */ + fputs ("\t.space\t1\n", asm_out_file); + } + + /* Remove the temporary file. */ + fclose (lto_asm_out_file); + unlink_if_ordinary (lto_asm_out_name); + free (lto_asm_out_name); + } + + /* Output the names and indices. */ + if (lto_section_names && lto_section_names->length ()) + { + int count; + darwin_lto_section_e *ref; + /* For now, we'll make the offsets 4 bytes and unaligned - we'll fix + the latter up ourselves. */ + const char *op = integer_asm_op (4,0); + + /* Emit the names. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_NAMES_SECTION); + FOR_EACH_VEC_ELT (*lto_section_names, count, ref) + { + fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\n", count); + /* We have to jump through hoops to get the values of the intra-section + offsets... */ + fprintf (asm_out_file, + "\t.set L$gnu$lto$noff%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME0\n", + count, count); + fprintf (asm_out_file, + "\t.set L$gnu$lto$nsiz%d,L_GNU_LTO_NAME%d-L_GNU_LTO_NAME%d\n", + count, count+1, count); + fprintf (asm_out_file, "\t.asciz\t\"%s\"\n", ref->sectname); + } + fprintf (asm_out_file, "L_GNU_LTO_NAME%d:\t;# end\n", lto_section_num); + /* make sure our termination label stays in this section. */ + fputs ("\t.space\t1\n", asm_out_file); + + /* Emit the Index. */ + fprintf (asm_out_file, "\t.section %s,%s,regular,debug\n", + LTO_SEGMENT_NAME, LTO_INDEX_SECTION); + fputs ("\t.align\t2\n", asm_out_file); + fputs ("# Section offset, Section length, Name offset, Name length\n", + asm_out_file); + FOR_EACH_VEC_ELT (*lto_section_names, count, ref) + { + fprintf (asm_out_file, "%s L$gnu$lto$offs%d\t;# %s\n", + op, count, ref->sectname); + fprintf (asm_out_file, "%s L$gnu$lto$size%d\n", op, count); + fprintf (asm_out_file, "%s L$gnu$lto$noff%d\n", op, count); + fprintf (asm_out_file, "%s L$gnu$lto$nsiz%d\n", op, count); + } + } + + /* If we have section anchors, then we must prevent the linker from + re-arranging data. */ + if (!DARWIN_SECTION_ANCHORS || !flag_section_anchors) + fprintf (asm_out_file, "\t.subsections_via_symbols\n"); +} + +/* TODO: Add a language hook for identifying if a decl is a vtable. */ +#define DARWIN_VTABLE_P(DECL) 0 + +/* Cross-module name binding. Darwin does not support overriding + functions at dynamic-link time, except for vtables in kexts. */ + +bool +darwin_binds_local_p (const_tree decl) +{ + return default_binds_local_p_1 (decl, + TARGET_KEXTABI && DARWIN_VTABLE_P (decl)); +} + +/* The Darwin's implementation of TARGET_ASM_OUTPUT_ANCHOR. Define the + anchor relative to ".", the current section position. We cannot use + the default one because ASM_OUTPUT_DEF is wrong for Darwin. 
*/ +void +darwin_asm_output_anchor (rtx symbol) +{ + fprintf (asm_out_file, "\t.set\t"); + assemble_name (asm_out_file, XSTR (symbol, 0)); + fprintf (asm_out_file, ", . + " HOST_WIDE_INT_PRINT_DEC "\n", + SYMBOL_REF_BLOCK_OFFSET (symbol)); +} + +/* Disable section anchoring on any section containing a zero-sized + object. */ +bool +darwin_use_anchors_for_symbol_p (const_rtx symbol) +{ + if (DARWIN_SECTION_ANCHORS && flag_section_anchors) + { + section *sect; + /* If the section contains a zero-sized object it's ineligible. */ + sect = SYMBOL_REF_BLOCK (symbol)->sect; + /* This should have the effect of disabling anchors for vars that follow + any zero-sized one, in a given section. */ + if (sect->common.flags & SECTION_NO_ANCHOR) + return false; + + /* Also check the normal reasons for suppressing. */ + return default_use_anchors_for_symbol_p (symbol); + } + else + return false; +} + +/* Set the darwin specific attributes on TYPE. */ +void +darwin_set_default_type_attributes (tree type) +{ + if (darwin_ms_struct + && TREE_CODE (type) == RECORD_TYPE) + TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("ms_struct"), + NULL_TREE, + TYPE_ATTRIBUTES (type)); +} + +/* True, iff we're generating code for loadable kernel extensions. */ + +bool +darwin_kextabi_p (void) { + return flag_apple_kext; +} + +void +darwin_override_options (void) +{ + /* Keep track of which (major) version we're generating code for. */ + if (darwin_macosx_version_min) + { + if (strverscmp (darwin_macosx_version_min, "10.6") >= 0) + generating_for_darwin_version = 10; + else if (strverscmp (darwin_macosx_version_min, "10.5") >= 0) + generating_for_darwin_version = 9; + + /* Earlier versions are not specifically accounted, until required. */ + } + + /* In principle, this should be c-family only. However, we really need to + set sensible defaults for LTO as well, since the section selection stuff + should check for correctness re. the ABI. TODO: check and provide the + flags (runtime & ABI) from the lto wrapper). */ + + /* Unless set, force ABI=2 for NeXT and m64, 0 otherwise. */ + if (!global_options_set.x_flag_objc_abi) + global_options.x_flag_objc_abi + = (!flag_next_runtime) + ? 0 + : (TARGET_64BIT ? 2 + : (generating_for_darwin_version >= 9) ? 1 + : 0); + + /* Objective-C family ABI 2 is only valid for next/m64 at present. */ + if (global_options_set.x_flag_objc_abi && flag_next_runtime) + { + if (TARGET_64BIT && global_options.x_flag_objc_abi < 2) + error_at (UNKNOWN_LOCATION, "%<-fobjc-abi-version%> >= 2 must be" + " used for %<-m64%> targets with" + " %<-fnext-runtime%>"); + if (!TARGET_64BIT && global_options.x_flag_objc_abi >= 2) + error_at (UNKNOWN_LOCATION, "%<-fobjc-abi-version%> >= 2 is not" + " supported on %<-m32%> targets with" + " %<-fnext-runtime%>"); + } + + /* Don't emit DWARF3/4 unless specifically selected. This is a + workaround for tool bugs. */ + if (!global_options_set.x_dwarf_strict) + dwarf_strict = 1; + if (!global_options_set.x_dwarf_version) + dwarf_version = 2; + + /* Do not allow unwind tables to be generated by default for m32. + fnon-call-exceptions will override this, regardless of what we do. */ + if (generating_for_darwin_version < 10 + && !global_options_set.x_flag_asynchronous_unwind_tables + && !TARGET_64BIT) + global_options.x_flag_asynchronous_unwind_tables = 0; + + /* Disable -freorder-blocks-and-partition when unwind tables are being + emitted for Darwin < 9 (OSX 10.5). 
+ The strategy is, "Unless the User has specifically set/unset an unwind + flag we will switch off -freorder-blocks-and-partition when unwind tables + will be generated". If the User specifically sets flags... we assume + (s)he knows why... */ + if (generating_for_darwin_version < 9 + && global_options_set.x_flag_reorder_blocks_and_partition + && ((global_options.x_flag_exceptions /* User, c++, java */ + && !global_options_set.x_flag_exceptions) /* User specified... */ + || (global_options.x_flag_unwind_tables + && !global_options_set.x_flag_unwind_tables) + || (global_options.x_flag_non_call_exceptions + && !global_options_set.x_flag_non_call_exceptions) + || (global_options.x_flag_asynchronous_unwind_tables + && !global_options_set.x_flag_asynchronous_unwind_tables))) + { + inform (input_location, + "-freorder-blocks-and-partition does not work with exceptions " + "on this architecture"); + flag_reorder_blocks_and_partition = 0; + flag_reorder_blocks = 1; + } + + /* FIXME: flag_objc_sjlj_exceptions is no longer needed since there is only + one valid choice of exception scheme for each runtime. */ + if (!global_options_set.x_flag_objc_sjlj_exceptions) + global_options.x_flag_objc_sjlj_exceptions = + flag_next_runtime && !TARGET_64BIT; + + /* FIXME: and this could be eliminated then too. */ + if (!global_options_set.x_flag_exceptions + && flag_objc_exceptions + && TARGET_64BIT) + flag_exceptions = 1; + + if (flag_mkernel || flag_apple_kext) + { + /* -mkernel implies -fapple-kext for C++ */ + if (strcmp (lang_hooks.name, "GNU C++") == 0) + flag_apple_kext = 1; + + flag_no_common = 1; + + /* No EH in kexts. */ + flag_exceptions = 0; + /* No -fnon-call-exceptions data in kexts. */ + flag_non_call_exceptions = 0; + /* so no tables either.. */ + flag_unwind_tables = 0; + flag_asynchronous_unwind_tables = 0; + /* We still need to emit branch islands for kernel context. */ + darwin_emit_branch_islands = true; + } + + if (flag_var_tracking_uninit == 0 + && generating_for_darwin_version >= 9 + && (flag_gtoggle ? (debug_info_level == DINFO_LEVEL_NONE) + : (debug_info_level >= DINFO_LEVEL_NORMAL)) + && write_symbols == DWARF2_DEBUG) + flag_var_tracking_uninit = flag_var_tracking; + + if (MACHO_DYNAMIC_NO_PIC_P) + { + if (flag_pic) + warning_at (UNKNOWN_LOCATION, 0, + "%<-mdynamic-no-pic%> overrides %<-fpic%>, %<-fPIC%>," + " %<-fpie%> or %<-fPIE%>"); + flag_pic = 0; + } + else if (flag_pic == 1) + { + /* Darwin's -fpic is -fPIC. */ + flag_pic = 2; + } + + /* It is assumed that branch island stubs are needed for earlier systems. */ + if (generating_for_darwin_version < 9) + darwin_emit_branch_islands = true; + else + emit_aligned_common = true; /* Later systems can support aligned common. */ + + /* The c_dialect...() macros are not available to us here. */ + darwin_running_cxx = (strstr (lang_hooks.name, "C++") != 0); +} + +#if DARWIN_PPC +/* Add $LDBL128 suffix to long double builtins for ppc darwin. 
*/ + +static void +darwin_patch_builtin (enum built_in_function fncode) +{ + tree fn = builtin_decl_explicit (fncode); + tree sym; + char *newname; + + if (!fn) + return; + + sym = DECL_ASSEMBLER_NAME (fn); + newname = ACONCAT (("_", IDENTIFIER_POINTER (sym), "$LDBL128", NULL)); + + set_user_assembler_name (fn, newname); + + fn = builtin_decl_implicit (fncode); + if (fn) + set_user_assembler_name (fn, newname); +} + +void +darwin_patch_builtins (void) +{ + if (LONG_DOUBLE_TYPE_SIZE != 128) + return; + +#define PATCH_BUILTIN(fncode) darwin_patch_builtin (fncode); +#define PATCH_BUILTIN_NO64(fncode) \ + if (!TARGET_64BIT) \ + darwin_patch_builtin (fncode); +#define PATCH_BUILTIN_VARIADIC(fncode) \ + if (!TARGET_64BIT \ + && (strverscmp (darwin_macosx_version_min, "10.3.9") >= 0)) \ + darwin_patch_builtin (fncode); +#include "darwin-ppc-ldouble-patch.def" +#undef PATCH_BUILTIN +#undef PATCH_BUILTIN_NO64 +#undef PATCH_BUILTIN_VARIADIC +} +#endif + +/* CFStrings implementation. */ +static GTY(()) tree cfstring_class_reference = NULL_TREE; +static GTY(()) tree cfstring_type_node = NULL_TREE; +static GTY(()) tree ccfstring_type_node = NULL_TREE; +static GTY(()) tree pccfstring_type_node = NULL_TREE; +static GTY(()) tree pcint_type_node = NULL_TREE; +static GTY(()) tree pcchar_type_node = NULL_TREE; + +static enum built_in_function darwin_builtin_cfstring; + +/* Store all constructed constant CFStrings in a hash table so that + they get uniqued properly. */ + +typedef struct GTY (()) cfstring_descriptor { + /* The string literal. */ + tree literal; + /* The resulting constant CFString. */ + tree constructor; +} cfstring_descriptor; + +static GTY ((param_is (struct cfstring_descriptor))) htab_t cfstring_htab; + +static hashval_t cfstring_hash (const void *); +static int cfstring_eq (const void *, const void *); + +static tree +add_builtin_field_decl (tree type, const char *name, tree **chain) +{ + tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier (name), type); + + if (*chain != NULL) + **chain = field; + *chain = &DECL_CHAIN (field); + + return field; +} + +tree +darwin_init_cfstring_builtins (unsigned builtin_cfstring) +{ + tree cfsfun, fields, pccfstring_ftype_pcchar; + tree *chain = NULL; + + darwin_builtin_cfstring = + (enum built_in_function) builtin_cfstring; + + /* struct __builtin_CFString { + const int *isa; (will point at + int flags; __CFConstantStringClassReference) + const char *str; + long length; + }; */ + + pcint_type_node = build_pointer_type + (build_qualified_type (integer_type_node, TYPE_QUAL_CONST)); + + pcchar_type_node = build_pointer_type + (build_qualified_type (char_type_node, TYPE_QUAL_CONST)); + + cfstring_type_node = (*lang_hooks.types.make_type) (RECORD_TYPE); + + /* Have to build backwards for finish struct. 
*/ + fields = add_builtin_field_decl (long_integer_type_node, "length", &chain); + add_builtin_field_decl (pcchar_type_node, "str", &chain); + add_builtin_field_decl (integer_type_node, "flags", &chain); + add_builtin_field_decl (pcint_type_node, "isa", &chain); + finish_builtin_struct (cfstring_type_node, "__builtin_CFString", + fields, NULL_TREE); + + /* const struct __builtin_CFstring * + __builtin___CFStringMakeConstantString (const char *); */ + + ccfstring_type_node = build_qualified_type + (cfstring_type_node, TYPE_QUAL_CONST); + pccfstring_type_node = build_pointer_type (ccfstring_type_node); + pccfstring_ftype_pcchar = build_function_type_list + (pccfstring_type_node, pcchar_type_node, NULL_TREE); + + cfsfun = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier ("__builtin___CFStringMakeConstantString"), + pccfstring_ftype_pcchar); + + TREE_PUBLIC (cfsfun) = 1; + DECL_EXTERNAL (cfsfun) = 1; + DECL_ARTIFICIAL (cfsfun) = 1; + /* Make a lang-specific section - dup_lang_specific_decl makes a new node + in place of the existing, which may be NULL. */ + DECL_LANG_SPECIFIC (cfsfun) = NULL; + (*lang_hooks.dup_lang_specific_decl) (cfsfun); + DECL_BUILT_IN_CLASS (cfsfun) = BUILT_IN_MD; + DECL_FUNCTION_CODE (cfsfun) = darwin_builtin_cfstring; + lang_hooks.builtin_function (cfsfun); + + /* extern int __CFConstantStringClassReference[]; */ + cfstring_class_reference = build_decl (BUILTINS_LOCATION, VAR_DECL, + get_identifier ("__CFConstantStringClassReference"), + build_array_type (integer_type_node, NULL_TREE)); + + TREE_PUBLIC (cfstring_class_reference) = 1; + DECL_ARTIFICIAL (cfstring_class_reference) = 1; + (*lang_hooks.decls.pushdecl) (cfstring_class_reference); + DECL_EXTERNAL (cfstring_class_reference) = 1; + rest_of_decl_compilation (cfstring_class_reference, 0, 0); + + /* Initialize the hash table used to hold the constant CFString objects. */ + cfstring_htab = htab_create_ggc (31, cfstring_hash, cfstring_eq, NULL); + + return cfstring_type_node; +} + +tree +darwin_fold_builtin (tree fndecl, int n_args, tree *argp, + bool ARG_UNUSED (ignore)) +{ + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + + if (fcode == darwin_builtin_cfstring) + { + if (!darwin_constant_cfstrings) + { + error ("built-in function %qD requires the" + " %<-mconstant-cfstrings%> flag", fndecl); + return error_mark_node; + } + + if (n_args != 1) + { + error ("built-in function %qD takes one argument only", fndecl); + return error_mark_node; + } + + return darwin_build_constant_cfstring (*argp); + } + + return NULL_TREE; +} + +void +darwin_rename_builtins (void) +{ + /* The system ___divdc3 routine in libSystem on darwin10 is not + accurate to 1ulp, ours is, so we avoid ever using the system name + for this routine and instead install a non-conflicting name that + is accurate. + + When -ffast-math or -funsafe-math-optimizations is given, we can + use the faster version. */ + if (!flag_unsafe_math_optimizations) + { + enum built_in_function dcode + = (enum built_in_function)(BUILT_IN_COMPLEX_DIV_MIN + + DCmode - MIN_MODE_COMPLEX_FLOAT); + tree fn = builtin_decl_explicit (dcode); + /* Fortran and c call TARGET_INIT_BUILTINS and + TARGET_INIT_LIBFUNCS at different times, so we have to put a + call into each to ensure that at least one of them is called + after build_common_builtin_nodes. A better fix is to add a + new hook to run after build_common_builtin_nodes runs. 
*/ + if (fn) + set_user_assembler_name (fn, "___ieee_divdc3"); + fn = builtin_decl_implicit (dcode); + if (fn) + set_user_assembler_name (fn, "___ieee_divdc3"); + } +} + +bool +darwin_libc_has_function (enum function_class fn_class) +{ + if (fn_class == function_sincos) + return false; + if (fn_class == function_c99_math_complex + || fn_class == function_c99_misc) + return (TARGET_64BIT + || strverscmp (darwin_macosx_version_min, "10.3") >= 0); + + return true; +} + +static hashval_t +cfstring_hash (const void *ptr) +{ + tree str = ((const struct cfstring_descriptor *)ptr)->literal; + const unsigned char *p = (const unsigned char *) TREE_STRING_POINTER (str); + int i, len = TREE_STRING_LENGTH (str); + hashval_t h = len; + + for (i = 0; i < len; i++) + h = ((h * 613) + p[i]); + + return h; +} + +static int +cfstring_eq (const void *ptr1, const void *ptr2) +{ + tree str1 = ((const struct cfstring_descriptor *)ptr1)->literal; + tree str2 = ((const struct cfstring_descriptor *)ptr2)->literal; + int len1 = TREE_STRING_LENGTH (str1); + + return (len1 == TREE_STRING_LENGTH (str2) + && !memcmp (TREE_STRING_POINTER (str1), TREE_STRING_POINTER (str2), + len1)); +} + +tree +darwin_build_constant_cfstring (tree str) +{ + struct cfstring_descriptor *desc, key; + void **loc; + tree addr; + + if (!str) + { + error ("CFString literal is missing"); + return error_mark_node; + } + + STRIP_NOPS (str); + + if (TREE_CODE (str) == ADDR_EXPR) + str = TREE_OPERAND (str, 0); + + if (TREE_CODE (str) != STRING_CST) + { + error ("CFString literal expression is not a string constant"); + return error_mark_node; + } + + /* Perhaps we already constructed a constant CFString just like this one? */ + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, INSERT); + desc = (struct cfstring_descriptor *) *loc; + + if (!desc) + { + tree var, constructor, field; + vec *v = NULL; + int length = TREE_STRING_LENGTH (str) - 1; + + if (darwin_warn_nonportable_cfstrings) + { + const char *s = TREE_STRING_POINTER (str); + int l = 0; + + for (l = 0; l < length; l++) + if (!s[l] || !isascii (s[l])) + { + warning (darwin_warn_nonportable_cfstrings, "%s in CFString literal", + s[l] ? "non-ASCII character" : "embedded NUL"); + break; + } + } + + *loc = desc = ggc_alloc_cleared_cfstring_descriptor (); + desc->literal = str; + + /* isa *. */ + field = TYPE_FIELDS (ccfstring_type_node); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build1 (ADDR_EXPR, TREE_TYPE (field), + cfstring_class_reference)); + /* flags */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build_int_cst (TREE_TYPE (field), 0x000007c8)); + /* string *. */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build1 (ADDR_EXPR, TREE_TYPE (field), str)); + /* length */ + field = DECL_CHAIN (field); + CONSTRUCTOR_APPEND_ELT(v, NULL_TREE, + build_int_cst (TREE_TYPE (field), length)); + + constructor = build_constructor (ccfstring_type_node, v); + TREE_READONLY (constructor) = 1; + TREE_CONSTANT (constructor) = 1; + TREE_STATIC (constructor) = 1; + + /* Fromage: The C++ flavor of 'build_unary_op' expects constructor nodes + to have the TREE_HAS_CONSTRUCTOR (...) bit set. However, this file is + being built without any knowledge of C++ tree accessors; hence, we shall + use the generic accessor that TREE_HAS_CONSTRUCTOR actually maps to! */ + if (darwin_running_cxx) + TREE_LANG_FLAG_4 (constructor) = 1; /* TREE_HAS_CONSTRUCTOR */ + + /* Create an anonymous global variable for this CFString. 
*/ + var = build_decl (input_location, CONST_DECL, + NULL, TREE_TYPE (constructor)); + DECL_ARTIFICIAL (var) = 1; + TREE_STATIC (var) = 1; + DECL_INITIAL (var) = constructor; + /* FIXME: This should use a translation_unit_decl to indicate file scope. */ + DECL_CONTEXT (var) = NULL_TREE; + desc->constructor = var; + } + + addr = build1 (ADDR_EXPR, pccfstring_type_node, desc->constructor); + TREE_CONSTANT (addr) = 1; + + return addr; +} + +bool +darwin_cfstring_p (tree str) +{ + struct cfstring_descriptor key; + void **loc; + + if (!str) + return false; + + STRIP_NOPS (str); + + if (TREE_CODE (str) == ADDR_EXPR) + str = TREE_OPERAND (str, 0); + + if (TREE_CODE (str) != STRING_CST) + return false; + + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, NO_INSERT); + + if (loc) + return true; + + return false; +} + +void +darwin_enter_string_into_cfstring_table (tree str) +{ + struct cfstring_descriptor key; + void **loc; + + key.literal = str; + loc = htab_find_slot (cfstring_htab, &key, INSERT); + + if (!*loc) + { + *loc = ggc_alloc_cleared_cfstring_descriptor (); + ((struct cfstring_descriptor *)*loc)->literal = str; + } +} + +/* Choose named function section based on its frequency. */ + +section * +darwin_function_section (tree decl, enum node_frequency freq, + bool startup, bool exit) +{ + /* Decide if we need to put this in a coalescable section. */ + bool weak = (decl + && DECL_WEAK (decl) + && (!DECL_ATTRIBUTES (decl) + || !lookup_attribute ("weak_import", + DECL_ATTRIBUTES (decl)))); + + /* If there is a specified section name, we should not be trying to + override. */ + if (decl && DECL_SECTION_NAME (decl) != NULL_TREE) + return get_named_section (decl, NULL, 0); + + /* We always put unlikely executed stuff in the cold section. */ + if (freq == NODE_FREQUENCY_UNLIKELY_EXECUTED) + return (weak) ? darwin_sections[text_cold_coal_section] + : darwin_sections[text_cold_section]; + + /* If we have LTO *and* feedback information, then let LTO handle + the function ordering, it makes a better job (for normal, hot, + startup and exit - hence the bailout for cold above). */ + if (in_lto_p && flag_profile_values) + goto default_function_sections; + + /* Non-cold startup code should go to startup subsection. */ + if (startup) + return (weak) ? darwin_sections[text_startup_coal_section] + : darwin_sections[text_startup_section]; + + /* Similarly for exit. */ + if (exit) + return (weak) ? darwin_sections[text_exit_coal_section] + : darwin_sections[text_exit_section]; + + /* Place hot code. */ + if (freq == NODE_FREQUENCY_HOT) + return (weak) ? darwin_sections[text_hot_coal_section] + : darwin_sections[text_hot_section]; + + /* Otherwise, default to the 'normal' non-reordered sections. */ +default_function_sections: + return (weak) ? darwin_sections[text_coal_section] + : text_section; +} + +/* When a function is partitioned between sections, we need to insert a label + at the start of each new chunk - so that it may become a valid 'atom' for + eh and debug purposes. Without this the linker will emit warnings if one + tries to add line location information (since the switched fragment will + be anonymous). */ + +void +darwin_function_switched_text_sections (FILE *fp, tree decl, bool new_is_cold) +{ + char buf[128]; + snprintf (buf, 128, "%s%s",new_is_cold?"__cold_sect_of_":"__hot_sect_of_", + IDENTIFIER_POINTER (DECL_NAME (decl))); + /* Make sure we pick up all the relevant quotes etc. 
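(Illustrative sketch, not part of the patch.)  Seen from the user side, the constant-CFString machinery above works like this: assuming a Darwin-hosted compiler with the default -mconstant-cfstrings, a call to the builtin registered in darwin_init_builtins is folded at compile time by darwin_fold_builtin / darwin_build_constant_cfstring into the address of a static four-field descriptor (isa, flags, string, length), and identical literals share a single descriptor via the cfstring_htab hash table:

    int
    main (void)
    {
      /* No code runs here at run time; the call folds to the address of a
         static __builtin_CFString object emitted once per distinct literal.  */
      const void *s = __builtin___CFStringMakeConstantString ("hello");
      return s == 0;
    }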
*/ + assemble_name_raw (fp, (const char *) buf); + fputs (":\n", fp); +} + +#include "gt-darwin.h" diff --git a/gcc-4.9/gcc/config/darwin.h b/gcc-4.9/gcc/config/darwin.h new file mode 100644 index 000000000..126364099 --- /dev/null +++ b/gcc-4.9/gcc/config/darwin.h @@ -0,0 +1,921 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 1989-2014 Free Software Foundation, Inc. + Contributed by Apple Computer Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef CONFIG_DARWIN_H +#define CONFIG_DARWIN_H + +/* The definitions in this file are common to all processor types + running Darwin, which is the kernel for Mac OS X. Darwin is + basically a BSD user layer laid over a Mach kernel, then evolved + for many years (at NeXT) in parallel with other Unix systems. So + while the runtime is a somewhat idiosyncratic Mach-based thing, + other definitions look like they would for a BSD variant. */ + +/* Although NeXT ran on many different architectures, as of Jan 2001 + the only supported Darwin targets are PowerPC and x86. */ + +/* One of Darwin's NeXT legacies is the Mach-O format, which is partly + like a.out and partly like COFF, with additional features like + multi-architecture binary support. */ + +#define DARWIN_X86 0 +#define DARWIN_PPC 0 + +/* Don't assume anything about the header files. */ +#define NO_IMPLICIT_EXTERN_C + +/* Suppress g++ attempt to link in the math library automatically. */ +#define MATH_LIBRARY "" + +/* We have atexit. */ + +#define HAVE_ATEXIT + +/* Define an empty body for the function do_global_dtors() in libgcc2.c. */ + +#define DO_GLOBAL_DTORS_BODY + +/* The string value for __SIZE_TYPE__. */ + +#ifndef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" +#endif + +/* Type used for ptrdiff_t, as a string used in a declaration. */ + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +/* wchar_t is int. 
*/ + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#define INT8_TYPE "signed char" +#define INT16_TYPE "short int" +#define INT32_TYPE "int" +#define INT64_TYPE "long long int" +#define UINT8_TYPE "unsigned char" +#define UINT16_TYPE "short unsigned int" +#define UINT32_TYPE "unsigned int" +#define UINT64_TYPE "long long unsigned int" + +#define INT_LEAST8_TYPE "signed char" +#define INT_LEAST16_TYPE "short int" +#define INT_LEAST32_TYPE "int" +#define INT_LEAST64_TYPE "long long int" +#define UINT_LEAST8_TYPE "unsigned char" +#define UINT_LEAST16_TYPE "short unsigned int" +#define UINT_LEAST32_TYPE "unsigned int" +#define UINT_LEAST64_TYPE "long long unsigned int" + +#define INT_FAST8_TYPE "signed char" +#define INT_FAST16_TYPE "short int" +#define INT_FAST32_TYPE "int" +#define INT_FAST64_TYPE "long long int" +#define UINT_FAST8_TYPE "unsigned char" +#define UINT_FAST16_TYPE "short unsigned int" +#define UINT_FAST32_TYPE "unsigned int" +#define UINT_FAST64_TYPE "long long unsigned int" + +#define INTPTR_TYPE "long int" +#define UINTPTR_TYPE "long unsigned int" + +#define SIG_ATOMIC_TYPE "int" + +/* Default to using the NeXT-style runtime, since that's what is + pre-installed on Darwin systems. */ + +#define NEXT_OBJC_RUNTIME 1 + +/* Don't default to pcc-struct-return, because gcc is the only compiler, and + we want to retain compatibility with older gcc versions. */ + +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* True if pragma ms_struct is in effect. */ +extern GTY(()) int darwin_ms_struct; + +#define DRIVER_SELF_SPECS \ + "%{gfull:-g -fno-eliminate-unused-debug-symbols} % 10.5 mmacosx-version-min= -lgcc_s.10.4) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \ + %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ + %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \ + -lgcc ; \ + :%:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_s.10.4) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \ + %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ + %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \ + -lgcc }" + +/* We specify crt0.o as -lcrt0.o so that ld will search the library path. + + crt3.o provides __cxa_atexit on systems that don't have it. Since + it's only used with C++, which requires passing -shared-libgcc, key + off that to avoid unnecessarily adding a destructor to every + powerpc program built. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "%{Zdynamiclib: %(darwin_dylib1) %{fgnu-tm: -lcrttms.o}} \ + %{!Zdynamiclib:%{Zbundle:%{!static: \ + %:version-compare(< 10.6 mmacosx-version-min= -lbundle1.o) \ + %{fgnu-tm: -lcrttms.o}}} \ + %{!Zbundle:%{pg:%{static:-lgcrt0.o} \ + %{!static:%{object:-lgcrt0.o} \ + %{!object:%{preload:-lgcrt0.o} \ + %{!preload:-lgcrt1.o \ + %:version-compare(>= 10.8 mmacosx-version-min= -no_new_main) \ + %(darwin_crt2)}}}} \ + %{!pg:%{static:-lcrt0.o} \ + %{!static:%{object:-lcrt0.o} \ + %{!object:%{preload:-lcrt0.o} \ + %{!preload: %(darwin_crt1) \ + %(darwin_crt2)}}}}}} \ + %{shared-libgcc:%:version-compare(< 10.5 mmacosx-version-min= crt3.o%s)}" + +/* We want a destructor last in the list. 
*/ +#define TM_DESTRUCTOR "%{fgnu-tm: -lcrttme.o}" +#define ENDFILE_SPEC TM_DESTRUCTOR + +#define DARWIN_EXTRA_SPECS \ + { "darwin_crt1", DARWIN_CRT1_SPEC }, \ + { "darwin_dylib1", DARWIN_DYLIB1_SPEC }, \ + { "darwin_minversion", DARWIN_MINVERSION_SPEC }, + +#define DARWIN_DYLIB1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -ldylib1.o) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -ldylib1.10.5.o)" + +#define DARWIN_CRT1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -lcrt1.o) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lcrt1.10.5.o) \ + %:version-compare(>< 10.6 10.8 mmacosx-version-min= -lcrt1.10.6.o) \ + %{fgnu-tm: -lcrttms.o}" + +/* Default Darwin ASM_SPEC, very simple. */ +#define ASM_SPEC "-arch %(darwin_arch) \ + %{Zforce_cpusubtype_ALL:-force_cpusubtype_ALL} \ + %{static}" + +/* Default ASM_DEBUG_SPEC. Darwin's as cannot currently produce dwarf + debugging data. */ + +#define ASM_DEBUG_SPEC "%{g*:%{!g0:%{!gdwarf*:--gstabs}}}" + +/* We still allow output of STABS. */ + +#define DBX_DEBUGGING_INFO 1 + +#define DWARF2_DEBUGGING_INFO 1 +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG + +#define DEBUG_FRAME_SECTION "__DWARF,__debug_frame,regular,debug" +#define DEBUG_INFO_SECTION "__DWARF,__debug_info,regular,debug" +#define DEBUG_ABBREV_SECTION "__DWARF,__debug_abbrev,regular,debug" +#define DEBUG_ARANGES_SECTION "__DWARF,__debug_aranges,regular,debug" +#define DEBUG_MACINFO_SECTION "__DWARF,__debug_macinfo,regular,debug" +#define DEBUG_LINE_SECTION "__DWARF,__debug_line,regular,debug" +#define DEBUG_LOC_SECTION "__DWARF,__debug_loc,regular,debug" +#define DEBUG_PUBNAMES_SECTION "__DWARF,__debug_pubnames,regular,debug" +#define DEBUG_PUBTYPES_SECTION "__DWARF,__debug_pubtypes,regular,debug" +#define DEBUG_STR_SECTION "__DWARF,__debug_str,regular,debug" +#define DEBUG_RANGES_SECTION "__DWARF,__debug_ranges,regular,debug" +#define DEBUG_MACRO_SECTION "__DWARF,__debug_macro,regular,debug" + +#define TARGET_WANT_DEBUG_PUB_SECTIONS true + +#define TARGET_FORCE_AT_COMP_DIR true + +/* When generating stabs debugging, use N_BINCL entries. */ + +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#define DBX_CONTIN_LENGTH 0 + +/* gdb needs a null N_SO at the end of each file for scattered loading. */ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +/* GCC's definition of 'one_only' is the same as its definition of 'weak'. */ +#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) + +/* Mach-O supports 'weak imports', and 'weak definitions' in coalesced + sections. machopic_select_section ensures that weak variables go in + coalesced sections. Weak aliases (or any other kind of aliases) are + not supported. Weak symbols that aren't visible outside the .s file + are not supported. */ +#define ASM_WEAKEN_DECL(FILE, DECL, NAME, ALIAS) \ + do { \ + if (ALIAS) \ + { \ + warning (0, "alias definitions not supported in Mach-O; ignored"); \ + break; \ + } \ + \ + if (! DECL_EXTERNAL (DECL) && TREE_PUBLIC (DECL)) \ + targetm.asm_out.globalize_label (FILE, NAME); \ + if (DECL_EXTERNAL (DECL)) \ + fputs ("\t.weak_reference ", FILE); \ + else if (lookup_attribute ("weak_import", DECL_ATTRIBUTES (DECL))) \ + break; \ + else if (TREE_PUBLIC (DECL)) \ + fputs ("\t.weak_definition ", FILE); \ + else \ + break; \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + } while (0) + +/* Darwin has the pthread routines in libSystem, which every program + links to, so there's no need for weak-ness for that. 
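(Illustrative sketch, not part of the patch.)  The ASM_WEAKEN_DECL macro above distinguishes weak references from weak definitions, since Mach-O has separate directives for the two.  For a translation unit such as

    int __attribute__ ((weak)) counter = 0;          /* public weak definition  */
    extern void hook (void) __attribute__ ((weak));  /* external weak reference */
    void maybe_hook (void) { if (hook) hook (); }

the macro would emit roughly ".globl _counter" followed by ".weak_definition _counter" for the definition, and ".weak_reference _hook" for the reference; the exact output depends on USER_LABEL_PREFIX and on how the declarations are actually used.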
*/ +#define GTHREAD_USE_WEAK 0 + +/* The Darwin linker doesn't want coalesced symbols to appear in + a static archive's table of contents. */ +#undef TARGET_WEAK_NOT_IN_ARCHIVE_TOC +#define TARGET_WEAK_NOT_IN_ARCHIVE_TOC 1 + +/* On Darwin, we don't (at the time of writing) have linkonce sections + with names, so it's safe to make the class data not comdat. */ +#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT hook_bool_void_false + +/* For efficiency, on Darwin the RTTI information that is always + emitted in the standard C++ library should not be COMDAT. */ +#define TARGET_CXX_LIBRARY_RTTI_COMDAT hook_bool_void_false + +/* We make exception information linkonce. */ +#undef TARGET_USES_WEAK_UNWIND_INFO +#define TARGET_USES_WEAK_UNWIND_INFO 1 + +/* We need to use a nonlocal label for the start of an EH frame: the + Darwin linker requires that a coalesced section start with a label. + Unfortunately, it also requires that 'debug' sections don't contain + labels. */ +#undef FRAME_BEGIN_LABEL +#define FRAME_BEGIN_LABEL (for_eh ? "EH_frame" : "Lframe") + +/* Emit a label for the FDE corresponding to DECL. EMPTY means + emit a label for an empty FDE. */ +#define TARGET_ASM_EMIT_UNWIND_LABEL darwin_emit_unwind_label + +/* Emit a label to separate the exception table. */ +#define TARGET_ASM_EMIT_EXCEPT_TABLE_LABEL darwin_emit_except_table_label + +/* Our profiling scheme doesn't LP labels and counter words. */ + +#define NO_PROFILE_COUNTERS 1 + +#undef INIT_SECTION_ASM_OP +#define INIT_SECTION_ASM_OP + +#undef INVOKE__main + +#define TARGET_ASM_CONSTRUCTOR machopic_asm_out_constructor +#define TARGET_ASM_DESTRUCTOR machopic_asm_out_destructor + +/* Always prefix with an underscore. */ + +#define USER_LABEL_PREFIX "_" + +/* A dummy symbol that will be replaced with the function base name. */ +#define MACHOPIC_FUNCTION_BASE_NAME "" + +/* Don't output a .file directive. That is only used by the assembler for + error reporting. */ +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false + +#undef TARGET_ASM_FILE_END +#define TARGET_ASM_FILE_END darwin_file_end + +/* Because Mach-O relocations have a counter from 1 to 255 for the + section number they apply to, it is necessary to output all + normal sections before the LTO sections, to make sure that the + sections that may have relocations always have a section number + smaller than 255. */ +#undef TARGET_ASM_LTO_START +#define TARGET_ASM_LTO_START darwin_asm_lto_start +#undef TARGET_ASM_LTO_END +#define TARGET_ASM_LTO_END darwin_asm_lto_end + +#define ASM_OUTPUT_SKIP(FILE,SIZE) \ + fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", SIZE) + +/* Give ObjC methods pretty symbol names. */ + +#undef OBJC_GEN_METHOD_LABEL +#define OBJC_GEN_METHOD_LABEL(BUF,IS_INST,CLASS_NAME,CAT_NAME,SEL_NAME,NUM) \ + do { if (CAT_NAME) \ + sprintf (BUF, "%c[%s(%s) %s]", (IS_INST) ? '-' : '+', \ + (CLASS_NAME), (CAT_NAME), (SEL_NAME)); \ + else \ + sprintf (BUF, "%c[%s %s]", (IS_INST) ? '-' : '+', \ + (CLASS_NAME), (SEL_NAME)); \ + } while (0) + +#undef ASM_DECLARE_OBJECT_NAME +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + darwin_asm_declare_object_name ((FILE), (NAME), (DECL)) + +/* The RTTI data (e.g., __ti4name) is common and public (and static), + but it does need to be referenced via indirect PIC data pointers. + The machopic_define_symbol calls are telling the machopic subsystem + that the name *is* defined in this module, so it doesn't need to + make them indirect. 
*/ + +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do { \ + const char *xname = NAME; \ + if (GET_CODE (XEXP (DECL_RTL (DECL), 0)) != SYMBOL_REF) \ + xname = IDENTIFIER_POINTER (DECL_NAME (DECL)); \ + if (! DECL_WEAK (DECL) \ + && ((TREE_STATIC (DECL) \ + && (!DECL_COMMON (DECL) || !TREE_PUBLIC (DECL))) \ + || DECL_INITIAL (DECL))) \ + machopic_define_symbol (DECL_RTL (DECL)); \ + if ((TREE_STATIC (DECL) \ + && (!DECL_COMMON (DECL) || !TREE_PUBLIC (DECL))) \ + || DECL_INITIAL (DECL)) \ + (* targetm.encode_section_info) (DECL, DECL_RTL (DECL), false); \ + ASM_OUTPUT_FUNCTION_LABEL (FILE, xname, DECL); \ + } while (0) + +#undef TARGET_ASM_DECLARE_CONSTANT_NAME +#define TARGET_ASM_DECLARE_CONSTANT_NAME darwin_asm_declare_constant_name + +/* Wrap new method names in quotes so the assembler doesn't gag. + Make Objective-C internal symbols local and in doing this, we need + to accommodate the name mangling done by c++ on file scope locals. */ + +int darwin_label_is_anonymous_local_objc_name (const char *name); + +#undef ASM_OUTPUT_LABELREF +#define ASM_OUTPUT_LABELREF(FILE,NAME) \ + do { \ + const char *xname = (NAME); \ + if (! strcmp (xname, MACHOPIC_FUNCTION_BASE_NAME)) \ + machopic_output_function_base_name(FILE); \ + else if (xname[0] == '&' || xname[0] == '*') \ + { \ + int len = strlen (xname); \ + if (len > 6 && !strcmp ("$stub", xname + len - 5)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 7 && !strcmp ("$stub\"", xname + len - 6)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 14 && !strcmp ("$non_lazy_ptr", xname + len - 13)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + else if (len > 15 && !strcmp ("$non_lazy_ptr\"", xname + len - 14)) \ + machopic_validate_stub_or_non_lazy_ptr (xname); \ + if (xname[1] != '"' && name_needs_quotes (&xname[1])) \ + fprintf (FILE, "\"%s\"", &xname[1]); \ + else \ + fputs (&xname[1], FILE); \ + } \ + else if (xname[0] == '+' || xname[0] == '-') \ + fprintf (FILE, "\"%s\"", xname); \ + else if (darwin_label_is_anonymous_local_objc_name (xname)) \ + fprintf (FILE, "L%s", xname); \ + else if (xname[0] != '"' && name_needs_quotes (xname)) \ + asm_fprintf (FILE, "\"%U%s\"", xname); \ + else \ + asm_fprintf (FILE, "%U%s", xname); \ + } while (0) + +/* Output before executable code. */ +#undef TEXT_SECTION_ASM_OP +#define TEXT_SECTION_ASM_OP "\t.text" + +/* Output before writable data. */ + +#undef DATA_SECTION_ASM_OP +#define DATA_SECTION_ASM_OP "\t.data" + +#undef ALIGN_ASM_OP +#define ALIGN_ASM_OP ".align" + +#undef ASM_OUTPUT_ALIGN +#define ASM_OUTPUT_ALIGN(FILE,LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t%s\t%d\n", ALIGN_ASM_OP, (LOG)) + +/* The maximum alignment which the object file format can support in + bits. For Mach-O, this is 2^15 bytes. */ + +#undef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (0x8000 * 8) + +#define L2_MAX_OFILE_ALIGNMENT 15 + +/* These are the three variants that emit referenced blank space. 
*/ +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +#undef ASM_OUTPUT_ALIGNED_DECL_LOCAL +#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_asm_output_aligned_decl_local \ + ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +#undef ASM_OUTPUT_ALIGNED_DECL_COMMON +#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ + darwin_asm_output_aligned_decl_common \ + ((FILE), (DECL), (NAME), (SIZE), (ALIGN)) + +/* The generic version, archs should over-ride where required. */ +#define MACHOPIC_NL_SYMBOL_PTR_SECTION ".non_lazy_symbol_pointer" + +/* Declare the section variables. */ +#ifndef USED_FOR_TARGET +enum darwin_section_enum { +#define DEF_SECTION(NAME, FLAGS, DIRECTIVE, OBJC) NAME, +#include "darwin-sections.def" +#undef DEF_SECTION + NUM_DARWIN_SECTIONS +}; +extern GTY(()) section * darwin_sections[NUM_DARWIN_SECTIONS]; +#endif + +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION machopic_select_section + +#undef TARGET_ASM_FUNCTION_SECTION +#define TARGET_ASM_FUNCTION_SECTION darwin_function_section + +#undef TARGET_ASM_FUNCTION_SWITCHED_TEXT_SECTIONS +#define TARGET_ASM_FUNCTION_SWITCHED_TEXT_SECTIONS \ + darwin_function_switched_text_sections + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION machopic_select_rtx_section +#undef TARGET_ASM_UNIQUE_SECTION +#define TARGET_ASM_UNIQUE_SECTION darwin_unique_section +#undef TARGET_ASM_FUNCTION_RODATA_SECTION +#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section + +#undef TARGET_ASM_TM_CLONE_TABLE_SECTION +#define TARGET_ASM_TM_CLONE_TABLE_SECTION darwin_tm_clone_table_section + +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK machopic_reloc_rw_mask + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " +#define TARGET_ASM_GLOBALIZE_LABEL darwin_globalize_label + +/* Emit an assembler directive to set visibility for a symbol. Used + to support visibility attribute and Darwin's private extern + feature. */ +#undef TARGET_ASM_ASSEMBLE_VISIBILITY +#define TARGET_ASM_ASSEMBLE_VISIBILITY darwin_assemble_visibility + +/* Extra attributes for Darwin. */ +#define SUBTARGET_ATTRIBUTE_TABLE \ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, \ + affects_type_identity } */ \ + { "apple_kext_compatibility", 0, 0, false, true, false, \ + darwin_handle_kext_attribute, false }, \ + { "weak_import", 0, 0, true, false, false, \ + darwin_handle_weak_import_attribute, false } + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \ + sprintf (LABEL, "*%s%ld", PREFIX, (long)(NUM)) + +#undef TARGET_ASM_MARK_DECL_PRESERVED +#define TARGET_ASM_MARK_DECL_PRESERVED darwin_mark_decl_preserved + +/* Set on a symbol with SYMBOL_FLAG_FUNCTION or + MACHO_SYMBOL_FLAG_VARIABLE to indicate that the function or + variable has been defined in this translation unit. + When porting Mach-O to new architectures you need to make + sure these aren't clobbered by the backend. */ + +#define MACHO_SYMBOL_FLAG_VARIABLE (SYMBOL_FLAG_MACH_DEP) +#define MACHO_SYMBOL_FLAG_DEFINED ((SYMBOL_FLAG_MACH_DEP) << 1) + +/* Set on a symbol to indicate when fix-and-continue style code + generation is being used and the symbol refers to a static symbol + that should be rebound from new instances of a translation unit to + the original instance of the data. 
*/ + +#define MACHO_SYMBOL_STATIC ((SYMBOL_FLAG_MACH_DEP) << 2) + +/* Symbolic names for various things we might know about a symbol. */ + +enum machopic_addr_class { + MACHOPIC_UNDEFINED, + MACHOPIC_DEFINED_DATA, + MACHOPIC_UNDEFINED_DATA, + MACHOPIC_DEFINED_FUNCTION, + MACHOPIC_UNDEFINED_FUNCTION +}; + +/* Macros defining the various PIC cases. */ + +#undef MACHO_DYNAMIC_NO_PIC_P +#define MACHO_DYNAMIC_NO_PIC_P (TARGET_MACHO_DYNAMIC_NO_PIC) +#undef MACHOPIC_INDIRECT +#define MACHOPIC_INDIRECT (flag_pic || MACHO_DYNAMIC_NO_PIC_P) +#define MACHOPIC_JUST_INDIRECT (MACHO_DYNAMIC_NO_PIC_P) +#undef MACHOPIC_PURE +#define MACHOPIC_PURE (flag_pic && ! MACHO_DYNAMIC_NO_PIC_P) + +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO darwin_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING default_strip_name_encoding + +#define GEN_BINDER_NAME_FOR_STUB(BUF,STUB,STUB_LENGTH) \ + do { \ + const char *const stub_ = (STUB); \ + char *buffer_ = (BUF); \ + strcpy (buffer_, stub_); \ + if (stub_[0] == '"') \ + { \ + strcpy (buffer_ + (STUB_LENGTH) - 1, "_binder\""); \ + } \ + else \ + { \ + strcpy (buffer_ + (STUB_LENGTH), "_binder"); \ + } \ + } while (0) + +#define GEN_SYMBOL_NAME_FOR_SYMBOL(BUF,SYMBOL,SYMBOL_LENGTH) \ + do { \ + const char *const symbol_ = (SYMBOL); \ + char *buffer_ = (BUF); \ + if (name_needs_quotes (symbol_) && symbol_[0] != '"') \ + { \ + sprintf (buffer_, "\"%s\"", symbol_); \ + } \ + else \ + { \ + strcpy (buffer_, symbol_); \ + } \ + } while (0) + +/* Given a symbol name string, create the lazy pointer version + of the symbol name. */ + +#define GEN_LAZY_PTR_NAME_FOR_SYMBOL(BUF,SYMBOL,SYMBOL_LENGTH) \ + do { \ + const char *symbol_ = (SYMBOL); \ + char *buffer_ = (BUF); \ + if (symbol_[0] == '"') \ + { \ + strcpy (buffer_, "\"L"); \ + strcpy (buffer_ + 2, symbol_ + 1); \ + strcpy (buffer_ + (SYMBOL_LENGTH), "$lazy_ptr\""); \ + } \ + else if (name_needs_quotes (symbol_)) \ + { \ + strcpy (buffer_, "\"L"); \ + strcpy (buffer_ + 2, symbol_); \ + strcpy (buffer_ + (SYMBOL_LENGTH) + 2, "$lazy_ptr\""); \ + } \ + else \ + { \ + strcpy (buffer_, "L"); \ + strcpy (buffer_ + 1, symbol_); \ + strcpy (buffer_ + (SYMBOL_LENGTH) + 1, "$lazy_ptr"); \ + } \ + } while (0) + +#define EH_FRAME_SECTION_NAME "__TEXT" +#define EH_FRAME_SECTION_ATTR ",coalesced,no_toc+strip_static_syms+live_support" + +/* Java runtime class list. */ +#define JCR_SECTION_NAME "__DATA,jcr,regular,no_dead_strip" + +#undef ASM_PREFERRED_EH_DATA_FORMAT +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ + (((CODE) == 2 && (GLOBAL) == 1) \ + ? (DW_EH_PE_pcrel | DW_EH_PE_indirect | DW_EH_PE_sdata4) : \ + ((CODE) == 1 || (GLOBAL) == 0) ? DW_EH_PE_pcrel : DW_EH_PE_absptr) + +#define ASM_OUTPUT_DWARF_DELTA(FILE,SIZE,LABEL1,LABEL2) \ + darwin_asm_output_dwarf_delta (FILE, SIZE, LABEL1, LABEL2) + +#define ASM_OUTPUT_DWARF_OFFSET(FILE,SIZE,LABEL,BASE) \ + darwin_asm_output_dwarf_offset (FILE, SIZE, LABEL, BASE) + +#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(ASM_OUT_FILE, ENCODING, SIZE, ADDR, DONE) \ + if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1)) { \ + darwin_non_lazy_pcrel (ASM_OUT_FILE, ADDR); \ + goto DONE; \ + } + +/* Experimentally, putting jump tables in text is faster on SPEC. + Also this is needed for correctness for coalesced functions. 
*/ + +#ifndef JUMP_TABLES_IN_TEXT_SECTION +#define JUMP_TABLES_IN_TEXT_SECTION 1 +#endif + +#define TARGET_TERMINATE_DW2_EH_FRAME_INFO false + +#define TARGET_ASM_INIT_SECTIONS darwin_init_sections +#undef TARGET_ASM_NAMED_SECTION +#define TARGET_ASM_NAMED_SECTION darwin_asm_named_section + +#define DARWIN_REGISTER_TARGET_PRAGMAS() \ + do { \ + if (!flag_preprocess_only) \ + cpp_register_pragma (parse_in, NULL, "mark", \ + darwin_pragma_ignore, false); \ + c_register_pragma (0, "options", darwin_pragma_options); \ + c_register_pragma (0, "segment", darwin_pragma_ignore); \ + c_register_pragma (0, "unused", darwin_pragma_unused); \ + c_register_pragma (0, "ms_struct", darwin_pragma_ms_struct); \ + } while (0) + +#undef ASM_APP_ON +#define ASM_APP_ON "" +#undef ASM_APP_OFF +#define ASM_APP_OFF "" + +void darwin_register_frameworks (const char *, const char *, int); +void darwin_register_objc_includes (const char *, const char *, int); +#define TARGET_EXTRA_PRE_INCLUDES darwin_register_objc_includes +#define TARGET_EXTRA_INCLUDES darwin_register_frameworks + +void add_framework_path (char *); +#define TARGET_OPTF add_framework_path + +#define TARGET_POSIX_IO + +#define WINT_TYPE "int" + +/* Every program on darwin links against libSystem which contains the pthread + routines, so there's no need to explicitly call out when doing threaded + work. */ + +#undef GOMP_SELF_SPECS +#define GOMP_SELF_SPECS "" +#undef GTM_SELF_SPECS +#define GTM_SELF_SPECS "" + +/* Darwin disables section anchors by default. + They should be enabled per arch where support exists in that arch. */ +#define TARGET_ASM_OUTPUT_ANCHOR NULL +#define DARWIN_SECTION_ANCHORS 0 + +#define HAVE_ENABLE_EXECUTE_STACK + +/* For Apple KEXTs, we make the constructors return this to match gcc + 2.95. */ +#define TARGET_CXX_CDTOR_RETURNS_THIS (darwin_kextabi_p) +#define TARGET_KEXTABI flag_apple_kext + +/* We have target-specific builtins. */ +#define SUBTARGET_FOLD_BUILTIN darwin_fold_builtin + +#define TARGET_N_FORMAT_TYPES 1 +#define TARGET_FORMAT_TYPES darwin_additional_format_types + +#ifndef USED_FOR_TARGET +extern void darwin_driver_init (unsigned int *,struct cl_decoded_option **); +#define GCC_DRIVER_HOST_INITIALIZATION \ + darwin_driver_init (&decoded_options_count, &decoded_options) +#endif + +/* The Apple assembler and linker do not support constructor priorities. */ +#undef SUPPORTS_INIT_PRIORITY +#define SUPPORTS_INIT_PRIORITY 0 + +/* When building cross-compilers (and native crosses) we shall default to + providing an osx-version-min of this unless overridden by the User. */ +#define DEF_MIN_OSX_VERSION "10.4" + +#endif /* CONFIG_DARWIN_H */ diff --git a/gcc-4.9/gcc/config/darwin.opt b/gcc-4.9/gcc/config/darwin.opt new file mode 100644 index 000000000..cedfb7a5b --- /dev/null +++ b/gcc-4.9/gcc/config/darwin.opt @@ -0,0 +1,393 @@ +; Processor-independent options for Darwin. + +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. 
+; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; Various linker options have a -Z added so that they can get to specs +; processing without interference. Note that an option name with a +; prefix that matches another option name, that also takes an +; argument, being mapped to a -Z linker option, needs to be modified +; so the prefix is different, otherwise a '*' after the shorter option +; will match with the longer one. + +all_load +Driver Alias(Zall_load) + +allowable_client +Driver Separate Alias(Zallowable_client) + +arch +Driver RejectNegative Separate + +arch_errors_fatal +Driver Alias(Zarch_errors_fatal) + +bind_at_load +Driver Alias(Zbind_at_load) + +bundle +Driver Alias(Zbundle) + +bundle_loader +Driver Separate Alias(Zbundle_loader) + +dead_strip +Driver Alias(Zdead_strip) + +dependency-file +C ObjC C++ ObjC++ Separate Alias(MF) MissingArgError(missing filename after %qs) + +dylib_file +Driver Separate Alias(Zdylib_file) + +dylinker +Driver + +dynamic +Driver Alias(Zdynamic) + +dynamiclib +Driver Alias(Zdynamiclib) + +exported_symbols_list +Driver Separate Alias(Zexported_symbols_list) + +filelist +Driver RejectNegative Separate + +findirect-virtual-calls +Driver RejectNegative + +flat_namespace +Driver RejectNegative Alias(Zflat_namespace) + +force_cpusubtype_ALL +Driver RejectNegative Alias(Zforce_cpusubtype_ALL) + +force_flat_namespace +Driver RejectNegative Alias(Zforce_flat_namespace) + +framework +Driver RejectNegative Separate + +fterminated-vtables +Driver RejectNegative + +gfull +Driver + +gused +Driver + +headerpad_max_install_names +Driver + +image_base +Driver Separate Alias(Zimage_base) + +init +Driver Separate Alias(Zinit) + +install_name +Driver Separate Alias(Zinstall_name) + +keep_private_externs +Driver + +mconstant-cfstrings +Target Report Var(darwin_constant_cfstrings) Init(1) +Generate compile-time CFString objects + +multi_module +Driver RejectNegative Alias(Zmulti_module) + +multiply_defined +Driver RejectNegative Separate Alias(Zmultiply_defined) + +multiply_defined_unused +Driver RejectNegative Separate Alias(Zmultiplydefinedunused) + +no_dead_strip_inits_and_terms +Driver Alias(Zno_dead_strip_inits_and_terms) + +nofixprebinding +Driver + +nomultidefs +Driver + +noprebind +Driver + +noseglinkedit +Driver + +object +Driver + +prebind +Driver + +prebind_all_twolevel_modules +Driver + +preload +Driver + +private_bundle +Driver + +pthread +Driver + +seg_addr_table +Driver Separate Alias(Zseg_addr_table) + +seg_addr_table_filename +Driver Separate Alias(Zfn_seg_addr_table_filename) + +segaddr +Driver Separate Args(2) Alias(Zsegaddr) + +seglinkedit +Driver + +segs_read_only_addr +Driver Separate Alias(Zsegs_read_only_addr) + +segs_read_write_addr +Driver Separate Alias(Zsegs_read_write_addr) + +single_module +Driver Alias(Zsingle_module) + +twolevel_namespace +Driver + +twolevel_namespace_hints +Driver + +umbrella +Driver Separate Alias(Zumbrella) + +unexported_symbols_list +Driver Separate Alias(Zunexported_symbols_list) + +weak_reference_mismatches +Driver Separate Alias(Zweak_reference_mismatches) + +whatsloaded +Driver + +whyload +Driver + +y +Driver Joined + +Mach +Driver + +Wnonportable-cfstrings +Target Report Var(darwin_warn_nonportable_cfstrings) Init(1) Warning +Warn if constant CFString objects contain non-portable characters + +; Use new-style pic stubs if this is true, x86 only so far. 
+matt-stubs +Target Report Var(darwin_macho_att_stub) Init(1) +Generate AT&T-style stubs for Mach-O + +mdynamic-no-pic +Target Common Report Mask(MACHO_DYNAMIC_NO_PIC) +Generate code suitable for executables (NOT shared libs) + +mfix-and-continue +Target Report Var(darwin_fix_and_continue) +Generate code suitable for fast turn around debugging + +; The Init here is for the convenience of GCC developers, so that cc1 +; and cc1plus don't crash if no -mmacosx-version-min is passed. The +; driver will always pass a -mmacosx-version-min, so in normal use the +; Init is never used. Useful for setting the OS on which people +; usually debug. +mmacosx-version-min= +Target Joined Report Var(darwin_macosx_version_min) Init("10.6") +The earliest MacOS X version on which this program will run + +mone-byte-bool +Target RejectNegative Report Var(darwin_one_byte_bool) +Set sizeof(bool) to 1 + +fapple-kext +Target Report C++ Var(flag_apple_kext) +Generate code for darwin loadable kernel extensions + +mkernel +Target Report Var(flag_mkernel) +Generate code for the kernel or loadable kernel extensions + +iframework +Target RejectNegative C ObjC C++ ObjC++ Joined Separate +-iframework Add to the end of the system framework include path + +X +Driver + +Zall_load +Driver + +Zallowable_client +Driver Separate + +Zarch_errors_fatal +Driver + +Zbind_at_load +Driver + +Zbundle +Driver + +Zbundle_loader +Driver Separate + +Zdead_strip +Driver + +Zdylib_file +Driver Separate + +Zdynamic +Driver + +Zdynamiclib +Driver + +Zexported_symbols_list +Driver Separate + +Zfn_seg_addr_table_filename +Driver Separate + +Zflat_namespace +Driver + +Zforce_cpusubtype_ALL +Driver + +Zforce_flat_namespace +Driver + +Zimage_base +Driver Separate + +Zinit +Driver Separate + +Zinstall_name +Driver Separate + +Zmulti_module +Driver + +Zmultiply_defined +Driver Separate + +Zmultiplydefinedunused +Driver Separate + +Zno_dead_strip_inits_and_terms +Driver + +Zseg_addr_table +Driver Separate + +Zsegaddr +Driver Separate Args(2) + +Zsegs_read_only_addr +Driver Separate + +Zsegs_read_write_addr +Driver Separate + +Zsingle_module +Driver + +Zumbrella +Driver Separate + +Zunexported_symbols_list +Driver Separate + +Zweak_reference_mismatches +Driver Separate + +client_name +Driver Separate + +compatibility_version +Driver Separate + +current_version +Driver Separate + +dylinker_install_name +Driver Separate + +pagezero_size +Driver Separate + +read_only_relocs +Driver Separate + +sectalign +Driver Separate Args(3) + +sectcreate +Driver Separate Args(3) + +sectobjectsymbols +Driver Separate Args(2) + +sectorder +Driver Separate Args(3) + +seg1addr +Driver Separate + +segcreate +Driver Separate Args(3) + +segprot +Driver Separate Args(3) + +segs_read_only_addr +Driver Separate + +segs_read_write_addr +Driver Separate + +sub_library +Driver Separate + +sub_umbrella +Driver Separate + +undefined +Driver Separate diff --git a/gcc-4.9/gcc/config/darwin10.h b/gcc-4.9/gcc/config/darwin10.h new file mode 100644 index 000000000..9eb60dbfb --- /dev/null +++ b/gcc-4.9/gcc/config/darwin10.h @@ -0,0 +1,34 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 2009-2014 Free Software Foundation, Inc. + Contributed by Jack Howarth . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Fix PR41260 by passing -no_compact_unwind on darwin10 and later until + unwinder in libSystem is fixed to digest new epilog unwinding notes. + + Fix PR47558 by linking against libSystem ahead of libgcc_ext. */ +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ +"%:version-compare(>= 10.6 mmacosx-version-min= -no_compact_unwind) \ + %{!static:%{!static-libgcc: \ + %:version-compare(>= 10.6 mmacosx-version-min= -lSystem) } } \ + %{fno-pic|fno-PIC|fno-pie|fno-PIE|fapple-kext|mkernel|static|mdynamic-no-pic: \ + %:version-compare(>= 10.7 mmacosx-version-min= -no_pie) } %G %L" + +#undef DEF_MIN_OSX_VERSION +#define DEF_MIN_OSX_VERSION "10.6" diff --git a/gcc-4.9/gcc/config/darwin9.h b/gcc-4.9/gcc/config/darwin9.h new file mode 100644 index 000000000..675001cee --- /dev/null +++ b/gcc-4.9/gcc/config/darwin9.h @@ -0,0 +1,65 @@ +/* Target definitions for Darwin (Mac OS X) systems. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + Contributed by Apple Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Prefer DWARF2. */ +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +#define DARWIN_PREFER_DWARF + +/* Since DWARF2 is default, conditions for running dsymutil are different. */ +#undef DSYMUTIL_SPEC +#define DSYMUTIL_SPEC \ + "%{!fdump=*:%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ + %{v} \ + %{g*:%{!gstabs*:%{!g0: -idsym}}}\ + %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm|.s|.f|.f90|.f95|.f03|.f77|.for|.F|.F90|.F95|.F03: \ + %{g*:%{!gstabs*:%{!g0: -dsym}}}}}}}}}}}" + +/* Tell collect2 to run dsymutil for us as necessary. */ +#define COLLECT_RUN_DSYMUTIL 1 + +#undef PIE_SPEC +#define PIE_SPEC \ + "%{fpie|pie|fPIE: \ + %{mdynamic-no-pic: %n'-mdynamic-no-pic' overrides '-pie', '-fpie' or '-fPIE'; \ + :-pie}}" + +/* Only ask as for debug data if the debug style is stabs (since as doesn't + yet generate dwarf.) 
*/ + +#undef ASM_DEBUG_SPEC +#define ASM_DEBUG_SPEC "%{g*:%{!g0:%{gstabs:--gstabs}}}" + +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + do { \ + unsigned HOST_WIDE_INT _new_size = (SIZE); \ + fprintf ((FILE), "\t.comm "); \ + assemble_name ((FILE), (NAME)); \ + if (_new_size == 0) _new_size = 1; \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ + _new_size, floor_log2 ((ALIGN) / BITS_PER_UNIT)); \ + } while (0) + +#undef DEF_MIN_OSX_VERSION +#define DEF_MIN_OSX_VERSION "10.5" + +#undef STACK_CHECK_STATIC_BUILTIN +#define STACK_CHECK_STATIC_BUILTIN 1 diff --git a/gcc-4.9/gcc/config/dbx.h b/gcc-4.9/gcc/config/dbx.h new file mode 100644 index 000000000..1b68bcd9a --- /dev/null +++ b/gcc-4.9/gcc/config/dbx.h @@ -0,0 +1,27 @@ +/* Prefer DBX (stabs) debugging information. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file causes gcc to prefer using DBX (stabs) debugging + information. The configure script will add a #include of this file + to tm.h when --with-stabs is used for certain targets. */ + +#define DBX_DEBUGGING_INFO 1 + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG diff --git a/gcc-4.9/gcc/config/dbxcoff.h b/gcc-4.9/gcc/config/dbxcoff.h new file mode 100644 index 000000000..c6a62c475 --- /dev/null +++ b/gcc-4.9/gcc/config/dbxcoff.h @@ -0,0 +1,62 @@ +/* Definitions needed when using stabs embedded in COFF sections. + Copyright (C) 1996-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file may be included by any COFF target which wishes to + support -gstabs generating stabs in sections, as produced by gas + and understood by gdb. */ + +/* Output DBX (stabs) debugging information if doing -gstabs. */ + +#define DBX_DEBUGGING_INFO 1 + +/* Generate SDB debugging information by default. */ + +#ifndef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE SDB_DEBUG +#endif + +/* Be function-relative for block and source line stab directives. */ + +#define DBX_BLOCKS_FUNCTION_RELATIVE 1 + +/* but, to make this work, functions must appear prior to line info. */ + +#define DBX_FUNCTION_FIRST + +/* Generate a blank trailing N_SO to mark the end of the .o file, since + we can't depend upon the linker to mark .o file boundaries with + embedded stabs. 
*/ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +/* Like block addresses, stabs line numbers are relative to the + current function. */ + +#define DBX_LINES_FUNCTION_RELATIVE 1 + +/* When generating stabs debugging, use N_BINCL entries. */ + +#undef DBX_USE_BINCL +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif diff --git a/gcc-4.9/gcc/config/dbxelf.h b/gcc-4.9/gcc/config/dbxelf.h new file mode 100644 index 000000000..4819cfa79 --- /dev/null +++ b/gcc-4.9/gcc/config/dbxelf.h @@ -0,0 +1,68 @@ +/* Definitions needed when using stabs embedded in ELF sections. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* This file may be included by any ELF target which wishes to + support -gstabs generating stabs in sections, as produced by gas + and understood by gdb. */ + +#ifndef GCC_DBX_ELF_H +#define GCC_DBX_ELF_H + +/* Output DBX (stabs) debugging information if doing -gstabs. */ + +#define DBX_DEBUGGING_INFO 1 + +/* Make LBRAC and RBRAC addresses relative to the start of the + function. The native Solaris stabs debugging format works this + way, gdb expects it, and it reduces the number of relocation + entries... */ + +#define DBX_BLOCKS_FUNCTION_RELATIVE 1 + +/* ... but, to make this work, functions must appear prior to line info. */ + +#define DBX_FUNCTION_FIRST + +/* When generating stabs debugging, use N_BINCL entries. */ + +#define DBX_USE_BINCL + +/* There is no limit to the length of stabs strings. */ + +#ifndef DBX_CONTIN_LENGTH +#define DBX_CONTIN_LENGTH 0 +#endif + +/* Like block addresses, stabs line numbers are relative to the + current function. */ + +#define DBX_LINES_FUNCTION_RELATIVE 1 + +/* Generate a blank trailing N_SO to mark the end of the .o file, since + we can't depend upon the linker to mark .o file boundaries with + embedded stabs. */ + +#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END + +#endif /* ! GCC_DBX_ELF_H */ diff --git a/gcc-4.9/gcc/config/default-c.c b/gcc-4.9/gcc/config/default-c.c new file mode 100644 index 000000000..26c41f409 --- /dev/null +++ b/gcc-4.9/gcc/config/default-c.c @@ -0,0 +1,33 @@ +/* Default C-family target hooks initializer. + Copyright (C) 2011-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "c-family/c-target.h" +#include "c-family/c-target-def.h" + +/* Do not include tm.h or tm_p.h here; if it is useful for a target to + define some macros for the initializer in a header without defining + targetcm itself (for example, because of interactions with some + hooks depending on the target OS and others on the target + architecture), create a separate tm_c.h for only the relevant + definitions. */ + +struct gcc_targetcm targetcm = TARGETCM_INITIALIZER; diff --git a/gcc-4.9/gcc/config/elfos.h b/gcc-4.9/gcc/config/elfos.h new file mode 100644 index 000000000..1fce7011b --- /dev/null +++ b/gcc-4.9/gcc/config/elfos.h @@ -0,0 +1,438 @@ +/* elfos.h -- operating system specific defines to be used when + targeting GCC for some generic ELF system + Copyright (C) 1991-2014 Free Software Foundation, Inc. + Based on svr4.h contributed by Ron Guilmette (rfg@netcom.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#define TARGET_OBJFMT_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__ELF__"); \ + } \ + while (0) + +/* Define a symbol indicating that we are using elfos.h. + Some CPU specific configuration files use this. */ +#define USING_ELFOS_H + +/* The prefix to add to user-visible assembler symbols. + + For ELF systems the convention is *not* to prepend a leading + underscore onto user-level symbol names. */ + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "" + +/* The biggest alignment supported by ELF in bits. 32-bit ELF + supports section alignment up to (0x80000000 * 8), while + 64-bit ELF supports (0x8000000000000000 * 8). If this macro + is not defined, the default is the largest alignment supported + by 32-bit ELF and representable on a 32-bit host. Use this + macro to limit the alignment which can be specified using + the `__attribute__ ((aligned (N)))' construct. */ +#ifndef MAX_OFILE_ALIGNMENT +#define MAX_OFILE_ALIGNMENT (((unsigned int) 1 << 28) * 8) +#endif + +/* Use periods rather than dollar signs in special g++ assembler names. */ + +#define NO_DOLLAR_IN_LABEL + +/* Writing `int' for a bit-field forces int alignment for the structure. */ + +#ifndef PCC_BITFIELD_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS 1 +#endif + +/* All ELF targets can support DWARF-2. 
*/ + +#define DWARF2_DEBUGGING_INFO 1 + +/* The GNU tools operate better with dwarf2, and it is required by some + psABI's. Since we don't have any native tools to be compatible with, + default to dwarf2. */ + +#ifndef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG +#endif + +/* All SVR4 targets use the ELF object file format. */ +#define OBJECT_FORMAT_ELF + + +/* Output #ident as a .ident. */ + +#undef TARGET_ASM_OUTPUT_IDENT +#define TARGET_ASM_OUTPUT_IDENT default_asm_output_ident_directive + +#undef SET_ASM_OP +#define SET_ASM_OP "\t.set\t" + +/* Most svr4 assemblers want a .file directive at the beginning of + their input file. */ +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + +/* This is how to allocate empty space in some section. The .zero + pseudo-op is used for this on most svr4 assemblers. */ + +#define SKIP_ASM_OP "\t.zero\t" + +#undef ASM_OUTPUT_SKIP +#define ASM_OUTPUT_SKIP(FILE, SIZE) \ + fprintf ((FILE), "%s"HOST_WIDE_INT_PRINT_UNSIGNED"\n",\ + SKIP_ASM_OP, (SIZE)) + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. + + For most svr4 systems, the convention is that any symbol which begins + with a period is not put into the linker symbol table by the assembler. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + do \ + { \ + char *__p; \ + (LABEL)[0] = '*'; \ + (LABEL)[1] = '.'; \ + __p = stpcpy (&(LABEL)[2], PREFIX); \ + sprint_ul (__p, (unsigned long) (NUM)); \ + } \ + while (0) + +/* Output the label which precedes a jumptable. Note that for all svr4 + systems where we actually generate jumptables (which is to say every + svr4 target except i386, where we use casesi instead) we put the jump- + tables into the .rodata section and since other stuff could have been + put into the .rodata section prior to any given jumptable, we have to + make sure that the location counter for the .rodata section gets pro- + perly re-aligned prior to the actual beginning of the jump table. */ + +#undef ALIGN_ASM_OP +#define ALIGN_ASM_OP "\t.align\t" + +#ifndef ASM_OUTPUT_BEFORE_CASE_LABEL +#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \ + ASM_OUTPUT_ALIGN ((FILE), 2); +#endif + +#undef ASM_OUTPUT_CASE_LABEL +#define ASM_OUTPUT_CASE_LABEL(FILE, PREFIX, NUM, JUMPTABLE) \ + do \ + { \ + ASM_OUTPUT_BEFORE_CASE_LABEL (FILE, PREFIX, NUM, JUMPTABLE) \ + (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); \ + } \ + while (0) + +/* The standard SVR4 assembler seems to require that certain builtin + library routines (e.g. .udiv) be explicitly declared as .globl + in each assembly file where they are referenced. */ + +#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \ + (*targetm.asm_out.globalize_label) (FILE, XSTR (FUN, 0)) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. 
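(Illustrative sketch, not part of the patch.)  The effect of COMMON_ASM_OP and ASM_OUTPUT_ALIGNED_COMMON below is easiest to see on a tentative definition compiled with -fcommon (the default for this GCC):

    /* bss.c */
    int shared_counter;   /* uninitialized, external linkage -> common storage */

which shows up in the assembler output as a single line of roughly the form

    .comm   shared_counter,4,4

where the second operand is the size in bytes and the third the alignment in bytes (both target dependent), matching the SIZE and ALIGN / BITS_PER_UNIT arguments in the macro.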
*/ + +#define COMMON_ASM_OP "\t.comm\t" + +#undef ASM_OUTPUT_ALIGNED_COMMON +#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + fprintf ((FILE), "%s", COMMON_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", \ + (SIZE), (ALIGN) / BITS_PER_UNIT); \ + } \ + while (0) + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#define LOCAL_ASM_OP "\t.local\t" + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + fprintf ((FILE), "%s", LOCAL_ASM_OP); \ + assemble_name ((FILE), (NAME)); \ + fprintf ((FILE), "\n"); \ + ASM_OUTPUT_ALIGNED_COMMON (FILE, NAME, SIZE, ALIGN); \ + } \ + while (0) + +/* This is the pseudo-op used to generate a contiguous sequence of byte + values from a double-quoted string WITHOUT HAVING A TERMINATING NUL + AUTOMATICALLY APPENDED. This is the same for most svr4 assemblers. */ + +#undef ASCII_DATA_ASM_OP +#define ASCII_DATA_ASM_OP "\t.ascii\t" + +/* Support a read-only data section. */ +#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rodata" + +/* On svr4, we *do* have support for the .init and .fini sections, and we + can put stuff in there to be executed before and after `main'. We let + crtstuff.c and other files know this by defining the following symbols. + The definitions say how to change sections to the .init and .fini + sections. This is the same for all known svr4 assemblers. */ + +#define INIT_SECTION_ASM_OP "\t.section\t.init" +#define FINI_SECTION_ASM_OP "\t.section\t.fini" + +/* Output assembly directive to move to the beginning of current section. */ +#ifdef HAVE_GAS_SUBSECTION_ORDERING +# define ASM_SECTION_START_OP "\t.subsection\t-1" +# define ASM_OUTPUT_SECTION_START(FILE) \ + fprintf ((FILE), "%s\n", ASM_SECTION_START_OP) +#endif + +#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1) + +/* Switch into a generic section. */ +#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section + +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION default_elf_select_rtx_section +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION default_elf_select_section +#undef TARGET_HAVE_SWITCHABLE_BSS_SECTIONS +#define TARGET_HAVE_SWITCHABLE_BSS_SECTIONS true + +/* Define the strings used for the special svr4 .type and .size directives. + These strings generally do not vary from one system running svr4 to + another, but if a given system (e.g. m88k running svr) needs to use + different pseudo-op names for these, they may be overridden in the + file which includes this one. */ + +#define TYPE_ASM_OP "\t.type\t" +#define SIZE_ASM_OP "\t.size\t" + +/* This is how we tell the assembler that a symbol is weak. */ + +#define ASM_WEAKEN_LABEL(FILE, NAME) \ + do \ + { \ + fputs ("\t.weak\t", (FILE)); \ + assemble_name ((FILE), (NAME)); \ + fputc ('\n', (FILE)); \ + } \ + while (0) + +/* The following macro defines the format used to output the second + operand of the .type assembler directive. Different svr4 assemblers + expect various different forms for this operand. The one given here + is just a default. You may need to override it in your machine- + specific tm.h file (depending upon the particulars of your assembler). 
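(Illustrative sketch, not part of the patch.)  With TYPE_ASM_OP, SIZE_ASM_OP and the TYPE_OPERAND_FMT defined just below, a plain function such as

    int add_one (int x) { return x + 1; }

is bracketed in the assembler output by directives of roughly this shape (produced via ASM_DECLARE_FUNCTION_NAME and ASM_DECLARE_FUNCTION_SIZE further down):

    .type   add_one, @function
    add_one:
            ...body...
    .size   add_one, .-add_one

which is what fills in the symbol-type and symbol-size fields of the ELF symbol table entry.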
*/ + +#define TYPE_OPERAND_FMT "@%s" + +/* Write the extra assembler code needed to declare a function's result. + Most svr4 assemblers don't require any special declaration of the + result value, but there are exceptions. */ + +#ifndef ASM_DECLARE_RESULT +#define ASM_DECLARE_RESULT(FILE, RESULT) +#endif + +/* These macros generate the special .type and .size directives which + are used to set the corresponding fields of the linker symbol table + entries in an ELF object file under SVR4. These macros also output + the starting labels for the relevant functions/objects. */ + +/* Write the extra assembler code needed to declare a function properly. + Some svr4 assemblers need to also have something extra said about the + function's return value. We allow for that here. */ + +#ifndef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + do \ + { \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ + ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ + ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL); \ + } \ + while (0) +#endif + +/* Write the extra assembler code needed to declare an object properly. */ + +#ifdef HAVE_GAS_GNU_UNIQUE_OBJECT +#define USE_GNU_UNIQUE_OBJECT 1 +#else +#define USE_GNU_UNIQUE_OBJECT 0 +#endif + +#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ + do \ + { \ + HOST_WIDE_INT size; \ + \ + /* For template static data member instantiations or \ + inline fn local statics and their guard variables, use \ + gnu_unique_object so that they will be combined even under \ + RTLD_LOCAL. Don't use gnu_unique_object for typeinfo, \ + vtables and other read-only artificial decls. */ \ + if (USE_GNU_UNIQUE_OBJECT && DECL_ONE_ONLY (DECL) \ + && (!DECL_ARTIFICIAL (DECL) || !TREE_READONLY (DECL))) \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "gnu_unique_object"); \ + else \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + \ + size_directive_output = 0; \ + if (!flag_inhibit_size_directive \ + && (DECL) && DECL_SIZE (DECL)) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, size); \ + } \ + \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + } \ + while (0) + +/* Output the size directive for a decl in rest_of_decl_compilation + in the case where we did not do so before the initializer. + Once we find the error_mark_node, we know that the value of + size_directive_output was set + by ASM_DECLARE_OBJECT_NAME when it was run for the same decl. */ + +#undef ASM_FINISH_DECLARE_OBJECT +#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END)\ + do \ + { \ + const char *name = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + HOST_WIDE_INT size; \ + \ + if (!flag_inhibit_size_directive \ + && DECL_SIZE (DECL) \ + && ! AT_END && TOP_LEVEL \ + && DECL_INITIAL (DECL) == error_mark_node \ + && !size_directive_output) \ + { \ + size_directive_output = 1; \ + size = int_size_in_bytes (TREE_TYPE (DECL)); \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, name, size); \ + } \ + } \ + while (0) + +/* This is how to declare the size of a function. */ +#ifndef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_MEASURED_SIZE (FILE, FNAME); \ + } \ + while (0) +#endif + +/* A table of bytes codes used by the ASM_OUTPUT_ASCII and + ASM_OUTPUT_LIMITED_STRING macros. Each byte in the table + corresponds to a particular byte value [0..255]. 
For any + given byte value, if the value in the corresponding table + position is zero, the given character can be output directly. + If the table value is 1, the byte must be output as a \ooo + octal escape. If the tables value is anything else, then the + byte value should be output as a \ followed by the value + in the table. Note that we can use standard UN*X escape + sequences for many control characters, but we don't use + \a to represent BEL because some svr4 assemblers (e.g. on + the i386) don't know about that. Also, we don't use \v + since some versions of gas, such as 2.2 did not accept it. */ + +#define ELF_ASCII_ESCAPES \ +"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\ +\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" + +/* Some svr4 assemblers have a limit on the number of characters which + can appear in the operand of a .string directive. If your assembler + has such a limitation, you should define STRING_LIMIT to reflect that + limit. Note that at least some svr4 assemblers have a limit on the + actual number of bytes in the double-quoted string, and that they + count each character in an escape sequence as one byte. Thus, an + escape sequence like \377 would count as four bytes. + + If your target assembler doesn't support the .string directive, you + should define this to zero. +*/ + +#define ELF_STRING_LIMIT ((unsigned) 256) + +#define STRING_ASM_OP "\t.string\t" + +/* The routine used to output NUL terminated strings. We use a special + version of this for most svr4 targets because doing so makes the + generated assembly code more compact (and thus faster to assemble) + as well as more readable, especially for targets like the i386 + (where the only alternative is to output character sequences as + comma separated lists of numbers). */ + +#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \ + default_elf_asm_output_limited_string ((FILE), (STR)) + +/* The routine used to output sequences of byte values. We use a special + version of this for most svr4 targets because doing so makes the + generated assembly code more compact (and thus faster to assemble) + as well as more readable. Note that if we find subparts of the + character sequence which end with NUL (and which are shorter than + STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */ + +#undef ASM_OUTPUT_ASCII +#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \ + default_elf_asm_output_ascii ((FILE), (STR), (LENGTH)); + +/* Allow the use of the -frecord-gcc-switches switch via the + elf_record_gcc_switches function defined in varasm.c. */ +#undef TARGET_ASM_RECORD_GCC_SWITCHES +#define TARGET_ASM_RECORD_GCC_SWITCHES elf_record_gcc_switches + +/* A C statement (sans semicolon) to output to the stdio stream STREAM + any text necessary for declaring the name of an external symbol + named NAME which is referenced in this compilation but not defined. + It is needed to properly support non-default visibility. 
*/ + +#ifndef ASM_OUTPUT_EXTERNAL +#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \ + default_elf_asm_output_external (FILE, DECL, NAME) +#endif + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function diff --git a/gcc-4.9/gcc/config/epiphany/constraints.md b/gcc-4.9/gcc/config/epiphany/constraints.md new file mode 100644 index 000000000..1c463e531 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/constraints.md @@ -0,0 +1,130 @@ +;; Constraint definitions for Adaptiva epiphany +;; Copyright (C) 2007-2014 Free Software Foundation, Inc. +;; Contributed by Embecosm on behalf of Adapteva, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Integer constraints + +(define_constraint "U16" + "An unsigned 16-bit constant." + (ior (and (match_code "const_int") + (match_test "IMM16 (ival)")) + (and (match_code "symbol_ref,label_ref,const") + (match_test "epiphany_small16 (op)")))) + +(define_constraint "K" + "An unsigned 5-bit constant." + (and (match_code "const_int") + (match_test "IMM5 (ival)"))) + +;; This could also accept symbol_ref, label_ref or const if we introduce +;; a small area and/or attribute that satisfies the 11-bit signed range. +(define_constraint "L" + "A signed 11-bit constant." + (and (match_code "const_int") + (match_test "SIMM11 (ival)"))) + +(define_constraint "CnL" + "A negated signed 11-bit constant." + (and (match_code "const_int") + (match_test "SIMM11 (-ival)"))) + +(define_constraint "Cm1" + "A signed 11-bit constant added to -1" + (and (match_code "const_int") + (match_test "SIMM11 (ival+1)") + (match_test "epiphany_m1reg >= 0"))) + +(define_constraint "Cl1" + "Left-shift of -1" + (and (match_code "const_int") + (match_test "ival == (ival | ~(ival-1))") + (match_test "epiphany_m1reg >= 0"))) + +(define_constraint "Cr1" + "Right-shift of -1" + (and (match_code "const_int") + (match_test "ival == (ival & ~(ival+1))") + (match_test "epiphany_m1reg >= 0"))) + +(define_constraint "Cal" + "Constant for arithmetic/logical operations" + (match_test "(flag_pic + ? nonsymbolic_immediate_operand (op, VOIDmode) + : immediate_operand (op, VOIDmode))")) + +(define_constraint "Csy" + "Symbolic constant for call/jump instruction" + (match_test "symbolic_operand (op, VOIDmode)")) + +;; Register constraints +;; proper register constraints define a register class and can thus +;; drive register allocation and reload. OTOH sometimes we want to +;; avoid just that. + +;; The register class usable in short insns. +;; Subject to TARGET_PREFER_SHORT_INSN_REGS. +(define_register_constraint "Rcs" "SHORT_INSN_REGS" + "short insn register class.") + +; The registers that can be used to hold a sibcall call address. +; This must not conflict with any callee-saved registers. 
+(define_register_constraint "Rsc" "SIBCALL_REGS" + "sibcall register class") + +; The registers that can be used to hold a status value +(define_register_constraint "Rct" "CORE_CONTROL_REGS" + "Core control register class") + +;; The register group usable in short insns. +(define_constraint "Rgs" + "short insn register group." + (and (match_code "reg") + (match_test "REGNO (op) >= FIRST_PSEUDO_REGISTER || REGNO (op) <= 7"))) + +;; Constant suitable for the addsi3_r pattern. +(define_constraint "Car" + "addsi3_r constant." + (and (match_code "const_int") + (ior (match_test "RTX_OK_FOR_OFFSET_P (SImode, op)") + (match_test "RTX_OK_FOR_OFFSET_P (HImode, op)") + (match_test "RTX_OK_FOR_OFFSET_P (QImode, op)")))) + +;; The return address if it can be replaced with GPR_LR. +(define_constraint "Rra" + "return address constraint - register variant" + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_RETURN_ADDR") + (match_test "!MACHINE_FUNCTION (cfun)->lr_clobbered"))) + +(define_constraint "Rcc" + "integer condition code" + (and (match_code "reg") + (match_test "REGNO (op) == CC_REGNUM"))) + +;; The return address, which might be a stack slot. */ +(define_constraint "Sra" + "return address constraint - memory variant" + (and (match_code "unspec") + (match_test "XINT (op, 1) == UNSPEC_RETURN_ADDR"))) + +(define_constraint "Cfm" + "control register values to switch fp mode" + (and (match_code "const") + (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC") + (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_FP_MODE"))) diff --git a/gcc-4.9/gcc/config/epiphany/epiphany-modes.def b/gcc-4.9/gcc/config/epiphany/epiphany-modes.def new file mode 100644 index 000000000..2a31d6e1c --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany-modes.def @@ -0,0 +1,40 @@ +/* Definitions of target machine for GNU compiler, Adapteva Epiphany cpu. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +CC_MODE (CC_Z); /* only Z valid - for add, testing result. */ +CC_MODE (CC_N_NE); /* N for not-equal (for lsl). */ +CC_MODE (CC_C_LTU); /* C for unsigned-less-than (for add with carry). */ +CC_MODE (CC_C_GTU); /* C for unsigned-greater-than (for sub with carry). */ +CC_MODE (CC_FP); +CC_MODE (CC_FP_EQ); /* AZ for equal. */ +CC_MODE (CC_FP_ORD); /* AZ || ~AC for ordered. */ +CC_MODE (CC_FP_UNEQ); /* AZ || ~AC for unordered / equal. */ +CC_MODE (CC_FP_GTE); /* ~AC / AZ for greater than / equal. */ +#if 0 /* This would be needed for simplified NaN testing. 
*/ +RESET_FLOAT_FORMAT (SF, motorola_single_format); +RESET_FLOAT_FORMAT (DF, motorola_double_format); +#endif +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ +ADJUST_ALIGNMENT (V8QI, epiphany_vect_align); +ADJUST_ALIGNMENT (V4HI, epiphany_vect_align); +ADJUST_ALIGNMENT (V2SI, epiphany_vect_align); +ADJUST_ALIGNMENT (V2SF, epiphany_vect_align); diff --git a/gcc-4.9/gcc/config/epiphany/epiphany-protos.h b/gcc-4.9/gcc/config/epiphany/epiphany-protos.h new file mode 100644 index 000000000..9121e0c3c --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany-protos.h @@ -0,0 +1,64 @@ +/* Definitions of target machine for GNU compiler, EPIPHANY cpu. + Copyright (C) 2000-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifdef RTX_CODE +extern enum machine_mode epiphany_select_cc_mode (enum rtx_code, rtx, rtx); + +/* Define the function that build the compare insn for scc and bcc. */ +extern struct rtx_def *gen_compare_reg (enum machine_mode, enum rtx_code, + enum machine_mode, rtx, rtx); +#endif + +/* Declarations for various fns used in the .md file. */ +extern void epiphany_final_prescan_insn (rtx, rtx *, int); +extern bool epiphany_is_long_call_p (rtx); +extern bool epiphany_small16 (rtx); +bool epiphany_uninterruptible_p (tree decl); +bool epiphany_call_uninterruptible_p (rtx mem); +extern rtx sfunc_symbol (const char *name); + +extern void epiphany_expand_prologue (void); +extern void epiphany_expand_epilogue (int); +extern int epiphany_initial_elimination_offset (int, int); +extern void epiphany_init_expanders (void); +extern int hard_regno_mode_ok (int regno, enum machine_mode mode); +#ifdef HARD_CONST +extern void emit_set_fp_mode (int entity, int mode, HARD_REG_SET regs_live); +#endif +extern void epiphany_insert_mode_switch_use (rtx insn, int, int); +extern void epiphany_expand_set_fp_mode (rtx *operands); +extern int epiphany_mode_needed (int entity, rtx insn); +extern int epiphany_mode_entry_exit (int entity, bool); +extern int epiphany_mode_after (int entity, int last_mode, rtx insn); +extern int epiphany_mode_priority_to_mode (int entity, unsigned priority); +extern bool epiphany_epilogue_uses (int regno); +extern bool epiphany_optimize_mode_switching (int entity); +extern bool epiphany_is_interrupt_p (tree); +extern unsigned epiphany_special_round_type_align (tree, unsigned, unsigned); +extern unsigned epiphany_adjust_field_align (tree, unsigned); +extern void epiphany_start_function (FILE *f, const char *name, tree decl); +extern bool epiphany_regno_rename_ok (unsigned src, unsigned dst); + +/* Also declared in insn-attr.h, but files generated from epiphany.md + can't / won't include that. In particular: + PR other/55523: gencondmd file includes / dependencies are messed up, + it uses peephole2 predicates without having all the necessary headers. 
*/ +extern int get_attr_sched_use_fpu (rtx); + diff --git a/gcc-4.9/gcc/config/epiphany/epiphany-sched.md b/gcc-4.9/gcc/config/epiphany/epiphany-sched.md new file mode 100644 index 000000000..d18cba9c6 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany-sched.md @@ -0,0 +1,145 @@ +;; DFA scheduling description for EPIPHANY +;; Copyright (C) 2004-2014 Free Software Foundation, Inc. +;; Contributed by Embecosm on behalf of Adapteva, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Two automata are defined to reduce number of states +;; which a single large automaton will have. (Factoring) + +(define_automaton "inst_pipeline,fpu_pipe") + +;; This unit is basically the decode unit of the processor. +;; Since epiphany is a dual issue machine, it is as if there are two +;; units so that any insn can be processed by either one +;; of the decoding unit. + +(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline") + +;; The fixed point arithmetic unit. + +(define_cpu_unit "int" "inst_pipeline") + +;; The floating point unit. + +(define_cpu_unit "F0" "fpu_pipe") + +;; ---------------------------------------------------- +;; This reservation is to simplify the dual issue description. + +(define_reservation "issue" "pipe_01|pipe_02") + +;; This is to express instructions that cannot be paired. + +(define_reservation "d_lock" "pipe_01+pipe_02") + +;; We don't model all pipeline stages; we model the issue stage +;; inasmuch as we allow only two instructions to issue simultaneously, +;; and flow instructions prevent any simultaneous issue of another instruction. +;; (This uses pipe_01 and pipe_02). +;; Double issue of 'other' insns is prevented by using the int unit in the +;; E1 stage. +;; Double issue of float instructions is prevented by using F0 in the E1 stage. + +(define_insn_reservation "simple_arith" 2 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "move,cmove,compare,shift,misc,mul") + (eq_attr "length" "4")) + "issue,int") + +; anything but fp / fp_int / v2fp has a bypass +(define_bypass 1 "simple_arith" "simple_arith,simple_arith_2,simple_arith_4,load,store,branch,call,flow") + +(define_insn_reservation "simple_arith_2" 2 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "move,cmove,compare,shift,misc,mul") + (eq_attr "length" "8")) + "issue,issue+int,int") + +(define_insn_reservation "simple_arith_4" 4 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "move,compare,shift,misc,mul") + (eq_attr "length" "12,16,20,24")) + "issue,issue+int,issue+int,issue+int,int") + +;; Loads have a latency of two. 
+;; Note that we fix up the latency of post_modify in epiphany.c:epiphany_adjust_cost + +(define_insn_reservation "load" 3 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "load")) + "issue,int") + +; anything but fp / fp_int / v2fp has a bypass +(define_bypass 2 "load" "simple_arith,simple_arith_2,simple_arith_4,load,store,branch,call,flow") + +(define_insn_reservation "store" 1 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "store")) + "issue,int") + +;; Branch +;; Latency when taken: 3 +;; Issue Rate: 1 +;; The latency is 1 when the branch is not taken. +;; We can't really do much with the latency, even if we could express it, +;; but the pairing restrictions are useful to take into account. + +(define_insn_reservation "branch" 1 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "branch,uncond_branch")) + "d_lock") + +;; Calls introduce a longish delay that is likely to flush the pipelines +;; of the caller's instructions. Both the call instruction itself and +;; the rts at the end of the call / sfunc incur a three cycle penalty, +;; thus also isolating the scheduling of caller and callee. + +(define_insn_reservation "call" 8 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "call,sfunc,fp_sfunc")) + "d_lock*8") + +(define_insn_reservation "flow" 1 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "flow")) + "d_lock") + +(define_insn_reservation "fp_arith" 5 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "fp,fp_int")) + "issue,F0") + +(define_bypass 4 "fp_arith" "store") + +; There are two main consumers for v2fp: +; - other v2fp operations - in that case, the latencies can dovetail to +; save one cycle of latency. +; - 64 bit store operations - we need both registers, but OTOH the latency is +; one lower to start with. +; of the bypass saving one cycle then. +(define_insn_reservation "v2fp_arith" 5 + (and (eq_attr "pipe_model" "epiphany") + (eq_attr "type" "v2fp")) + "issue,issue+F0,F0") + +; A boolean attribute for use by peephole2 patterns that try to figure out +; if we overcommitted the FPU. +; This is notionally a numeric attribute to avoid dependency problems. +(define_attr "sched_use_fpu" "" + (cond [(eq_attr "type" "fp,fp_int,v2fp") (const_int 1)] + (const_int 0))) diff --git a/gcc-4.9/gcc/config/epiphany/epiphany.c b/gcc-4.9/gcc/config/epiphany/epiphany.c new file mode 100644 index 000000000..59b24107a --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany.c @@ -0,0 +1,2938 @@ +/* Subroutines used for code generation on the EPIPHANY cpu. + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +.
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include "stringpool.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "diagnostic-core.h" +#include "recog.h" +#include "toplev.h" +#include "tm_p.h" +#include "target.h" +#include "df.h" +#include "langhooks.h" +#include "insn-codes.h" +#include "ggc.h" +#include "tm-constrs.h" +#include "tree-pass.h" /* for current_pass */ +#include "context.h" +#include "pass_manager.h" + +/* Which cpu we're compiling for. */ +int epiphany_cpu_type; + +/* Name of mangle string to add to symbols to separate code compiled for each + cpu (or NULL). */ +const char *epiphany_mangle_cpu; + +/* Array of valid operand punctuation characters. */ +char epiphany_punct_chars[256]; + +/* The rounding mode that we generally use for floating point. */ +int epiphany_normal_fp_rounding; + +/* The pass instance, for use in epiphany_optimize_mode_switching. */ +static opt_pass *pass_mode_switch_use; + +static void epiphany_init_reg_tables (void); +static int get_epiphany_condition_code (rtx); +static tree epiphany_handle_interrupt_attribute (tree *, tree, tree, int, bool *); +static tree epiphany_handle_forwarder_attribute (tree *, tree, tree, int, + bool *); +static bool epiphany_pass_by_reference (cumulative_args_t, enum machine_mode, + const_tree, bool); +static rtx frame_insn (rtx); + +/* defines for the initialization of the GCC target structure. */ +#define TARGET_ATTRIBUTE_TABLE epiphany_attribute_table + +#define TARGET_PRINT_OPERAND epiphany_print_operand +#define TARGET_PRINT_OPERAND_ADDRESS epiphany_print_operand_address + +#define TARGET_RTX_COSTS epiphany_rtx_costs +#define TARGET_ADDRESS_COST epiphany_address_cost +#define TARGET_MEMORY_MOVE_COST epiphany_memory_move_cost + +#define TARGET_PROMOTE_FUNCTION_MODE epiphany_promote_function_mode +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true + +#define TARGET_RETURN_IN_MEMORY epiphany_return_in_memory +#define TARGET_PASS_BY_REFERENCE epiphany_pass_by_reference +#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true +#define TARGET_FUNCTION_VALUE epiphany_function_value +#define TARGET_LIBCALL_VALUE epiphany_libcall_value +#define TARGET_FUNCTION_VALUE_REGNO_P epiphany_function_value_regno_p + +#define TARGET_SETUP_INCOMING_VARARGS epiphany_setup_incoming_varargs + +/* Using the simplistic varags handling forces us to do partial reg/stack + argument passing for types with larger size (> 4 bytes) than alignemnt. 
*/ +#define TARGET_ARG_PARTIAL_BYTES epiphany_arg_partial_bytes + +#define TARGET_FUNCTION_OK_FOR_SIBCALL epiphany_function_ok_for_sibcall + +#define TARGET_SCHED_ISSUE_RATE epiphany_issue_rate +#define TARGET_SCHED_ADJUST_COST epiphany_adjust_cost + +#define TARGET_LEGITIMATE_ADDRESS_P epiphany_legitimate_address_p + +#define TARGET_SECONDARY_RELOAD epiphany_secondary_reload + +#define TARGET_OPTION_OVERRIDE epiphany_override_options + +#define TARGET_CONDITIONAL_REGISTER_USAGE epiphany_conditional_register_usage + +#define TARGET_FUNCTION_ARG epiphany_function_arg + +#define TARGET_FUNCTION_ARG_ADVANCE epiphany_function_arg_advance + +#define TARGET_FUNCTION_ARG_BOUNDARY epiphany_function_arg_boundary + +#define TARGET_TRAMPOLINE_INIT epiphany_trampoline_init + +/* Nonzero if the constant rtx value is a legitimate general operand. + We can handle any 32- or 64-bit constant. */ +#define TARGET_LEGITIMATE_CONSTANT_P hook_bool_mode_rtx_true + +#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL \ + epiphany_min_divisions_for_recip_mul + +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE epiphany_preferred_simd_mode + +#define TARGET_VECTOR_MODE_SUPPORTED_P epiphany_vector_mode_supported_p + +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + epiphany_vector_alignment_reachable + +#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ + epiphany_support_vector_misalignment + +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ + hook_bool_const_tree_hwi_hwi_const_tree_true +#define TARGET_ASM_OUTPUT_MI_THUNK epiphany_output_mi_thunk + +#include "target-def.h" + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +bool +epiphany_is_interrupt_p (tree decl) +{ + tree attrs; + + attrs = DECL_ATTRIBUTES (decl); + if (lookup_attribute ("interrupt", attrs)) + return true; + else + return false; +} + +/* Called from epiphany_override_options. + We use this to initialize various things. */ + +static void +epiphany_init (void) +{ + /* N.B. this pass must not run before the first optimize_mode_switching + pass because of the side effect of epiphany_mode_needed on + MACHINE_FUNCTION(cfun)->unknown_mode_uses. But it must run before + pass_resolve_sw_modes. */ + pass_mode_switch_use = make_pass_mode_switch_use (g); + struct register_pass_info insert_use_info + = { pass_mode_switch_use, "mode_sw", + 1, PASS_POS_INSERT_AFTER + }; + opt_pass *mode_sw2 + = g->get_passes()->get_pass_mode_switching ()->clone (); + struct register_pass_info mode_sw2_info + = { mode_sw2, "mode_sw", + 1, PASS_POS_INSERT_AFTER + }; + opt_pass *mode_sw3 = make_pass_resolve_sw_modes (g); + struct register_pass_info mode_sw3_info + = { mode_sw3, "mode_sw", + 1, PASS_POS_INSERT_AFTER + }; + opt_pass *mode_sw4 + = g->get_passes()->get_pass_split_all_insns ()->clone (); + struct register_pass_info mode_sw4_info + = { mode_sw4, "mode_sw", + 1, PASS_POS_INSERT_AFTER + }; + static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING; +#define N_ENTITIES ARRAY_SIZE (num_modes) + + epiphany_init_reg_tables (); + + /* Initialize array for PRINT_OPERAND_PUNCT_VALID_P. */ + memset (epiphany_punct_chars, 0, sizeof (epiphany_punct_chars)); + epiphany_punct_chars['-'] = 1; + + epiphany_normal_fp_rounding + = (epiphany_normal_fp_mode == FP_MODE_ROUND_TRUNC + ?
FP_MODE_ROUND_TRUNC : FP_MODE_ROUND_NEAREST); + register_pass (&mode_sw4_info); + register_pass (&mode_sw2_info); + register_pass (&mode_sw3_info); + register_pass (&insert_use_info); + register_pass (&mode_sw2_info); + /* Verify that NUM_MODES_FOR_MODE_SWITCHING has one value per entity. */ + gcc_assert (N_ENTITIES == EPIPHANY_MSW_ENTITY_NUM); + +#if 1 /* As long as peep2_rescan is not implemented, + (see http://gcc.gnu.org/ml/gcc-patches/2011-10/msg02819.html,) + we need a second peephole2 pass to get reasonable code. */ + { + opt_pass *extra_peephole2 + = g->get_passes ()->get_pass_peephole2 ()->clone (); + struct register_pass_info peep2_2_info + = { extra_peephole2, "peephole2", + 1, PASS_POS_INSERT_AFTER + }; + + register_pass (&peep2_2_info); + } +#endif +} + +/* The condition codes of the EPIPHANY, and the inverse function. */ +static const char *const epiphany_condition_codes[] = +{ /* 0 1 2 3 4 5 6 7 8 9 */ + "eq", "ne", "ltu", "gteu", "gt", "lte", "gte", "lt", "gtu", "lteu", + /* 10 11 12 13 */ + "beq","bne","blt", "blte", +}; + +#define EPIPHANY_INVERSE_CONDITION_CODE(X) ((X) ^ 1) + +/* Returns the index of the EPIPHANY condition code string in + `epiphany_condition_codes'. COMPARISON should be an rtx like + `(eq (...) (...))'. */ + +static int +get_epiphany_condition_code (rtx comparison) +{ + switch (GET_MODE (XEXP (comparison, 0))) + { + case CCmode: + switch (GET_CODE (comparison)) + { + case EQ : return 0; + case NE : return 1; + case LTU : return 2; + case GEU : return 3; + case GT : return 4; + case LE : return 5; + case GE : return 6; + case LT : return 7; + case GTU : return 8; + case LEU : return 9; + + default : gcc_unreachable (); + } + case CC_N_NEmode: + switch (GET_CODE (comparison)) + { + case EQ: return 6; + case NE: return 7; + default: gcc_unreachable (); + } + case CC_C_LTUmode: + switch (GET_CODE (comparison)) + { + case GEU: return 2; + case LTU: return 3; + default: gcc_unreachable (); + } + case CC_C_GTUmode: + switch (GET_CODE (comparison)) + { + case LEU: return 3; + case GTU: return 2; + default: gcc_unreachable (); + } + case CC_FPmode: + switch (GET_CODE (comparison)) + { + case EQ: return 10; + case NE: return 11; + case LT: return 12; + case LE: return 13; + default: gcc_unreachable (); + } + case CC_FP_EQmode: + switch (GET_CODE (comparison)) + { + case EQ: return 0; + case NE: return 1; + default: gcc_unreachable (); + } + case CC_FP_GTEmode: + switch (GET_CODE (comparison)) + { + case EQ: return 0; + case NE: return 1; + case GT : return 4; + case GE : return 6; + case UNLE : return 5; + case UNLT : return 7; + default: gcc_unreachable (); + } + case CC_FP_ORDmode: + switch (GET_CODE (comparison)) + { + case ORDERED: return 9; + case UNORDERED: return 8; + default: gcc_unreachable (); + } + case CC_FP_UNEQmode: + switch (GET_CODE (comparison)) + { + case UNEQ: return 9; + case LTGT: return 8; + default: gcc_unreachable (); + } + default: gcc_unreachable (); + } + /*NOTREACHED*/ + return (42); +} + + +/* Return 1 if hard register REGNO can hold a value of machine_mode MODE. */ +int +hard_regno_mode_ok (int regno, enum machine_mode mode) +{ + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + return (regno & 1) == 0 && GPR_P (regno); + else + return 1; +} + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. 
*/ + +enum machine_mode +epiphany_select_cc_mode (enum rtx_code op, + rtx x ATTRIBUTE_UNUSED, + rtx y ATTRIBUTE_UNUSED) +{ + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + if (TARGET_SOFT_CMPSF + || op == ORDERED || op == UNORDERED) + { + if (op == EQ || op == NE) + return CC_FP_EQmode; + if (op == ORDERED || op == UNORDERED) + return CC_FP_ORDmode; + if (op == UNEQ || op == LTGT) + return CC_FP_UNEQmode; + return CC_FP_GTEmode; + } + return CC_FPmode; + } + /* recognize combiner pattern ashlsi_btst: + (parallel [ + (set (reg:N_NE 65 cc1) + (compare:N_NE (zero_extract:SI (reg/v:SI 75 [ a ]) + (const_int 1 [0x1]) + (const_int 0 [0x0])) + (const_int 0 [0x0]))) + (clobber (scratch:SI)) */ + else if ((op == EQ || op == NE) + && GET_CODE (x) == ZERO_EXTRACT + && XEXP (x, 1) == const1_rtx + && CONST_INT_P (XEXP (x, 2))) + return CC_N_NEmode; + else if ((op == GEU || op == LTU) && GET_CODE (x) == PLUS) + return CC_C_LTUmode; + else if ((op == LEU || op == GTU) && GET_CODE (x) == MINUS) + return CC_C_GTUmode; + else + return CCmode; +} + +enum reg_class epiphany_regno_reg_class[FIRST_PSEUDO_REGISTER]; + +static void +epiphany_init_reg_tables (void) +{ + int i; + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + if (i == GPR_LR) + epiphany_regno_reg_class[i] = LR_REGS; + else if (i <= 7 && TARGET_PREFER_SHORT_INSN_REGS) + epiphany_regno_reg_class[i] = SHORT_INSN_REGS; + else if (call_used_regs[i] + && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)) + epiphany_regno_reg_class[i] = SIBCALL_REGS; + else if (i >= CORE_CONTROL_FIRST && i <= CORE_CONTROL_LAST) + epiphany_regno_reg_class[i] = CORE_CONTROL_REGS; + else if (i < (GPR_LAST+1) + || i == ARG_POINTER_REGNUM || i == FRAME_POINTER_REGNUM) + epiphany_regno_reg_class[i] = GENERAL_REGS; + else if (i == CC_REGNUM) + epiphany_regno_reg_class[i] = NO_REGS /* CC_REG: must be NO_REGS */; + else + epiphany_regno_reg_class[i] = NO_REGS; + } +} + +/* EPIPHANY specific attribute support. + + The EPIPHANY has these attributes: + interrupt - for interrupt functions. + short_call - the function is assumed to be reachable with the b / bl + instructions. + long_call - the function address is loaded into a register before use. + disinterrupt - functions which mask interrupts throughout. + They unmask them while calling an interruptible + function, though. */ + +static const struct attribute_spec epiphany_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ + { "interrupt", 0, 9, true, false, false, epiphany_handle_interrupt_attribute, true }, + { "forwarder_section", 1, 1, true, false, false, epiphany_handle_forwarder_attribute, false }, + { "long_call", 0, 0, false, true, true, NULL, false }, + { "short_call", 0, 0, false, true, true, NULL, false }, + { "disinterrupt", 0, 0, false, true, true, NULL, true }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Handle an "interrupt" attribute; arguments as in + struct attribute_spec.handler. 
*/ +static tree +epiphany_handle_interrupt_attribute (tree *node ATTRIBUTE_UNUSED, + tree name, tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree value; + + if (!args) + return NULL_TREE; + + value = TREE_VALUE (args); + + if (TREE_CODE (value) != STRING_CST) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not a string constant", name); + *no_add_attrs = true; + } + else if (strcmp (TREE_STRING_POINTER (value), "reset") + && strcmp (TREE_STRING_POINTER (value), "software_exception") + && strcmp (TREE_STRING_POINTER (value), "page_miss") + && strcmp (TREE_STRING_POINTER (value), "timer0") + && strcmp (TREE_STRING_POINTER (value), "timer1") + && strcmp (TREE_STRING_POINTER (value), "message") + && strcmp (TREE_STRING_POINTER (value), "dma0") + && strcmp (TREE_STRING_POINTER (value), "dma1") + && strcmp (TREE_STRING_POINTER (value), "wand") + && strcmp (TREE_STRING_POINTER (value), "swi")) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not \"reset\", \"software_exception\", \"page_miss\", \"timer0\", \"timer1\", \"message\", \"dma0\", \"dma1\", \"wand\" or \"swi\"", + name); + *no_add_attrs = true; + return NULL_TREE; + } + + return epiphany_handle_interrupt_attribute (node, name, TREE_CHAIN (args), + flags, no_add_attrs); +} + +/* Handle a "forwarder_section" attribute; arguments as in + struct attribute_spec.handler. */ +static tree +epiphany_handle_forwarder_attribute (tree *node ATTRIBUTE_UNUSED, + tree name, tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree value; + + value = TREE_VALUE (args); + + if (TREE_CODE (value) != STRING_CST) + { + warning (OPT_Wattributes, + "argument of %qE attribute is not a string constant", name); + *no_add_attrs = true; + } + return NULL_TREE; +} + + +/* Misc. utilities. */ + +/* Generate a SYMBOL_REF for the special function NAME. When the address + can't be placed directly into a call instruction, and if possible, copy + it to a register so that cse / code hoisting is possible. */ +rtx +sfunc_symbol (const char *name) +{ + rtx sym = gen_rtx_SYMBOL_REF (Pmode, name); + + /* These sfuncs should be hidden, and every dso should get a copy. */ + SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION | SYMBOL_FLAG_LOCAL; + if (TARGET_SHORT_CALLS) + ; /* Nothing to be done. */ + else if (can_create_pseudo_p ()) + sym = copy_to_mode_reg (Pmode, sym); + else /* We rely on reload to fix this up. */ + gcc_assert (!reload_in_progress || reload_completed); + return sym; +} + +/* X and Y are two things to compare using CODE in IN_MODE. + Emit the compare insn, construct the the proper cc reg in the proper + mode, and return the rtx for the cc reg comparison in CMODE. */ + +rtx +gen_compare_reg (enum machine_mode cmode, enum rtx_code code, + enum machine_mode in_mode, rtx x, rtx y) +{ + enum machine_mode mode = SELECT_CC_MODE (code, x, y); + rtx cc_reg, pat, clob0, clob1, clob2; + + if (in_mode == VOIDmode) + in_mode = GET_MODE (x); + if (in_mode == VOIDmode) + in_mode = GET_MODE (y); + + if (mode == CC_FPmode) + { + /* The epiphany has only EQ / NE / LT / LE conditions for + hardware floating point. 
*/ + if (code == GT || code == GE || code == UNLE || code == UNLT) + { + rtx tmp = x; x = y; y = tmp; + code = swap_condition (code); + } + cc_reg = gen_rtx_REG (mode, CCFP_REGNUM); + y = force_reg (in_mode, y); + } + else + { + if (mode == CC_FP_GTEmode + && (code == LE || code == LT || code == UNGT || code == UNGE)) + { + if (flag_finite_math_only + && ((REG_P (x) && REGNO (x) == GPR_0) + || (REG_P (y) && REGNO (y) == GPR_1))) + switch (code) + { + case LE: code = UNLE; break; + case LT: code = UNLT; break; + case UNGT: code = GT; break; + case UNGE: code = GE; break; + default: gcc_unreachable (); + } + else + { + rtx tmp = x; x = y; y = tmp; + code = swap_condition (code); + } + } + cc_reg = gen_rtx_REG (mode, CC_REGNUM); + } + if ((mode == CC_FP_EQmode || mode == CC_FP_GTEmode + || mode == CC_FP_ORDmode || mode == CC_FP_UNEQmode) + /* movcc might want to re-emit a comparison during ifcvt. */ + && (!REG_P (x) || REGNO (x) != GPR_0 + || !REG_P (y) || REGNO (y) != GPR_1)) + { + rtx reg; + +#if 0 + /* ??? We should really do the r0/r1 clobber only during rtl expansion, + but just like the flag clobber of movsicc, we have to allow + this for ifcvt to work, on the assumption that we'll only want + to do this if these registers have been used before by the + pre-ifcvt code. */ + gcc_assert (currently_expanding_to_rtl); +#endif + reg = gen_rtx_REG (in_mode, GPR_0); + if (reg_overlap_mentioned_p (reg, y)) + return 0; + emit_move_insn (reg, x); + x = reg; + reg = gen_rtx_REG (in_mode, GPR_1); + emit_move_insn (reg, y); + y = reg; + } + else + x = force_reg (in_mode, x); + + pat = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)); + if (mode == CC_FP_EQmode || mode == CC_FP_GTEmode) + { + const char *name = mode == CC_FP_EQmode ? "__eqsf2" : "__gtesf2"; + rtx use = gen_rtx_USE (VOIDmode, sfunc_symbol (name)); + + clob0 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_IP)); + clob1 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_LR)); + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, pat, use, clob0, clob1)); + } + else if (mode == CC_FP_ORDmode || mode == CC_FP_UNEQmode) + { + const char *name = mode == CC_FP_ORDmode ? "__ordsf2" : "__uneqsf2"; + rtx use = gen_rtx_USE (VOIDmode, sfunc_symbol (name)); + + clob0 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_IP)); + clob1 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_16)); + clob2 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, GPR_LR)); + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (5, pat, use, + clob0, clob1, clob2)); + } + else + { + clob0 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (in_mode)); + pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob0)); + } + emit_insn (pat); + return gen_rtx_fmt_ee (code, cmode, cc_reg, const0_rtx); +} + +/* The ROUND_ADVANCE* macros are local to this file. */ +/* Round SIZE up to a word boundary. */ +#define ROUND_ADVANCE(SIZE) \ + (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Round arg MODE/TYPE up to the next word boundary. */ +#define ROUND_ADVANCE_ARG(MODE, TYPE) \ + ((MODE) == BLKmode \ + ? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \ + : ROUND_ADVANCE (GET_MODE_SIZE (MODE))) + +/* Round CUM up to the necessary point for argument MODE/TYPE. */ +#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) \ + (epiphany_function_arg_boundary ((MODE), (TYPE)) > BITS_PER_WORD \ + ? (((CUM) + 1) & ~1) \ + : (CUM)) + +static unsigned int +epiphany_function_arg_boundary (enum machine_mode mode, const_tree type) +{ + if ((type ? 
TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode)) <= PARM_BOUNDARY) + return PARM_BOUNDARY; + return 2 * PARM_BOUNDARY; +} + +/* Do any needed setup for a variadic function. For the EPIPHANY, we + actually emit the code in epiphany_expand_prologue. + + CUM has not been updated for the last named argument which has type TYPE + and mode MODE, and we rely on this fact. */ + + +static void +epiphany_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode, + tree type, int *pretend_size, int no_rtl) +{ + int first_anon_arg; + CUMULATIVE_ARGS next_cum; + machine_function_t *mf = MACHINE_FUNCTION (cfun); + + /* All BLKmode values are passed by reference. */ + gcc_assert (mode != BLKmode); + + next_cum = *get_cumulative_args (cum); + next_cum + = ROUND_ADVANCE_CUM (next_cum, mode, type) + ROUND_ADVANCE_ARG (mode, type); + first_anon_arg = next_cum; + + if (first_anon_arg < MAX_EPIPHANY_PARM_REGS && !no_rtl) + { + /* Note that first_reg_offset < MAX_EPIPHANY_PARM_REGS. */ + int first_reg_offset = first_anon_arg; + + *pretend_size = ((MAX_EPIPHANY_PARM_REGS - first_reg_offset) + * UNITS_PER_WORD); + } + mf->args_parsed = 1; + mf->pretend_args_odd = ((*pretend_size & UNITS_PER_WORD) ? 1 : 0); +} + +static int +epiphany_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode, + tree type, bool named ATTRIBUTE_UNUSED) +{ + int words = 0, rounded_cum; + + gcc_assert (!epiphany_pass_by_reference (cum, mode, type, /* named */ true)); + + rounded_cum = ROUND_ADVANCE_CUM (*get_cumulative_args (cum), mode, type); + if (rounded_cum < MAX_EPIPHANY_PARM_REGS) + { + words = MAX_EPIPHANY_PARM_REGS - rounded_cum; + if (words >= ROUND_ADVANCE_ARG (mode, type)) + words = 0; + } + return words * UNITS_PER_WORD; +} + +/* Cost functions. */ + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +epiphany_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, + int *total, bool speed ATTRIBUTE_UNUSED) +{ + switch (code) + { + /* Small integers in the right context are as cheap as registers. */ + case CONST_INT: + if ((outer_code == PLUS || outer_code == MINUS) + && SIMM11 (INTVAL (x))) + { + *total = 0; + return true; + } + if (IMM16 (INTVAL (x))) + { + *total = outer_code == SET ? 0 : COSTS_N_INSNS (1); + return true; + } + /* FALLTHRU */ + + case CONST: + case LABEL_REF: + case SYMBOL_REF: + *total = COSTS_N_INSNS ((epiphany_small16 (x) ? 0 : 1) + + (outer_code == SET ? 0 : 1)); + return true; + + case CONST_DOUBLE: + { + rtx high, low; + split_double (x, &high, &low); + *total = COSTS_N_INSNS (!IMM16 (INTVAL (high)) + + !IMM16 (INTVAL (low))); + return true; + } + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + *total = COSTS_N_INSNS (1); + return true; + + default: + return false; + } +} + + +/* Provide the costs of an addressing mode that contains ADDR. + If ADDR is not a valid address, its cost is irrelevant. */ + +static int +epiphany_address_cost (rtx addr, enum machine_mode mode, + addr_space_t as ATTRIBUTE_UNUSED, bool speed) +{ + rtx reg; + rtx off = const0_rtx; + int i; + + if (speed) + return 0; + /* Return 0 for addresses valid in short insns, 1 for addresses only valid + in long insns. 
*/ + switch (GET_CODE (addr)) + { + case PLUS : + reg = XEXP (addr, 0); + off = XEXP (addr, 1); + break; + case POST_MODIFY: + reg = XEXP (addr, 0); + off = XEXP (addr, 1); + gcc_assert (GET_CODE (off) == PLUS && rtx_equal_p (reg, XEXP (off, 0))); + off = XEXP (off, 1); + if (satisfies_constraint_Rgs (reg) && satisfies_constraint_Rgs (off)) + return 0; + return 1; + case REG: + default: + reg = addr; + break; + } + if (!satisfies_constraint_Rgs (reg)) + return 1; + /* The offset range available for short instructions depends on the mode + of the memory access. */ + /* First, make sure we have a valid integer. */ + if (!satisfies_constraint_L (off)) + return 1; + i = INTVAL (off); + switch (GET_MODE_SIZE (mode)) + { + default: + case 4: + if (i & 1) + return 1; + i >>= 1; + /* Fall through. */ + case 2: + if (i & 1) + return 1; + i >>= 1; + /* Fall through. */ + case 1: + return i < -7 || i > 7; + } +} + +/* Compute the cost of moving data between registers and memory. + For integer, load latency is twice as long as register-register moves, + but issue pich is the same. For floating point, load latency is three + times as much as a reg-reg move. */ +static int +epiphany_memory_move_cost (enum machine_mode mode, + reg_class_t rclass ATTRIBUTE_UNUSED, + bool in ATTRIBUTE_UNUSED) +{ + return GET_MODE_CLASS (mode) == MODE_INT ? 3 : 4; +} + +/* Function prologue/epilogue handlers. */ + +/* EPIPHANY stack frames look like: + + Before call After call + +-----------------------+ +-----------------------+ + | | | | + high | local variables, | | local variables, | + mem | reg save area, etc. | | reg save area, etc. | + | | | | + +-----------------------+ +-----------------------+ + | | | | + | arguments on stack. | | arguments on stack. | + | | | | + SP+8->+-----------------------+FP+8m->+-----------------------+ + | 2 word save area for | | reg parm save area, | + | leaf funcs / flags | | only created for | + SP+0->+-----------------------+ | variable argument | + | functions | + FP+8n->+-----------------------+ + | | + | register save area | + | | + +-----------------------+ + | | + | local variables | + | | + FP+0->+-----------------------+ + | | + | alloca allocations | + | | + +-----------------------+ + | | + | arguments on stack | + | | + SP+8->+-----------------------+ + low | 2 word save area for | + memory | leaf funcs / flags | + SP+0->+-----------------------+ + +Notes: +1) The "reg parm save area" does not exist for non variable argument fns. + The "reg parm save area" could be eliminated if we created our + own TARGET_GIMPLIFY_VA_ARG_EXPR, but that has tradeoffs as well + (so it's not done). */ + +/* Structure to be filled in by epiphany_compute_frame_size with register + save masks, and offsets for the current function. */ +struct epiphany_frame_info +{ + unsigned int total_size; /* # bytes that the entire frame takes up. */ + unsigned int pretend_size; /* # bytes we push and pretend caller did. */ + unsigned int args_size; /* # bytes that outgoing arguments take up. */ + unsigned int reg_size; /* # bytes needed to store regs. */ + unsigned int var_size; /* # bytes that variables take up. */ + HARD_REG_SET gmask; /* Set of saved gp registers. */ + int initialized; /* Nonzero if frame size already calculated. */ + int stld_sz; /* Current load/store data size for offset + adjustment. 
*/ + int need_fp; /* value to override "frame_pointer_needed */ + /* FIRST_SLOT is the slot that is saved first, at the very start of + the frame, with a POST_MODIFY to allocate the frame, if the size fits, + or at least the parm and register save areas, otherwise. + In the case of a large frame, LAST_SLOT is the slot that is saved last, + with a POST_MODIFY to allocate the rest of the frame. */ + int first_slot, last_slot, first_slot_offset, last_slot_offset; + int first_slot_size; + int small_threshold; +}; + +/* Current frame information calculated by epiphany_compute_frame_size. */ +static struct epiphany_frame_info current_frame_info; + +/* Zero structure to initialize current_frame_info. */ +static struct epiphany_frame_info zero_frame_info; + +/* The usual; we set up our machine_function data. */ +static struct machine_function * +epiphany_init_machine_status (void) +{ + struct machine_function *machine; + + /* Reset state info for each function. */ + current_frame_info = zero_frame_info; + + machine = ggc_alloc_cleared_machine_function_t (); + + return machine; +} + +/* Implements INIT_EXPANDERS. We just set up to call the above + * function. */ +void +epiphany_init_expanders (void) +{ + init_machine_status = epiphany_init_machine_status; +} + +/* Type of function DECL. + + The result is cached. To reset the cache at the end of a function, + call with DECL = NULL_TREE. */ + +static enum epiphany_function_type +epiphany_compute_function_type (tree decl) +{ + tree a; + /* Cached value. */ + static enum epiphany_function_type fn_type = EPIPHANY_FUNCTION_UNKNOWN; + /* Last function we were called for. */ + static tree last_fn = NULL_TREE; + + /* Resetting the cached value? */ + if (decl == NULL_TREE) + { + fn_type = EPIPHANY_FUNCTION_UNKNOWN; + last_fn = NULL_TREE; + return fn_type; + } + + if (decl == last_fn && fn_type != EPIPHANY_FUNCTION_UNKNOWN) + return fn_type; + + /* Assume we have a normal function (not an interrupt handler). */ + fn_type = EPIPHANY_FUNCTION_NORMAL; + + /* Now see if this is an interrupt handler. */ + for (a = DECL_ATTRIBUTES (decl); + a; + a = TREE_CHAIN (a)) + { + tree name = TREE_PURPOSE (a); + + if (name == get_identifier ("interrupt")) + fn_type = EPIPHANY_FUNCTION_INTERRUPT; + } + + last_fn = decl; + return fn_type; +} + +#define RETURN_ADDR_REGNUM GPR_LR +#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) +#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM)) + +/* Tell prologue and epilogue if register REGNO should be saved / restored. + The return address and frame pointer are treated separately. + Don't consider them here. */ +#define MUST_SAVE_REGISTER(regno, interrupt_p) \ + ((df_regs_ever_live_p (regno) \ + || (interrupt_p && !crtl->is_leaf \ + && call_used_regs[regno] && !fixed_regs[regno])) \ + && (!call_used_regs[regno] || regno == GPR_LR \ + || (interrupt_p && regno != GPR_SP))) + +#define MUST_SAVE_RETURN_ADDR 0 + +/* Return the bytes needed to compute the frame pointer from the current + stack pointer. + + SIZE is the size needed for local variables. */ + +static unsigned int +epiphany_compute_frame_size (int size /* # of var. bytes allocated. 
*/) +{ + int regno; + unsigned int total_size, var_size, args_size, pretend_size, reg_size; + HARD_REG_SET gmask; + enum epiphany_function_type fn_type; + int interrupt_p; + int first_slot, last_slot, first_slot_offset, last_slot_offset; + int first_slot_size; + int small_slots = 0; + + var_size = size; + args_size = crtl->outgoing_args_size; + pretend_size = crtl->args.pretend_args_size; + total_size = args_size + var_size; + reg_size = 0; + CLEAR_HARD_REG_SET (gmask); + first_slot = -1; + first_slot_offset = 0; + last_slot = -1; + last_slot_offset = 0; + first_slot_size = UNITS_PER_WORD; + + /* See if this is an interrupt handler. Call used registers must be saved + for them too. */ + fn_type = epiphany_compute_function_type (current_function_decl); + interrupt_p = EPIPHANY_INTERRUPT_P (fn_type); + + /* Calculate space needed for registers. */ + + for (regno = MAX_EPIPHANY_PARM_REGS - 1; pretend_size > reg_size; regno--) + { + reg_size += UNITS_PER_WORD; + SET_HARD_REG_BIT (gmask, regno); + if (epiphany_stack_offset - reg_size == 0) + first_slot = regno; + } + + if (interrupt_p) + reg_size += 2 * UNITS_PER_WORD; + else + small_slots = epiphany_stack_offset / UNITS_PER_WORD; + + if (frame_pointer_needed) + { + current_frame_info.need_fp = 1; + if (!interrupt_p && first_slot < 0) + first_slot = GPR_FP; + } + else + current_frame_info.need_fp = 0; + for (regno = 0; regno <= GPR_LAST; regno++) + { + if (MUST_SAVE_REGISTER (regno, interrupt_p)) + { + gcc_assert (!TEST_HARD_REG_BIT (gmask, regno)); + reg_size += UNITS_PER_WORD; + SET_HARD_REG_BIT (gmask, regno); + /* FIXME: when optimizing for speed, take schedling into account + when selecting these registers. */ + if (regno == first_slot) + gcc_assert (regno == GPR_FP && frame_pointer_needed); + else if (!interrupt_p && first_slot < 0) + first_slot = regno; + else if (last_slot < 0 + && (first_slot ^ regno) != 1 + && (!interrupt_p || regno > GPR_1)) + last_slot = regno; + } + } + if (TEST_HARD_REG_BIT (gmask, GPR_LR)) + MACHINE_FUNCTION (cfun)->lr_clobbered = 1; + /* ??? Could sometimes do better than that. */ + current_frame_info.small_threshold + = (optimize >= 3 || interrupt_p ? 0 + : pretend_size ? small_slots + : 4 + small_slots - (first_slot == GPR_FP)); + + /* If there might be variables with 64-bit alignment requirement, align the + start of the variables. */ + if (var_size >= 2 * UNITS_PER_WORD + /* We don't want to split a double reg save/restore across two unpaired + stack slots when optimizing. This rounding could be avoided with + more complex reordering of the register saves, but that would seem + to be a lot of code complexity for little gain. */ + || (reg_size > 8 && optimize)) + reg_size = EPIPHANY_STACK_ALIGN (reg_size); + if (((total_size + reg_size + /* Reserve space for UNKNOWN_REGNUM. 
*/ + + EPIPHANY_STACK_ALIGN (4)) + <= (unsigned) epiphany_stack_offset) + && !interrupt_p + && crtl->is_leaf && !frame_pointer_needed) + { + first_slot = -1; + last_slot = -1; + goto alloc_done; + } + else if (reg_size + && !interrupt_p + && reg_size < (unsigned HOST_WIDE_INT) epiphany_stack_offset) + reg_size = epiphany_stack_offset; + if (interrupt_p) + { + if (total_size + reg_size < 0x3fc) + { + first_slot_offset = EPIPHANY_STACK_ALIGN (total_size + reg_size); + first_slot_offset += EPIPHANY_STACK_ALIGN (epiphany_stack_offset); + last_slot = -1; + } + else + { + first_slot_offset = EPIPHANY_STACK_ALIGN (reg_size); + last_slot_offset = EPIPHANY_STACK_ALIGN (total_size); + last_slot_offset += EPIPHANY_STACK_ALIGN (epiphany_stack_offset); + if (last_slot >= 0) + CLEAR_HARD_REG_BIT (gmask, last_slot); + } + } + else if (total_size + reg_size < 0x1ffc && first_slot >= 0) + { + first_slot_offset = EPIPHANY_STACK_ALIGN (total_size + reg_size); + last_slot = -1; + } + else + { + if (total_size + reg_size <= (unsigned) epiphany_stack_offset) + { + gcc_assert (first_slot < 0); + gcc_assert (reg_size == 0 || (int) reg_size == epiphany_stack_offset); + last_slot_offset = EPIPHANY_STACK_ALIGN (total_size + reg_size); + } + else + { + first_slot_offset + = (reg_size + ? EPIPHANY_STACK_ALIGN (reg_size - epiphany_stack_offset) : 0); + if (!first_slot_offset) + { + if (first_slot != GPR_FP || !current_frame_info.need_fp) + last_slot = first_slot; + first_slot = -1; + } + last_slot_offset = EPIPHANY_STACK_ALIGN (total_size); + if (reg_size) + last_slot_offset += EPIPHANY_STACK_ALIGN (epiphany_stack_offset); + } + if (last_slot >= 0) + CLEAR_HARD_REG_BIT (gmask, last_slot); + } + alloc_done: + if (first_slot >= 0) + { + CLEAR_HARD_REG_BIT (gmask, first_slot); + if (TEST_HARD_REG_BIT (gmask, first_slot ^ 1) + && epiphany_stack_offset - pretend_size >= 2 * UNITS_PER_WORD) + { + CLEAR_HARD_REG_BIT (gmask, first_slot ^ 1); + first_slot_size = 2 * UNITS_PER_WORD; + first_slot &= ~1; + } + } + total_size = first_slot_offset + last_slot_offset; + + /* Save computed information. */ + current_frame_info.total_size = total_size; + current_frame_info.pretend_size = pretend_size; + current_frame_info.var_size = var_size; + current_frame_info.args_size = args_size; + current_frame_info.reg_size = reg_size; + COPY_HARD_REG_SET (current_frame_info.gmask, gmask); + current_frame_info.first_slot = first_slot; + current_frame_info.last_slot = last_slot; + current_frame_info.first_slot_offset = first_slot_offset; + current_frame_info.first_slot_size = first_slot_size; + current_frame_info.last_slot_offset = last_slot_offset; + + current_frame_info.initialized = reload_completed; + + /* Ok, we're done. */ + return total_size; +} + +/* Print operand X (an rtx) in assembler syntax to file FILE. + CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. + For `%' followed by punctuation, CODE is the punctuation and X is null. */ + +static void +epiphany_print_operand (FILE *file, rtx x, int code) +{ + switch (code) + { + case 'd': + fputs (epiphany_condition_codes[get_epiphany_condition_code (x)], file); + return; + case 'D': + fputs (epiphany_condition_codes[EPIPHANY_INVERSE_CONDITION_CODE + (get_epiphany_condition_code (x))], + file); + return; + + case 'X': + current_frame_info.stld_sz = 8; + break; + + case 'C' : + current_frame_info.stld_sz = 4; + break; + + case 'c' : + current_frame_info.stld_sz = 2; + break; + + case 'f': + fputs (REG_P (x) ? 
"jalr " : "bl ", file); + break; + + case '-': + fprintf (file, "r%d", epiphany_m1reg); + return; + + case 0 : + /* Do nothing special. */ + break; + default : + /* Unknown flag. */ + output_operand_lossage ("invalid operand output code"); + } + + switch (GET_CODE (x)) + { + rtx addr; + rtx offset; + + case REG : + fputs (reg_names[REGNO (x)], file); + break; + case MEM : + if (code == 0) + current_frame_info.stld_sz = 1; + fputc ('[', file); + addr = XEXP (x, 0); + switch (GET_CODE (addr)) + { + case POST_INC: + offset = GEN_INT (GET_MODE_SIZE (GET_MODE (x))); + addr = XEXP (addr, 0); + break; + case POST_DEC: + offset = GEN_INT (-GET_MODE_SIZE (GET_MODE (x))); + addr = XEXP (addr, 0); + break; + case POST_MODIFY: + offset = XEXP (XEXP (addr, 1), 1); + addr = XEXP (addr, 0); + break; + default: + offset = 0; + break; + } + output_address (addr); + fputc (']', file); + if (offset) + { + fputc (',', file); + if (CONST_INT_P (offset)) switch (GET_MODE_SIZE (GET_MODE (x))) + { + default: + gcc_unreachable (); + case 8: + offset = GEN_INT (INTVAL (offset) >> 3); + break; + case 4: + offset = GEN_INT (INTVAL (offset) >> 2); + break; + case 2: + offset = GEN_INT (INTVAL (offset) >> 1); + break; + case 1: + break; + } + output_address (offset); + } + break; + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "%s0x%08lx", IMMEDIATE_PREFIX, l); + break; + } + /* Fall through. Let output_addr_const deal with it. */ + case CONST_INT: + fprintf(file,"%s",IMMEDIATE_PREFIX); + if (code == 'C' || code == 'X') + { + fprintf (file, "%ld", + (long) (INTVAL (x) / current_frame_info.stld_sz)); + break; + } + /* Fall through */ + default : + output_addr_const (file, x); + break; + } +} + +/* Print a memory address as an operand to reference that memory location. */ + +static void +epiphany_print_operand_address (FILE *file, rtx addr) +{ + register rtx base, index = 0; + int offset = 0; + + switch (GET_CODE (addr)) + { + case REG : + fputs (reg_names[REGNO (addr)], file); + break; + case SYMBOL_REF : + if (/*???*/ 0 && SYMBOL_REF_FUNCTION_P (addr)) + { + output_addr_const (file, addr); + } + else + { + output_addr_const (file, addr); + } + break; + case PLUS : + if (GET_CODE (XEXP (addr, 0)) == CONST_INT) + offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1); + else if (GET_CODE (XEXP (addr, 1)) == CONST_INT) + offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0); + else + base = XEXP (addr, 0), index = XEXP (addr, 1); + gcc_assert (GET_CODE (base) == REG); + fputs (reg_names[REGNO (base)], file); + if (index == 0) + { + /* + ** ++rk quirky method to scale offset for ld/str....... + */ + fprintf (file, ",%s%d", IMMEDIATE_PREFIX, + offset/current_frame_info.stld_sz); + } + else + { + switch (GET_CODE (index)) + { + case REG: + fprintf (file, ",%s", reg_names[REGNO (index)]); + break; + case SYMBOL_REF: + fputc (',', file), output_addr_const (file, index); + break; + default: + gcc_unreachable (); + } + } + break; + case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: case POST_MODIFY: + /* We shouldn't get here as we've lost the mode of the memory object + (which says how much to inc/dec by. 
*/ + gcc_unreachable (); + break; + default: + output_addr_const (file, addr); + break; + } +} + +void +epiphany_final_prescan_insn (rtx insn ATTRIBUTE_UNUSED, + rtx *opvec ATTRIBUTE_UNUSED, + int noperands ATTRIBUTE_UNUSED) +{ + int i = epiphany_n_nops; + rtx pat ATTRIBUTE_UNUSED; + + while (i--) + fputs ("\tnop\n", asm_out_file); +} + + +/* Worker function for TARGET_RETURN_IN_MEMORY. */ + +static bool +epiphany_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT size = int_size_in_bytes (type); + + if (AGGREGATE_TYPE_P (type) + && (TYPE_MODE (type) == BLKmode || TYPE_NEEDS_CONSTRUCTING (type))) + return true; + return (size == -1 || size > 8); +} + +/* For EPIPHANY, All aggregates and arguments greater than 8 bytes are + passed by reference. */ + +static bool +epiphany_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED, + enum machine_mode mode, const_tree type, + bool named ATTRIBUTE_UNUSED) +{ + if (type) + { + if (AGGREGATE_TYPE_P (type) + && (mode == BLKmode || TYPE_NEEDS_CONSTRUCTING (type))) + return true; + } + return false; +} + + +static rtx +epiphany_function_value (const_tree ret_type, + const_tree fn_decl_or_type ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + enum machine_mode mode; + + mode = TYPE_MODE (ret_type); + /* We must change the mode like PROMOTE_MODE does. + ??? PROMOTE_MODE is ignored for non-scalar types. + The set of types tested here has to be kept in sync + with the one in explow.c:promote_mode. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_MODE_SIZE (mode) < 4 + && (TREE_CODE (ret_type) == INTEGER_TYPE + || TREE_CODE (ret_type) == ENUMERAL_TYPE + || TREE_CODE (ret_type) == BOOLEAN_TYPE + || TREE_CODE (ret_type) == OFFSET_TYPE)) + mode = SImode; + return gen_rtx_REG (mode, 0); +} + +static rtx +epiphany_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, 0); +} + +static bool +epiphany_function_value_regno_p (const unsigned int regno ATTRIBUTE_UNUSED) +{ + return regno == 0; +} + +/* Fix up invalid option settings. */ +static void +epiphany_override_options (void) +{ + if (epiphany_stack_offset < 4) + error ("stack_offset must be at least 4"); + if (epiphany_stack_offset & 3) + error ("stack_offset must be a multiple of 4"); + epiphany_stack_offset = (epiphany_stack_offset + 3) & -4; + + /* This needs to be done at start up. It's convenient to do it here. */ + epiphany_init (); +} + +/* For a DImode load / store SET, make a SImode set for a + REG_FRAME_RELATED_EXPR note, using OFFSET to create a high or lowpart + subreg. 
*/ +static rtx +frame_subreg_note (rtx set, int offset) +{ + rtx src = simplify_gen_subreg (SImode, SET_SRC (set), DImode, offset); + rtx dst = simplify_gen_subreg (SImode, SET_DEST (set), DImode, offset); + + set = gen_rtx_SET (VOIDmode, dst ,src); + RTX_FRAME_RELATED_P (set) = 1; + return set; +} + +static rtx +frame_insn (rtx x) +{ + int i; + rtx note = NULL_RTX; + + if (GET_CODE (x) == PARALLEL) + { + rtx part = XVECEXP (x, 0, 0); + + if (GET_MODE (SET_DEST (part)) == DImode) + { + note = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (XVECLEN (x, 0) + 1)); + XVECEXP (note, 0, 0) = frame_subreg_note (part, 0); + XVECEXP (note, 0, 1) = frame_subreg_note (part, UNITS_PER_WORD); + for (i = XVECLEN (x, 0) - 1; i >= 1; i--) + { + part = copy_rtx (XVECEXP (x, 0, i)); + + if (GET_CODE (part) == SET) + RTX_FRAME_RELATED_P (part) = 1; + XVECEXP (note, 0, i + 1) = part; + } + } + else + { + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + { + part = XVECEXP (x, 0, i); + + if (GET_CODE (part) == SET) + RTX_FRAME_RELATED_P (part) = 1; + } + } + } + else if (GET_CODE (x) == SET && GET_MODE (SET_DEST (x)) == DImode) + note = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, frame_subreg_note (x, 0), + frame_subreg_note (x, UNITS_PER_WORD))); + x = emit_insn (x); + RTX_FRAME_RELATED_P (x) = 1; + if (note) + add_reg_note (x, REG_FRAME_RELATED_EXPR, note); + return x; +} + +static rtx +frame_move_insn (rtx to, rtx from) +{ + return frame_insn (gen_rtx_SET (VOIDmode, to, from)); +} + +/* Generate a MEM referring to a varargs argument slot. */ + +static rtx +gen_varargs_mem (enum machine_mode mode, rtx addr) +{ + rtx mem = gen_rtx_MEM (mode, addr); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, get_varargs_alias_set ()); + return mem; +} + +/* Emit instructions to save or restore registers in the range [MIN..LIMIT) . + If EPILOGUE_P is 0, save; if it is one, restore. + ADDR is the stack slot to save the first register to; subsequent + registers are written to lower addresses. + However, the order of register pairs can be reversed in order to + use double-word load-store instructions. Likewise, an unpaired single + word save slot can be skipped while double saves are carried out, and + reused when a single register is to be saved. */ + +static void +epiphany_emit_save_restore (int min, int limit, rtx addr, int epilogue_p) +{ + int i; + int stack_offset + = current_frame_info.first_slot >= 0 ? epiphany_stack_offset : 0; + rtx skipped_mem = NULL_RTX; + int last_saved = limit - 1; + + if (!optimize) + while (last_saved >= 0 + && !TEST_HARD_REG_BIT (current_frame_info.gmask, last_saved)) + last_saved--; + for (i = 0; i < limit; i++) + { + enum machine_mode mode = word_mode; + rtx mem, reg; + int n = i; + rtx (*gen_mem) (enum machine_mode, rtx) = gen_frame_mem; + + /* Make sure we push the arguments in the right order. 
*/ + if (n < MAX_EPIPHANY_PARM_REGS && crtl->args.pretend_args_size) + { + n = MAX_EPIPHANY_PARM_REGS - 1 - n; + gen_mem = gen_varargs_mem; + } + if (stack_offset == current_frame_info.first_slot_size + && current_frame_info.first_slot >= 0) + { + if (current_frame_info.first_slot_size > UNITS_PER_WORD) + { + mode = DImode; + addr = plus_constant (Pmode, addr, + - (HOST_WIDE_INT) UNITS_PER_WORD); + } + if (i-- < min || !epilogue_p) + goto next_slot; + n = current_frame_info.first_slot; + gen_mem = gen_frame_mem; + } + else if (n == UNKNOWN_REGNUM + && stack_offset > current_frame_info.first_slot_size) + { + i--; + goto next_slot; + } + else if (!TEST_HARD_REG_BIT (current_frame_info.gmask, n)) + continue; + else if (i < min) + goto next_slot; + + /* Check for a register pair to save. */ + if (n == i + && (n >= MAX_EPIPHANY_PARM_REGS || crtl->args.pretend_args_size == 0) + && (n & 1) == 0 && n+1 < limit + && TEST_HARD_REG_BIT (current_frame_info.gmask, n+1)) + { + /* If it fits in the current stack slot pair, place it there. */ + if (GET_CODE (addr) == PLUS && (stack_offset & 7) == 0 + && stack_offset != 2 * UNITS_PER_WORD + && (current_frame_info.last_slot < 0 + || INTVAL (XEXP (addr, 1)) != UNITS_PER_WORD) + && (n+1 != last_saved || !skipped_mem)) + { + mode = DImode; + i++; + addr = plus_constant (Pmode, addr, + - (HOST_WIDE_INT) UNITS_PER_WORD); + } + /* If it fits in the following stack slot pair, that's fine, too. */ + else if (GET_CODE (addr) == PLUS && (stack_offset & 7) == 4 + && stack_offset != 2 * UNITS_PER_WORD + && stack_offset != 3 * UNITS_PER_WORD + && (current_frame_info.last_slot < 0 + || INTVAL (XEXP (addr, 1)) != 2 * UNITS_PER_WORD) + && n + 1 != last_saved) + { + gcc_assert (!skipped_mem); + stack_offset -= GET_MODE_SIZE (mode); + skipped_mem = gen_mem (mode, addr); + mode = DImode; + i++; + addr = plus_constant (Pmode, addr, + - (HOST_WIDE_INT) 2 * UNITS_PER_WORD); + } + } + reg = gen_rtx_REG (mode, n); + if (mode != DImode && skipped_mem) + mem = skipped_mem; + else + mem = gen_mem (mode, addr); + + /* If we are loading / storing LR, note the offset that + gen_reload_insi_ra requires. Since GPR_LR is even, + we only need to test n, even if mode is DImode. */ + gcc_assert ((GPR_LR & 1) == 0); + if (n == GPR_LR) + { + long lr_slot_offset = 0; + rtx m_addr = XEXP (mem, 0); + + if (GET_CODE (m_addr) == PLUS) + lr_slot_offset = INTVAL (XEXP (m_addr, 1)); + if (frame_pointer_needed) + lr_slot_offset += (current_frame_info.first_slot_offset + - current_frame_info.total_size); + if (MACHINE_FUNCTION (cfun)->lr_slot_known) + gcc_assert (MACHINE_FUNCTION (cfun)->lr_slot_offset + == lr_slot_offset); + MACHINE_FUNCTION (cfun)->lr_slot_offset = lr_slot_offset; + MACHINE_FUNCTION (cfun)->lr_slot_known = 1; + } + + if (!epilogue_p) + frame_move_insn (mem, reg); + else if (n >= MAX_EPIPHANY_PARM_REGS || !crtl->args.pretend_args_size) + emit_move_insn (reg, mem); + if (mem == skipped_mem) + { + skipped_mem = NULL_RTX; + continue; + } + next_slot: + addr = plus_constant (Pmode, addr, -(HOST_WIDE_INT) UNITS_PER_WORD); + stack_offset -= GET_MODE_SIZE (mode); + } +} + +void +epiphany_expand_prologue (void) +{ + int interrupt_p; + enum epiphany_function_type fn_type; + rtx addr, mem, off, reg; + + if (!current_frame_info.initialized) + epiphany_compute_frame_size (get_frame_size ()); + + /* It is debatable if we should adjust this by epiphany_stack_offset. 
*/ + if (flag_stack_usage_info) + current_function_static_stack_size = current_frame_info.total_size; + + fn_type = epiphany_compute_function_type (current_function_decl); + interrupt_p = EPIPHANY_INTERRUPT_P (fn_type); + + if (interrupt_p) + { + addr = plus_constant (Pmode, stack_pointer_rtx, + - (HOST_WIDE_INT) 2 * UNITS_PER_WORD); + if (!lookup_attribute ("forwarder_section", + DECL_ATTRIBUTES (current_function_decl)) + || !epiphany_is_long_call_p (XEXP (DECL_RTL (current_function_decl), + 0))) + frame_move_insn (gen_frame_mem (DImode, addr), + gen_rtx_REG (DImode, GPR_0)); + frame_move_insn (gen_rtx_REG (SImode, GPR_0), + gen_rtx_REG (word_mode, STATUS_REGNUM)); + frame_move_insn (gen_rtx_REG (SImode, GPR_1), + gen_rtx_REG (word_mode, IRET_REGNUM)); + mem = gen_frame_mem (BLKmode, stack_pointer_rtx); + off = GEN_INT (-current_frame_info.first_slot_offset); + frame_insn (gen_stack_adjust_add (off, mem)); + if (!epiphany_uninterruptible_p (current_function_decl)) + emit_insn (gen_gie ()); + addr = plus_constant (Pmode, stack_pointer_rtx, + current_frame_info.first_slot_offset + - (HOST_WIDE_INT) 3 * UNITS_PER_WORD); + } + else + { + addr = plus_constant (Pmode, stack_pointer_rtx, + epiphany_stack_offset + - (HOST_WIDE_INT) UNITS_PER_WORD); + epiphany_emit_save_restore (0, current_frame_info.small_threshold, + addr, 0); + /* Allocate register save area; for small to medium size frames, + allocate the entire frame; this is joint with one register save. */ + if (current_frame_info.first_slot >= 0) + { + enum machine_mode mode + = (current_frame_info.first_slot_size == UNITS_PER_WORD + ? word_mode : DImode); + + off = GEN_INT (-current_frame_info.first_slot_offset); + mem = gen_frame_mem (BLKmode, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, off)); + frame_insn (gen_stack_adjust_str + (gen_frame_mem (mode, stack_pointer_rtx), + gen_rtx_REG (mode, current_frame_info.first_slot), + off, mem)); + addr = plus_constant (Pmode, addr, + current_frame_info.first_slot_offset); + } + } + epiphany_emit_save_restore (current_frame_info.small_threshold, + FIRST_PSEUDO_REGISTER, addr, 0); + if (current_frame_info.need_fp) + frame_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + /* For large frames, allocate bulk of frame. This is usually joint with one + register save. */ + if (current_frame_info.last_slot >= 0) + { + rtx ip, mem2, insn, note; + + gcc_assert (current_frame_info.last_slot != GPR_FP + || (!current_frame_info.need_fp + && current_frame_info.first_slot < 0)); + off = GEN_INT (-current_frame_info.last_slot_offset); + mem = gen_frame_mem (BLKmode, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, off)); + ip = gen_rtx_REG (Pmode, GPR_IP); + frame_move_insn (ip, off); + reg = gen_rtx_REG (word_mode, current_frame_info.last_slot), + mem2 = gen_frame_mem (word_mode, stack_pointer_rtx), + insn = frame_insn (gen_stack_adjust_str (mem2, reg, ip, mem)); + /* Instruction scheduling can separate the instruction setting IP from + INSN so that dwarf2out_frame_debug_expr becomes confused what the + temporary register is. Example: _gcov.o */ + note = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, off)); + note = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, gen_rtx_SET (VOIDmode, mem2, reg), + note)); + add_reg_note (insn, REG_FRAME_RELATED_EXPR, note); + } + /* If there is only one or no register to save, yet we have a large frame, + use an add. 
*/ + else if (current_frame_info.last_slot_offset) + { + mem = gen_frame_mem (BLKmode, + plus_constant (Pmode, stack_pointer_rtx, + current_frame_info.last_slot_offset)); + off = GEN_INT (-current_frame_info.last_slot_offset); + if (!SIMM11 (INTVAL (off))) + { + reg = gen_rtx_REG (Pmode, GPR_IP); + frame_move_insn (reg, off); + off = reg; + } + frame_insn (gen_stack_adjust_add (off, mem)); + } +} + +void +epiphany_expand_epilogue (int sibcall_p) +{ + int interrupt_p; + enum epiphany_function_type fn_type; + rtx mem, addr, reg, off; + HOST_WIDE_INT restore_offset; + + fn_type = epiphany_compute_function_type (current_function_decl); + interrupt_p = EPIPHANY_INTERRUPT_P (fn_type); + + /* For variable frames, deallocate bulk of frame. */ + if (current_frame_info.need_fp) + { + mem = gen_frame_mem (BLKmode, stack_pointer_rtx); + emit_insn (gen_stack_adjust_mov (mem)); + } + /* Else for large static frames, deallocate bulk of frame. */ + else if (current_frame_info.last_slot_offset) + { + mem = gen_frame_mem (BLKmode, stack_pointer_rtx); + reg = gen_rtx_REG (Pmode, GPR_IP); + emit_move_insn (reg, GEN_INT (current_frame_info.last_slot_offset)); + emit_insn (gen_stack_adjust_add (reg, mem)); + } + restore_offset = (interrupt_p + ? - 3 * UNITS_PER_WORD + : epiphany_stack_offset - (HOST_WIDE_INT) UNITS_PER_WORD); + addr = plus_constant (Pmode, stack_pointer_rtx, + (current_frame_info.first_slot_offset + + restore_offset)); + epiphany_emit_save_restore (current_frame_info.small_threshold, + FIRST_PSEUDO_REGISTER, addr, 1); + + if (interrupt_p && !epiphany_uninterruptible_p (current_function_decl)) + emit_insn (gen_gid ()); + + off = GEN_INT (current_frame_info.first_slot_offset); + mem = gen_frame_mem (BLKmode, stack_pointer_rtx); + /* For large / variable size frames, deallocating the register save area is + joint with one register restore; for medium size frames, we use a + dummy post-increment load to deallocate the whole frame. */ + if (!SIMM11 (INTVAL (off)) || current_frame_info.last_slot >= 0) + { + emit_insn (gen_stack_adjust_ldr + (gen_rtx_REG (word_mode, + (current_frame_info.last_slot >= 0 + ? current_frame_info.last_slot : GPR_IP)), + gen_frame_mem (word_mode, stack_pointer_rtx), + off, + mem)); + } + /* While for small frames, we deallocate the entire frame with one add.
*/ + else if (INTVAL (off)) + { + emit_insn (gen_stack_adjust_add (off, mem)); + } + if (interrupt_p) + { + emit_move_insn (gen_rtx_REG (word_mode, STATUS_REGNUM), + gen_rtx_REG (SImode, GPR_0)); + emit_move_insn (gen_rtx_REG (word_mode, IRET_REGNUM), + gen_rtx_REG (SImode, GPR_1)); + addr = plus_constant (Pmode, stack_pointer_rtx, + - (HOST_WIDE_INT) 2 * UNITS_PER_WORD); + emit_move_insn (gen_rtx_REG (DImode, GPR_0), + gen_frame_mem (DImode, addr)); + } + addr = plus_constant (Pmode, stack_pointer_rtx, + epiphany_stack_offset - (HOST_WIDE_INT) UNITS_PER_WORD); + epiphany_emit_save_restore (0, current_frame_info.small_threshold, addr, 1); + if (!sibcall_p) + { + if (interrupt_p) + emit_jump_insn (gen_return_internal_interrupt()); + else + emit_jump_insn (gen_return_i ()); + } +} + +int +epiphany_initial_elimination_offset (int from, int to) +{ + epiphany_compute_frame_size (get_frame_size ()); + if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return current_frame_info.total_size - current_frame_info.reg_size; + if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return current_frame_info.first_slot_offset - current_frame_info.reg_size; + if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) + return (current_frame_info.total_size + - ((current_frame_info.pretend_size + 4) & -8)); + if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) + return (current_frame_info.first_slot_offset + - ((current_frame_info.pretend_size + 4) & -8)); + gcc_unreachable (); +} + +bool +epiphany_regno_rename_ok (unsigned, unsigned dst) +{ + enum epiphany_function_type fn_type; + + fn_type = epiphany_compute_function_type (current_function_decl); + if (!EPIPHANY_INTERRUPT_P (fn_type)) + return true; + if (df_regs_ever_live_p (dst)) + return true; + return false; +} + +static int +epiphany_issue_rate (void) +{ + return 2; +} + +/* Function to update the integer COST + based on the relationship between INSN that is dependent on + DEP_INSN through the dependence LINK. The default is to make no + adjustment to COST. This can be used for example to specify to + the scheduler that an output- or anti-dependence does not incur + the same cost as a data-dependence. The return value should be + the new value for COST. */ +static int +epiphany_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) +{ + if (REG_NOTE_KIND (link) == 0) + { + rtx dep_set; + + if (recog_memoized (insn) < 0 + || recog_memoized (dep_insn) < 0) + return cost; + + dep_set = single_set (dep_insn); + + /* The latency that we specify in the scheduling description refers + to the actual output, not to an auto-increment register; for that, + the latency is one. 
*/ + if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1) + { + rtx set = single_set (insn); + + if (set + && !reg_overlap_mentioned_p (SET_DEST (dep_set), SET_SRC (set)) + && (!MEM_P (SET_DEST (set)) + || !reg_overlap_mentioned_p (SET_DEST (dep_set), + XEXP (SET_DEST (set), 0)))) + cost = 1; + } + } + return cost; +} + +#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X) + +#define RTX_OK_FOR_BASE_P(X) \ + (REG_P (X) && REG_OK_FOR_BASE_P (X)) + +#define RTX_OK_FOR_INDEX_P(MODE, X) \ + ((GET_MODE_CLASS (MODE) != MODE_VECTOR_INT \ + || epiphany_vect_align >= GET_MODE_SIZE (MODE)) \ + && (REG_P (X) && REG_OK_FOR_INDEX_P (X))) + +#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X) \ +(GET_CODE (X) == PLUS \ + && RTX_OK_FOR_BASE_P (XEXP (X, 0)) \ + && (RTX_OK_FOR_INDEX_P (MODE, XEXP (X, 1)) \ + || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1)))) + +static bool +epiphany_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) +{ +#define REG_OK_FOR_BASE_P(X) \ + (strict ? GPR_P (REGNO (X)) : GPR_AP_OR_PSEUDO_P (REGNO (X))) + if (RTX_OK_FOR_BASE_P (x)) + return true; + if (RTX_FRAME_OFFSET_P (x)) + return true; + if (LEGITIMATE_OFFSET_ADDRESS_P (mode, x)) + return true; + /* If this is a misaligned stack access, don't force it to reg+index. */ + if (GET_MODE_SIZE (mode) == 8 + && GET_CODE (x) == PLUS && XEXP (x, 0) == stack_pointer_rtx + /* Decomposed to SImode; GET_MODE_SIZE (SImode) == 4 */ + && !(INTVAL (XEXP (x, 1)) & 3) + && INTVAL (XEXP (x, 1)) >= -2047 * 4 + && INTVAL (XEXP (x, 1)) <= 2046 * 4) + return true; + if (TARGET_POST_INC + && (GET_CODE (x) == POST_DEC || GET_CODE (x) == POST_INC) + && RTX_OK_FOR_BASE_P (XEXP ((x), 0))) + return true; + if ((TARGET_POST_MODIFY || reload_completed) + && GET_CODE (x) == POST_MODIFY + && GET_CODE (XEXP ((x), 1)) == PLUS + && rtx_equal_p (XEXP ((x), 0), XEXP (XEXP ((x), 1), 0)) + && LEGITIMATE_OFFSET_ADDRESS_P (mode, XEXP ((x), 1))) + return true; + if (mode == BLKmode) + return true; + return false; +} + +static reg_class_t +epiphany_secondary_reload (bool in_p, rtx x, reg_class_t rclass, + enum machine_mode mode ATTRIBUTE_UNUSED, + secondary_reload_info *sri) +{ + /* This could give more reload inheritance, but we are missing some + reload infrastructure. */ + if (0) + if (in_p && GET_CODE (x) == UNSPEC + && satisfies_constraint_Sra (x) && !satisfies_constraint_Rra (x)) + { + gcc_assert (rclass == GENERAL_REGS); + sri->icode = CODE_FOR_reload_insi_ra; + return NO_REGS; + } + return NO_REGS; +} + +bool +epiphany_is_long_call_p (rtx x) +{ + tree decl = SYMBOL_REF_DECL (x); + bool ret_val = !TARGET_SHORT_CALLS; + tree attrs; + + /* ??? Is it safe to default to ret_val if decl is NULL? We should + probably encode information via encode_section_info, and also + have (an) option(s) to take SYMBOL_FLAG_LOCAL and/or SYMBOL_FLAG_EXTERNAL + into account. */ + if (decl) + { + attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + if (lookup_attribute ("long_call", attrs)) + ret_val = true; + else if (lookup_attribute ("short_call", attrs)) + ret_val = false; + } + return ret_val; +} + +bool +epiphany_small16 (rtx x) +{ + rtx base = x; + rtx offs ATTRIBUTE_UNUSED = const0_rtx; + + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + { + base = XEXP (XEXP (x, 0), 0); + offs = XEXP (XEXP (x, 0), 1); + } + if (GET_CODE (base) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (base) + && epiphany_is_long_call_p (base)) + return false; + return TARGET_SMALL16 != 0; +} + +/* Return nonzero if it is ok to make a tail-call to DECL. 
*/ +static bool +epiphany_function_ok_for_sibcall (tree decl, tree exp) +{ + bool cfun_interrupt_p, call_interrupt_p; + + cfun_interrupt_p = EPIPHANY_INTERRUPT_P (epiphany_compute_function_type + (current_function_decl)); + if (decl) + call_interrupt_p = EPIPHANY_INTERRUPT_P (epiphany_compute_function_type (decl)); + else + { + tree fn_type = TREE_TYPE (CALL_EXPR_FN (exp)); + + gcc_assert (POINTER_TYPE_P (fn_type)); + fn_type = TREE_TYPE (fn_type); + gcc_assert (TREE_CODE (fn_type) == FUNCTION_TYPE + || TREE_CODE (fn_type) == METHOD_TYPE); + call_interrupt_p + = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (fn_type)) != NULL; + } + + /* Don't tailcall from or to an ISR routine - although we could in + principle tailcall from one ISR routine to another, we'd need to + handle this in sibcall_epilogue to make it work. */ + if (cfun_interrupt_p || call_interrupt_p) + return false; + + /* Everything else is ok. */ + return true; +} + +/* T is a function declaration or the MEM_EXPR of a MEM passed to a call + expander. + Return true iff the type of T has the uninterruptible attribute. + If T is NULL, return false. */ +bool +epiphany_uninterruptible_p (tree t) +{ + tree attrs; + + if (t) + { + attrs = TYPE_ATTRIBUTES (TREE_TYPE (t)); + if (lookup_attribute ("disinterrupt", attrs)) + return true; + } + return false; +} + +bool +epiphany_call_uninterruptible_p (rtx mem) +{ + rtx addr = XEXP (mem, 0); + tree t = NULL_TREE; + + if (GET_CODE (addr) == SYMBOL_REF) + t = SYMBOL_REF_DECL (addr); + if (!t) + t = MEM_EXPR (mem); + return epiphany_uninterruptible_p (t); +} + +static enum machine_mode +epiphany_promote_function_mode (const_tree type, enum machine_mode mode, + int *punsignedp ATTRIBUTE_UNUSED, + const_tree funtype ATTRIBUTE_UNUSED, + int for_return ATTRIBUTE_UNUSED) +{ + int dummy; + + return promote_mode (type, mode, &dummy); +} + +static void +epiphany_conditional_register_usage (void) +{ + int i; + + if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) + { + fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; + } + if (TARGET_HALF_REG_FILE) + { + for (i = 32; i <= 63; i++) + { + fixed_regs[i] = 1; + call_used_regs[i] = 1; + } + } + if (epiphany_m1reg >= 0) + { + fixed_regs[epiphany_m1reg] = 1; + call_used_regs[epiphany_m1reg] = 1; + } + if (!TARGET_PREFER_SHORT_INSN_REGS) + CLEAR_HARD_REG_SET (reg_class_contents[SHORT_INSN_REGS]); + COPY_HARD_REG_SET (reg_class_contents[SIBCALL_REGS], + reg_class_contents[GENERAL_REGS]); + /* It would be simpler and quicker if we could just use + AND_COMPL_HARD_REG_SET, alas, call_used_reg_set is yet uninitialized; + it is set up later by our caller. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!call_used_regs[i]) + CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], i); +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + MODE is the argument's machine mode. + TYPE is the data type of the argument (as a tree). + This is null for libcalls where that information may + not be available. + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + NAMED is nonzero if this argument is a named parameter + (otherwise it is an extra parameter matching an ellipsis). */ +/* On the EPIPHANY the first MAX_EPIPHANY_PARM_REGS args are normally in + registers and the rest are pushed. 
*/ +static rtx +epiphany_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS cum = *get_cumulative_args (cum_v); + + if (PASS_IN_REG_P (cum, mode, type)) + return gen_rtx_REG (mode, ROUND_ADVANCE_CUM (cum, mode, type)); + return 0; +} + +/* Update the data in CUM to advance over an argument + of mode MODE and data type TYPE. + (TYPE is null for libcalls where that information may not be available.) */ +static void +epiphany_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + *cum = ROUND_ADVANCE_CUM (*cum, mode, type) + ROUND_ADVANCE_ARG (mode, type); +} + +/* Nested function support. + An epiphany trampoline looks like this: + mov r16,%low(fnaddr) + movt r16,%high(fnaddr) + mov ip,%low(cxt) + movt ip,%high(cxt) + jr r16 */ + +#define EPIPHANY_LOW_RTX(X) \ + (gen_rtx_IOR (SImode, \ + gen_rtx_ASHIFT (SImode, \ + gen_rtx_AND (SImode, (X), GEN_INT (0xff)), GEN_INT (5)), \ + gen_rtx_ASHIFT (SImode, \ + gen_rtx_AND (SImode, (X), GEN_INT (0xff00)), GEN_INT (12)))) +#define EPIPHANY_HIGH_RTX(X) \ + EPIPHANY_LOW_RTX (gen_rtx_LSHIFTRT (SImode, (X), GEN_INT (16))) + +/* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ +static void +epiphany_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0)); + + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 0)), + gen_rtx_IOR (SImode, GEN_INT (0x4002000b), + EPIPHANY_LOW_RTX (fnaddr))); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 4)), + gen_rtx_IOR (SImode, GEN_INT (0x5002000b), + EPIPHANY_HIGH_RTX (fnaddr))); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 8)), + gen_rtx_IOR (SImode, GEN_INT (0x2002800b), + EPIPHANY_LOW_RTX (cxt))); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 12)), + gen_rtx_IOR (SImode, GEN_INT (0x3002800b), + EPIPHANY_HIGH_RTX (cxt))); + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (Pmode, tramp, 16)), + GEN_INT (0x0802014f)); +} + +bool +epiphany_optimize_mode_switching (int entity) +{ + if (MACHINE_FUNCTION (cfun)->sw_entities_processed & (1 << entity)) + return false; + switch (entity) + { + case EPIPHANY_MSW_ENTITY_AND: + case EPIPHANY_MSW_ENTITY_OR: + case EPIPHANY_MSW_ENTITY_CONFIG: + return true; + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: + return optimize > 0; + case EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN: + return MACHINE_FUNCTION (cfun)->unknown_mode_uses != 0; + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + return (MACHINE_FUNCTION (cfun)->sw_entities_processed + & (1 << EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN)) != 0; + case EPIPHANY_MSW_ENTITY_FPU_OMNIBUS: + return optimize == 0 || current_pass == pass_mode_switch_use; + } + gcc_unreachable (); +} + +int +epiphany_mode_priority_to_mode (int entity, unsigned priority) +{ + if (entity == EPIPHANY_MSW_ENTITY_AND || entity == EPIPHANY_MSW_ENTITY_OR + || entity== EPIPHANY_MSW_ENTITY_CONFIG) + return priority; + if (priority > 3) + switch (priority) + { + case 4: return FP_MODE_ROUND_UNKNOWN; + case 5: return FP_MODE_NONE; + default: gcc_unreachable (); + } + switch ((enum attr_fp_mode) epiphany_normal_fp_mode) + { + case FP_MODE_INT: + 
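+ /* In this FP_MODE_INT case, priority 0 selects FP_MODE_INT itself, priority 1 the configured normal rounding mode, priority 2 the opposite rounding mode, and priority 3 FP_MODE_CALLER; priorities 4 and 5 were already mapped to FP_MODE_ROUND_UNKNOWN and FP_MODE_NONE above. */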
switch (priority) + { + case 0: return FP_MODE_INT; + case 1: return epiphany_normal_fp_rounding; + case 2: return (epiphany_normal_fp_rounding == FP_MODE_ROUND_NEAREST + ? FP_MODE_ROUND_TRUNC : FP_MODE_ROUND_NEAREST); + case 3: return FP_MODE_CALLER; + } + case FP_MODE_ROUND_NEAREST: + case FP_MODE_CALLER: + switch (priority) + { + case 0: return FP_MODE_ROUND_NEAREST; + case 1: return FP_MODE_ROUND_TRUNC; + case 2: return FP_MODE_INT; + case 3: return FP_MODE_CALLER; + } + case FP_MODE_ROUND_TRUNC: + switch (priority) + { + case 0: return FP_MODE_ROUND_TRUNC; + case 1: return FP_MODE_ROUND_NEAREST; + case 2: return FP_MODE_INT; + case 3: return FP_MODE_CALLER; + } + case FP_MODE_ROUND_UNKNOWN: + case FP_MODE_NONE: + gcc_unreachable (); + } + gcc_unreachable (); +} + +int +epiphany_mode_needed (int entity, rtx insn) +{ + enum attr_fp_mode mode; + + if (recog_memoized (insn) < 0) + { + if (entity == EPIPHANY_MSW_ENTITY_AND + || entity == EPIPHANY_MSW_ENTITY_OR + || entity == EPIPHANY_MSW_ENTITY_CONFIG) + return 2; + return FP_MODE_NONE; + } + mode = get_attr_fp_mode (insn); + + switch (entity) + { + case EPIPHANY_MSW_ENTITY_AND: + return mode != FP_MODE_NONE && mode != FP_MODE_INT ? 1 : 2; + case EPIPHANY_MSW_ENTITY_OR: + return mode == FP_MODE_INT ? 1 : 2; + case EPIPHANY_MSW_ENTITY_CONFIG: + /* We must know/save config before we set it to something else. + Where we need the original value, we are fine with having it + just unchanged from the function start. + Because of the nature of the mode switching optimization, + a restore will be dominated by a clobber. */ + if (mode != FP_MODE_NONE && mode != FP_MODE_CALLER) + return 1; + /* A cpecial case are abnormal edges, which are deemed to clobber + the mode as well. We need to pin this effect on a actually + dominating insn, and one where the frame can be accessed, too, in + case the pseudo used to save CONFIG doesn't get a hard register. */ + if (CALL_P (insn) && find_reg_note (insn, REG_EH_REGION, NULL_RTX)) + return 1; + return 2; + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + if (recog_memoized (insn) == CODE_FOR_set_fp_mode) + mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn); + /* Fall through. */ + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: + if (mode == FP_MODE_ROUND_UNKNOWN) + { + MACHINE_FUNCTION (cfun)->unknown_mode_uses++; + return FP_MODE_NONE; + } + return mode; + case EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN: + if (mode == FP_MODE_ROUND_NEAREST || mode == FP_MODE_ROUND_TRUNC) + return FP_MODE_ROUND_UNKNOWN; + return mode; + case EPIPHANY_MSW_ENTITY_FPU_OMNIBUS: + if (mode == FP_MODE_ROUND_UNKNOWN) + return epiphany_normal_fp_rounding; + return mode; + default: + gcc_unreachable (); + } +} + +int +epiphany_mode_entry_exit (int entity, bool exit) +{ + int normal_mode = epiphany_normal_fp_mode ; + + MACHINE_FUNCTION (cfun)->sw_entities_processed |= (1 << entity); + if (epiphany_is_interrupt_p (current_function_decl)) + normal_mode = FP_MODE_CALLER; + switch (entity) + { + case EPIPHANY_MSW_ENTITY_AND: + if (exit) + return normal_mode != FP_MODE_INT ? 1 : 2; + return 0; + case EPIPHANY_MSW_ENTITY_OR: + if (exit) + return normal_mode == FP_MODE_INT ? 1 : 2; + return 0; + case EPIPHANY_MSW_ENTITY_CONFIG: + if (exit) + return 2; + return normal_mode == FP_MODE_CALLER ? 0 : 1; + case EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN: + if (normal_mode == FP_MODE_ROUND_NEAREST + || normal_mode == FP_MODE_ROUND_TRUNC) + return FP_MODE_ROUND_UNKNOWN; + /* Fall through. 
*/ + case EPIPHANY_MSW_ENTITY_NEAREST: + case EPIPHANY_MSW_ENTITY_TRUNC: + case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: + case EPIPHANY_MSW_ENTITY_FPU_OMNIBUS: + return normal_mode; + default: + gcc_unreachable (); + } +} + +int +epiphany_mode_after (int entity, int last_mode, rtx insn) +{ + /* We have too few call-saved registers to hope to keep the masks across + calls. */ + if (entity == EPIPHANY_MSW_ENTITY_AND || entity == EPIPHANY_MSW_ENTITY_OR) + { + if (CALL_P (insn)) + return 0; + return last_mode; + } + /* If there is an abnormal edge, we don't want the config register to + be 'saved' again at the destination. + The frame pointer adjustment is inside a PARALLEL because of the + flags clobber. */ + if (entity == EPIPHANY_MSW_ENTITY_CONFIG && NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == PARALLEL + && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == SET + && SET_DEST (XVECEXP (PATTERN (insn), 0, 0)) == frame_pointer_rtx) + { + gcc_assert (cfun->has_nonlocal_label); + return 1; + } + if (recog_memoized (insn) < 0) + return last_mode; + if (get_attr_fp_mode (insn) == FP_MODE_ROUND_UNKNOWN + && last_mode != FP_MODE_ROUND_NEAREST && last_mode != FP_MODE_ROUND_TRUNC) + { + if (entity == EPIPHANY_MSW_ENTITY_NEAREST) + return FP_MODE_ROUND_NEAREST; + if (entity == EPIPHANY_MSW_ENTITY_TRUNC) + return FP_MODE_ROUND_TRUNC; + } + if (recog_memoized (insn) == CODE_FOR_set_fp_mode) + { + rtx src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + int fp_mode; + + if (REG_P (src)) + return FP_MODE_CALLER; + fp_mode = INTVAL (XVECEXP (XEXP (src, 0), 0, 0)); + if (entity == EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN + && (fp_mode == FP_MODE_ROUND_NEAREST + || fp_mode == EPIPHANY_MSW_ENTITY_TRUNC)) + return FP_MODE_ROUND_UNKNOWN; + return fp_mode; + } + return last_mode; +} + +void +emit_set_fp_mode (int entity, int mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED) +{ + rtx save_cc, cc_reg, mask, src, src2; + enum attr_fp_mode fp_mode; + + if (!MACHINE_FUNCTION (cfun)->and_mask) + { + MACHINE_FUNCTION (cfun)->and_mask = gen_reg_rtx (SImode); + MACHINE_FUNCTION (cfun)->or_mask = gen_reg_rtx (SImode); + } + if (entity == EPIPHANY_MSW_ENTITY_AND) + { + gcc_assert (mode >= 0 && mode <= 2); + if (mode == 1) + emit_move_insn (MACHINE_FUNCTION (cfun)->and_mask, + gen_int_mode (0xfff1fffe, SImode)); + return; + } + else if (entity == EPIPHANY_MSW_ENTITY_OR) + { + gcc_assert (mode >= 0 && mode <= 2); + if (mode == 1) + emit_move_insn (MACHINE_FUNCTION (cfun)->or_mask, GEN_INT(0x00080000)); + return; + } + else if (entity == EPIPHANY_MSW_ENTITY_CONFIG) + { + /* Mode switching optimization is done after emit_initial_value_sets, + so we have to take care of CONFIG_REGNUM here. */ + gcc_assert (mode >= 0 && mode <= 2); + rtx save = get_hard_reg_initial_val (SImode, CONFIG_REGNUM); + if (mode == 1) + emit_insn (gen_save_config (save)); + return; + } + fp_mode = (enum attr_fp_mode) mode; + src = NULL_RTX; + + switch (fp_mode) + { + case FP_MODE_CALLER: + /* The EPIPHANY_MSW_ENTITY_CONFIG processing must come later + so that the config save gets inserted before the first use. 
*/ + gcc_assert (entity > EPIPHANY_MSW_ENTITY_CONFIG); + src = get_hard_reg_initial_val (SImode, CONFIG_REGNUM); + mask = MACHINE_FUNCTION (cfun)->and_mask; + break; + case FP_MODE_ROUND_UNKNOWN: + MACHINE_FUNCTION (cfun)->unknown_mode_sets++; + mask = MACHINE_FUNCTION (cfun)->and_mask; + break; + case FP_MODE_ROUND_NEAREST: + if (entity == EPIPHANY_MSW_ENTITY_TRUNC) + return; + mask = MACHINE_FUNCTION (cfun)->and_mask; + break; + case FP_MODE_ROUND_TRUNC: + if (entity == EPIPHANY_MSW_ENTITY_NEAREST) + return; + mask = MACHINE_FUNCTION (cfun)->and_mask; + break; + case FP_MODE_INT: + mask = MACHINE_FUNCTION (cfun)->or_mask; + break; + case FP_MODE_NONE: + default: + gcc_unreachable (); + } + save_cc = gen_reg_rtx (CCmode); + cc_reg = gen_rtx_REG (CCmode, CC_REGNUM); + emit_move_insn (save_cc, cc_reg); + mask = force_reg (SImode, mask); + if (!src) + { + rtvec v = gen_rtvec (1, GEN_INT (fp_mode)); + + src = gen_rtx_CONST (SImode, gen_rtx_UNSPEC (SImode, v, UNSPEC_FP_MODE)); + } + if (entity == EPIPHANY_MSW_ENTITY_ROUND_KNOWN + || entity == EPIPHANY_MSW_ENTITY_FPU_OMNIBUS) + src2 = copy_rtx (src); + else + { + rtvec v = gen_rtvec (1, GEN_INT (FP_MODE_ROUND_UNKNOWN)); + + src2 = gen_rtx_CONST (SImode, gen_rtx_UNSPEC (SImode, v, UNSPEC_FP_MODE)); + } + emit_insn (gen_set_fp_mode (src, src2, mask)); + emit_move_insn (cc_reg, save_cc); +} + +void +epiphany_expand_set_fp_mode (rtx *operands) +{ + rtx ctrl = gen_rtx_REG (SImode, CONFIG_REGNUM); + rtx src = operands[0]; + rtx mask_reg = operands[2]; + rtx scratch = operands[3]; + enum attr_fp_mode fp_mode; + + + gcc_assert (rtx_equal_p (src, operands[1]) + /* Sometimes reload gets silly and reloads the same pseudo + into different registers. */ + || (REG_P (src) && REG_P (operands[1]))); + + if (!epiphany_uninterruptible_p (current_function_decl)) + emit_insn (gen_gid ()); + emit_move_insn (scratch, ctrl); + + if (GET_CODE (src) == REG) + { + /* FP_MODE_CALLER */ + emit_insn (gen_xorsi3 (scratch, scratch, src)); + emit_insn (gen_andsi3 (scratch, scratch, mask_reg)); + emit_insn (gen_xorsi3 (scratch, scratch, src)); + } + else + { + gcc_assert (GET_CODE (src) == CONST); + src = XEXP (src, 0); + fp_mode = (enum attr_fp_mode) INTVAL (XVECEXP (src, 0, 0)); + switch (fp_mode) + { + case FP_MODE_ROUND_NEAREST: + emit_insn (gen_andsi3 (scratch, scratch, mask_reg)); + break; + case FP_MODE_ROUND_TRUNC: + emit_insn (gen_andsi3 (scratch, scratch, mask_reg)); + emit_insn (gen_add2_insn (scratch, const1_rtx)); + break; + case FP_MODE_INT: + emit_insn (gen_iorsi3 (scratch, scratch, mask_reg)); + break; + case FP_MODE_CALLER: + case FP_MODE_ROUND_UNKNOWN: + case FP_MODE_NONE: + gcc_unreachable (); + } + } + emit_move_insn (ctrl, scratch); + if (!epiphany_uninterruptible_p (current_function_decl)) + emit_insn (gen_gie ()); +} + +void +epiphany_insert_mode_switch_use (rtx insn, + int entity ATTRIBUTE_UNUSED, + int mode ATTRIBUTE_UNUSED) +{ + rtx pat = PATTERN (insn); + rtvec v; + int len, i; + rtx near = gen_rtx_REG (SImode, FP_NEAREST_REGNUM); + rtx trunc = gen_rtx_REG (SImode, FP_TRUNCATE_REGNUM); + + if (entity != EPIPHANY_MSW_ENTITY_FPU_OMNIBUS) + return; + switch ((enum attr_fp_mode) get_attr_fp_mode (insn)) + { + case FP_MODE_ROUND_NEAREST: + near = gen_rtx_USE (VOIDmode, near); + trunc = gen_rtx_CLOBBER (VOIDmode, trunc); + break; + case FP_MODE_ROUND_TRUNC: + near = gen_rtx_CLOBBER (VOIDmode, near); + trunc = gen_rtx_USE (VOIDmode, trunc); + break; + case FP_MODE_ROUND_UNKNOWN: + near = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FP_ANYFP_REGNUM)); + trunc 
= copy_rtx (near); + /* Fall through. */ + case FP_MODE_INT: + case FP_MODE_CALLER: + near = gen_rtx_USE (VOIDmode, near); + trunc = gen_rtx_USE (VOIDmode, trunc); + break; + case FP_MODE_NONE: + gcc_unreachable (); + } + gcc_assert (GET_CODE (pat) == PARALLEL); + len = XVECLEN (pat, 0); + v = rtvec_alloc (len + 2); + for (i = 0; i < len; i++) + RTVEC_ELT (v, i) = XVECEXP (pat, 0, i); + RTVEC_ELT (v, len) = near; + RTVEC_ELT (v, len + 1) = trunc; + pat = gen_rtx_PARALLEL (VOIDmode, v); + PATTERN (insn) = pat; + MACHINE_FUNCTION (cfun)->control_use_inserted = true; +} + +bool +epiphany_epilogue_uses (int regno) +{ + if (regno == GPR_LR) + return true; + if (reload_completed && epiphany_is_interrupt_p (current_function_decl)) + { + if (fixed_regs[regno] + && regno != STATUS_REGNUM && regno != IRET_REGNUM + && regno != FP_NEAREST_REGNUM && regno != FP_TRUNCATE_REGNUM) + return false; + return true; + } + if (regno == FP_NEAREST_REGNUM + && epiphany_normal_fp_mode != FP_MODE_ROUND_TRUNC) + return true; + if (regno == FP_TRUNCATE_REGNUM + && epiphany_normal_fp_mode != FP_MODE_ROUND_NEAREST) + return true; + return false; +} + +static unsigned int +epiphany_min_divisions_for_recip_mul (enum machine_mode mode) +{ + if (flag_reciprocal_math && mode == SFmode) + /* We'll expand into a multiply-by-reciprocal anyway, so we might a well do + it already at the tree level and expose it to further optimizations. */ + return 1; + return default_min_divisions_for_recip_mul (mode); +} + +static enum machine_mode +epiphany_preferred_simd_mode (enum machine_mode mode ATTRIBUTE_UNUSED) +{ + return TARGET_VECT_DOUBLE ? DImode : SImode; +} + +static bool +epiphany_vector_mode_supported_p (enum machine_mode mode) +{ + if (mode == V2SFmode) + return true; + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)) + return true; + return false; +} + +static bool +epiphany_vector_alignment_reachable (const_tree type, bool is_packed) +{ + /* Vectors which aren't in packed structures will not be less aligned than + the natural alignment of their element type, so this is safe. */ + if (TYPE_ALIGN_UNIT (type) == 4) + return !is_packed; + + return default_builtin_vector_alignment_reachable (type, is_packed); +} + +static bool +epiphany_support_vector_misalignment (enum machine_mode mode, const_tree type, + int misalignment, bool is_packed) +{ + if (GET_MODE_SIZE (mode) == 8 && misalignment % 4 == 0) + return true; + return default_builtin_support_vector_misalignment (mode, type, misalignment, + is_packed); +} + +/* STRUCTURE_SIZE_BOUNDARY seems a bit crude in how it enlarges small + structs. Make structs double-word-aligned it they are a double word or + (potentially) larger; failing that, do the same for a size of 32 bits. 
*/ +unsigned +epiphany_special_round_type_align (tree type, unsigned computed, + unsigned specified) +{ + unsigned align = MAX (computed, specified); + tree field; + HOST_WIDE_INT total, max; + unsigned try_align = FASTEST_ALIGNMENT; + + if (maximum_field_alignment && try_align > maximum_field_alignment) + try_align = maximum_field_alignment; + if (align >= try_align) + return align; + for (max = 0, field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + tree offset, size; + + if (TREE_CODE (field) != FIELD_DECL + || TREE_TYPE (field) == error_mark_node) + continue; + offset = bit_position (field); + size = DECL_SIZE (field); + if (!tree_fits_uhwi_p (offset) || !tree_fits_uhwi_p (size) + || tree_to_uhwi (offset) >= try_align + || tree_to_uhwi (size) >= try_align) + return try_align; + total = tree_to_uhwi (offset) + tree_to_uhwi (size); + if (total > max) + max = total; + } + if (max >= (HOST_WIDE_INT) try_align) + align = try_align; + else if (try_align > 32 && max >= 32) + align = max > 32 ? 64 : 32; + return align; +} + +/* Upping the alignment of arrays in structs is not only a performance + enhancement, it also helps preserve assumptions about how + arrays-at-the-end-of-structs work, like for struct gcov_fn_info in + libgcov.c . */ +unsigned +epiphany_adjust_field_align (tree field, unsigned computed) +{ + if (computed == 32 + && TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE) + { + tree elmsz = TYPE_SIZE (TREE_TYPE (TREE_TYPE (field))); + + if (!tree_fits_uhwi_p (elmsz) || tree_to_uhwi (elmsz) >= 32) + return 64; + } + return computed; +} + +/* Output code to add DELTA to the first argument, and then jump + to FUNCTION. Used for C++ multiple inheritance. */ +static void +epiphany_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, + HOST_WIDE_INT vcall_offset, + tree function) +{ + int this_regno + = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0; + const char *this_name = reg_names[this_regno]; + const char *fname; + + /* We use IP and R16 as a scratch registers. */ + gcc_assert (call_used_regs [GPR_IP]); + gcc_assert (call_used_regs [GPR_16]); + + /* Add DELTA. When possible use a plain add, otherwise load it into + a register first. */ + if (delta == 0) + ; /* Done. */ + else if (SIMM11 (delta)) + asm_fprintf (file, "\tadd\t%s,%s,%d\n", this_name, this_name, (int) delta); + else if (delta < 0 && delta >= -0xffff) + { + asm_fprintf (file, "\tmov\tip,%d\n", (int) -delta); + asm_fprintf (file, "\tsub\t%s,%s,ip\n", this_name, this_name); + } + else + { + asm_fprintf (file, "\tmov\tip,%%low(%ld)\n", (long) delta); + if (delta & ~0xffff) + asm_fprintf (file, "\tmovt\tip,%%high(%ld)\n", (long) delta); + asm_fprintf (file, "\tadd\t%s,%s,ip\n", this_name, this_name); + } + + /* If needed, add *(*THIS + VCALL_OFFSET) to THIS. 
*/ + if (vcall_offset != 0) + { + /* ldr ip,[this] --> temp = *this + ldr ip,[ip,vcall_offset] > temp = *(*this + vcall_offset) + add this,this,ip --> this+ = *(*this + vcall_offset) */ + asm_fprintf (file, "\tldr\tip, [%s]\n", this_name); + if (vcall_offset < -0x7ff * 4 || vcall_offset > 0x7ff * 4 + || (vcall_offset & 3) != 0) + { + asm_fprintf (file, "\tmov\tr16, %%low(%ld)\n", (long) vcall_offset); + asm_fprintf (file, "\tmovt\tr16, %%high(%ld)\n", (long) vcall_offset); + asm_fprintf (file, "\tldr\tip, [ip,r16]\n"); + } + else + asm_fprintf (file, "\tldr\tip, [ip,%d]\n", (int) vcall_offset / 4); + asm_fprintf (file, "\tadd\t%s, %s, ip\n", this_name, this_name); + } + + fname = XSTR (XEXP (DECL_RTL (function), 0), 0); + if (epiphany_is_long_call_p (XEXP (DECL_RTL (function), 0))) + { + fputs ("\tmov\tip,%low(", file); + assemble_name (file, fname); + fputs (")\n\tmovt\tip,%high(", file); + assemble_name (file, fname); + fputs (")\n\tjr ip\n", file); + } + else + { + fputs ("\tb\t", file); + assemble_name (file, fname); + fputc ('\n', file); + } +} + +void +epiphany_start_function (FILE *file, const char *name, tree decl) +{ + /* If the function doesn't fit into the on-chip memory, it will have a + section attribute - or lack of it - that denotes it goes somewhere else. + But the architecture spec says that an interrupt vector still has to + point to on-chip memory. So we must place a jump there to get to the + actual function implementation. The forwarder_section attribute + specifies the section where this jump goes. + This mechanism can also be useful to have a shortcall destination for + a function that is actually placed much farther away. */ + tree attrs, int_attr, int_names, int_name, forwarder_attr; + + attrs = DECL_ATTRIBUTES (decl); + int_attr = lookup_attribute ("interrupt", attrs); + if (int_attr) + for (int_names = TREE_VALUE (int_attr); int_names; + int_names = TREE_CHAIN (int_names)) + { + char buf[99]; + + int_name = TREE_VALUE (int_names); + sprintf (buf, "ivt_entry_%.80s", TREE_STRING_POINTER (int_name)); + switch_to_section (get_section (buf, SECTION_CODE, decl)); + fputs ("\tb\t", file); + assemble_name (file, name); + fputc ('\n', file); + } + forwarder_attr = lookup_attribute ("forwarder_section", attrs); + if (forwarder_attr) + { + const char *prefix = "__forwarder_dst_"; + char *dst_name = (char *) alloca (strlen (prefix) + strlen (name) + 1); + + strcpy (dst_name, prefix); + strcat (dst_name, name); + forwarder_attr = TREE_VALUE (TREE_VALUE (forwarder_attr)); + switch_to_section (get_section (TREE_STRING_POINTER (forwarder_attr), + SECTION_CODE, decl)); + ASM_OUTPUT_FUNCTION_LABEL (file, name, decl); + if (epiphany_is_long_call_p (XEXP (DECL_RTL (decl), 0))) + { + int tmp = GPR_0; + + if (int_attr) + fputs ("\tstrd r0,[sp,-1]\n", file); + else + tmp = GPR_16; + gcc_assert (call_used_regs[tmp]); + fprintf (file, "\tmov r%d,%%low(", tmp); + assemble_name (file, dst_name); + fprintf (file, ")\n" + "\tmovt r%d,%%high(", tmp); + assemble_name (file, dst_name); + fprintf (file, ")\n" + "\tjr r%d\n", tmp); + } + else + { + fputs ("\tb\t", file); + assemble_name (file, dst_name); + fputc ('\n', file); + } + name = dst_name; + } + switch_to_section (function_section (decl)); + ASM_OUTPUT_FUNCTION_LABEL (file, name, decl); +} + +struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc-4.9/gcc/config/epiphany/epiphany.h b/gcc-4.9/gcc/config/epiphany/epiphany.h new file mode 100644 index 000000000..cffb00c03 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany.h 
@@ -0,0 +1,945 @@ +/* Definitions of target machine for GNU compiler, Argonaut EPIPHANY cpu. + Copyright (C) 1994-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_EPIPHANY_H +#define GCC_EPIPHANY_H + +#undef LINK_SPEC +#undef STARTFILE_SPEC +#undef ENDFILE_SPEC +#undef SIZE_TYPE +#undef PTRDIFF_TYPE +#undef WCHAR_TYPE +#undef WCHAR_TYPE_SIZE + +/* Names to predefine in the preprocessor for this target machine. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__epiphany__"); \ + builtin_define ("__little_endian__"); \ + builtin_define_with_int_value ("__EPIPHANY_STACK_OFFSET__", \ + epiphany_stack_offset); \ + builtin_assert ("cpu=epiphany"); \ + builtin_assert ("machine=epiphany"); \ + } while (0) + +/* Pick up the libgloss library. One day we may do this by linker script, but + for now its static. + libgloss might use errno/__errno, which might not have been needed when we + saw libc the first time, so link with libc a second time. */ +#undef LIB_SPEC +#define LIB_SPEC "%{!shared:%{g*:-lg} %{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}} -lepiphany %{!shared:%{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}}" + +#define LINK_SPEC "%{v}" + +#define STARTFILE_SPEC "%{!shared:crt0.o%s} crti.o%s " \ + "%{mfp-mode=int:crtint.o%s} %{mfp-mode=truncate:crtrunc.o%s} " \ + "%{m1reg-r43:crtm1reg-r43.o%s} %{m1reg-r63:crtm1reg-r63.o%s} " \ + "crtbegin.o%s" + +#define ENDFILE_SPEC "crtend.o%s crtn.o%s" + +#define EPIPHANY_LIBRARY_EXTRA_SPEC \ + "-ffixed-r40 -ffixed-r41 -ffixed-r42 -ffixed-r43" + +/* In the "spec:" rule,, t-epiphany changes this to epiphany_library_stub_spec + and epiphany_library_extra_spec, respectively. */ +#define EXTRA_SPECS \ + { "epiphany_library_extra_spec", "" }, \ + { "epiphany_library_build_spec", EPIPHANY_LIBRARY_EXTRA_SPEC }, \ + +#define DRIVER_SELF_SPECS " %(epiphany_library_extra_spec) " + +#undef USER_LABEL_PREFIX +#define USER_LABEL_PREFIX "_" + +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n\ + mov r0,%low(" USER_LABEL_PREFIX #FUNC")\n\ + movt r0,%high(" USER_LABEL_PREFIX #FUNC")\n\ + jalr r0\n\ + .text"); + +#if 0 /* We would like to use Posix for profiling, but the simulator + interface still lacks mkdir. */ +#define TARGET_POSIX_IO +#endif + +/* Target machine storage layout. */ + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is the lowest + numbered. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 4 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. 
In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. */ +/* It is far faster to zero extend chars than to sign extend them */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + { \ + if (MODE == QImode) \ + UNSIGNEDP = 1; \ + else if (MODE == HImode) \ + UNSIGNEDP = 1; \ + (MODE) = SImode; \ + } + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 32 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY 64 + +/* ALIGN FRAMES on word boundaries */ +#define EPIPHANY_STACK_ALIGN(LOC) (((LOC)+7) & ~7) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. */ +/* This is bigger than currently necessary for the EPIPHANY. If 8 byte floats are + ever added it's not clear whether they'll need such alignment or not. For + now we assume they will. We can always relax it if necessary but the + reverse isn't true. */ +#define BIGGEST_ALIGNMENT 64 + +/* The best alignment to use in cases where we have a choice. */ +#define FASTEST_ALIGNMENT 64 + +#define MALLOC_ABI_ALIGNMENT BIGGEST_ALIGNMENT + +/* Make strings dword-aligned so strcpy from constants will be faster. */ +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + ((TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT : (ALIGN)) + +/* Make arrays of chars dword-aligned for the same reasons. + Also, align arrays of SImode items. */ +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < FASTEST_ALIGNMENT \ + ? FASTEST_ALIGNMENT \ + : (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == SImode \ + && (ALIGN) < FASTEST_ALIGNMENT) \ + ? FASTEST_ALIGNMENT \ + : (ALIGN)) + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. */ +/* On the EPIPHANY the lower address bits are masked to 0 as necessary. The chip + won't croak when given an unaligned address, but the insn will still fail + to produce the correct result. */ +#define STRICT_ALIGNMENT 1 + +/* layout_type overrides our ADJUST_ALIGNMENT settings from epiphany-modes.def + for vector modes, so we have to override it back. */ +#define ROUND_TYPE_ALIGN(TYPE, MANGLED_ALIGN, SPECIFIED_ALIGN) \ + (TREE_CODE (TYPE) == VECTOR_TYPE && !TYPE_USER_ALIGN (TYPE) \ + && SPECIFIED_ALIGN <= GET_MODE_ALIGNMENT (TYPE_MODE (TYPE)) \ + ? GET_MODE_ALIGNMENT (TYPE_MODE (TYPE)) \ + : ((TREE_CODE (TYPE) == RECORD_TYPE \ + || TREE_CODE (TYPE) == UNION_TYPE \ + || TREE_CODE (TYPE) == QUAL_UNION_TYPE) \ + && !TYPE_PACKED (TYPE)) \ + ? epiphany_special_round_type_align ((TYPE), (MANGLED_ALIGN), \ + (SPECIFIED_ALIGN)) \ + : MAX ((MANGLED_ALIGN), (SPECIFIED_ALIGN))) + +#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ + epiphany_adjust_field_align((FIELD), (COMPUTED)) + +/* Layout of source language data types. 
*/ + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 0 + +#define SIZE_TYPE "long unsigned int" +#define PTRDIFF_TYPE "long int" +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. */ + +#define FIRST_PSEUDO_REGISTER 78 + + +/* General purpose registers. */ +#define GPR_FIRST 0 /* First gpr */ + +#define PIC_REGNO (GPR_FIRST + 28) /* PIC register. */ +#define GPR_LAST (GPR_FIRST + 63) /* Last gpr */ +#define CORE_CONTROL_FIRST CONFIG_REGNUM +#define CORE_CONTROL_LAST IRET_REGNUM + +#define GPR_P(R) IN_RANGE (R, GPR_FIRST, GPR_LAST) +#define GPR_OR_AP_P(R) (GPR_P (R) || (R) == ARG_POINTER_REGNUM) + +#define GPR_OR_PSEUDO_P(R) (GPR_P (R) || (R) >= FIRST_PSEUDO_REGISTER) +#define GPR_AP_OR_PSEUDO_P(R) (GPR_OR_AP_P (R) || (R) >= FIRST_PSEUDO_REGISTER) + +#define FIXED_REGISTERS \ +{ /* Integer Registers */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 000-007, gr0 - gr7 */ \ + 0, 0, 0, 0, 0, 1, 0, 0, /* 008-015, gr8 - gr15 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 016-023, gr16 - gr23 */ \ + 0, 0, 0, 0, 1, 1, 1, 1, /* 024-031, gr24 - gr31 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 032-039, gr32 - gr39 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 040-047, gr40 - gr47 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 048-055, gr48 - gr55 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 056-063, gr56 - gr63 */ \ + /* Other registers */ \ + 1, /* 64 AP - fake arg ptr */ \ + 1, /* soft frame pointer */ \ + 1, /* CC_REGNUM - integer conditions */\ + 1, /* CCFP_REGNUM - fp conditions */\ + 1, 1, 1, 1, 1, 1, /* Core Control Registers. */ \ + 1, 1, 1, /* FP_{NEAREST,...}_REGNUM */\ + 1, /* UNKNOWN_REGNUM - placeholder. */\ +} + +/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in + general) by function calls as well as for fixed registers. This macro + therefore identifies the registers that are not available for general + allocation of values that must live across function calls. + + If a register has 0 in `CALL_USED_REGISTERS', the compiler automatically + saves it on function entry and restores it on function exit, if the register + is used within the function. */ + +#define CALL_USED_REGISTERS \ +{ /* Integer Registers */ \ + 1, 1, 1, 1, 0, 0, 0, 0, /* 000-007, gr0 - gr7 */ \ + 0, 0, 0, 0, 1, 1, 1, 0, /* 008-015, gr8 - gr15 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* 016-023, gr16 - gr23 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* 024-031, gr24 - gr31 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* 032-039, gr32 - gr38 */ \ + 0, 0, 0, 0, 1, 1, 1, 1, /* 040-047, gr40 - gr47 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* 048-055, gr48 - gr55 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* 056-063, gr56 - gr63 */ \ + 1, /* 64 AP - fake arg ptr */ \ + 1, /* soft frame pointer */ \ + 1, /* 66 CC_REGNUM */ \ + 1, /* 67 CCFP_REGNUM */ \ + 1, 1, 1, 1, 1, 1, /* Core Control Registers. */ \ + 1, 1, 1, /* FP_{NEAREST,...}_REGNUM */\ + 1, /* UNKNOWN_REGNUM - placeholder. */\ +} + +#define REG_ALLOC_ORDER \ + { \ + 0, 1, 2, 3, /* Caller-saved 'small' registers. */ \ + 12, /* Caller-saved unpaired register. 
*/ \ + /* Caller-saved registers. */ \ + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, \ + 44, 45, 46, 47, \ + 48, 49, 50, 51, 52, 53, 54, 55, \ + 56, 57, 58, 59, 60, 61, 62, 63, \ + 4, 5, 6, 7, /* Callee-saved 'small' registers. */ \ + 15, /* Callee-saved unpaired register. */ \ + 8, 9, 10, 11, /* Callee-saved registers. */ \ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, \ + 14, 13, /* Link register, stack pointer. */ \ + /* Can't allocate, but must name these... */ \ + 28, 29, 30, 31, \ + 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77 \ + } + +#define HARD_REGNO_RENAME_OK(SRC, DST) epiphany_regno_rename_ok (SRC, DST) + +/* Return number of consecutive hard regs needed starting at reg REGNO + to hold something of mode MODE. + This is ordinarily the length in words of a value of mode MODE + but can be less for certain modes in special long registers. */ +#define HARD_REGNO_NREGS(REGNO, MODE) \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ +extern const unsigned int epiphany_hard_regno_mode_ok[]; +extern unsigned int epiphany_mode_class[]; +#define HARD_REGNO_MODE_OK(REGNO, MODE) hard_regno_mode_ok((REGNO), (MODE)) + +/* A C expression that is nonzero if it is desirable to choose + register allocation so as to avoid move instructions between a + value of mode MODE1 and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, + MODE2)' are ever different for any R, then `MODES_TIEABLE_P (MODE1, + MODE2)' must be zero. */ + +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +/* Register classes and constants. */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. + + It is important that any condition codes have class NO_REGS. + See `register_operand'. */ + +enum reg_class { + NO_REGS, + LR_REGS, + SHORT_INSN_REGS, + SIBCALL_REGS, + GENERAL_REGS, + CORE_CONTROL_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +/* Give names of register classes as strings for dump file. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "LR_REGS", \ + "SHORT_INSN_REGS", \ + "SIBCALL_REGS", \ + "GENERAL_REGS", \ + "CORE_CONTROL_REGS", \ + "ALL_REGS" \ +} + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES.
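The initializer that follows packs each class into three 32-bit words, one bit per hard register. A small sketch of how such a mask is queried (illustrative only, not code from this port):

   #include <stdio.h>

   // regno 0..31 is in word 0, 32..63 in word 1, 64..77 in word 2
   static int class_contains (const unsigned mask[3], unsigned regno)
   {
     return (mask[regno / 32] >> (regno % 32)) & 1;
   }

   int main (void)
   {
     const unsigned lr_regs[3] = { 0x00004000, 0x0, 0x0 };  // LR_REGS below
     printf ("%d\n", class_contains (lr_regs, 14));  // lr is reg 14: prints 1
     printf ("%d\n", class_contains (lr_regs, 13));  // sp is reg 13: prints 0
     return 0;
   }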
*/ + +#define REG_CLASS_CONTENTS \ +{ /* r0-r31 r32-r63 ap/sfp/cc1/cc2/iret/status */ \ + { 0x00000000,0x00000000,0x0}, /* NO_REGS */ \ + { 0x00004000,0x00000000,0x0}, /* LR_REGS */ \ + { 0x000000ff,0x00000000,0x0}, /* SHORT_INSN_REGS */ \ + { 0xffff100f,0xffffff00,0x0}, /* SIBCALL_REGS */ \ + { 0xffffffff,0xffffffff,0x0003}, /* GENERAL_REGS */ \ + { 0x00000000,0x00000000,0x03f0}, /* CORE_CONTROL_REGS */ \ + { 0xffffffff,0xffffffff,0x3fff}, /* ALL_REGS */ \ +} + + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ +extern enum reg_class epiphany_regno_reg_class[FIRST_PSEUDO_REGISTER]; +#define REGNO_REG_CLASS(REGNO) \ +(epiphany_regno_reg_class[REGNO]) + +/* The class value for index registers, and the one for base regs. */ +#define BASE_REG_CLASS GENERAL_REGS +#define INDEX_REG_CLASS GENERAL_REGS + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ +#define REGNO_OK_FOR_BASE_P(REGNO) \ +((REGNO) < FIRST_PSEUDO_REGISTER || (unsigned) reg_renumber[REGNO] < FIRST_PSEUDO_REGISTER) +#define REGNO_OK_FOR_INDEX_P(REGNO) \ +((REGNO) < FIRST_PSEUDO_REGISTER || (unsigned) reg_renumber[REGNO] < FIRST_PSEUDO_REGISTER) + + + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ +#define PREFERRED_RELOAD_CLASS(X,CLASS) \ +(CLASS) + +/* Return the maximum number of consecutive registers + needed to represent mode MODE in a register of class CLASS. */ +#define CLASS_MAX_NREGS(CLASS, MODE) \ +((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* The letters I, J, K, L, M, N, O, P in a register constraint string + can be used to stand for particular ranges of immediate operands. + This macro defines what the ranges are. + C is the letter, and VALUE is a constant value. + Return 1 if VALUE is in the range specified by C. */ + +/* 'I' is used for 16 bit unsigned. + 'Cal' is used for long immediates (32 bits) + 'K' is used for any constant up to 5 bits. + 'L' is used for any 11 bit signed. +*/ + +#define IMM16(X) (IN_RANGE ((X), 0, 0xFFFF)) +#define SIMM16(X) (IN_RANGE ((X), -65536, 65535)) +#define SIMM11(X) (IN_RANGE ((X), -1024, 1023)) +#define IMM5(X) (IN_RANGE ((X), 0, 0x1F)) + +typedef struct GTY (()) machine_function +{ + unsigned args_parsed : 1; + unsigned pretend_args_odd : 1; + unsigned lr_clobbered : 1; + unsigned control_use_inserted : 1; + unsigned lr_slot_known : 1; + unsigned sw_entities_processed : 6; + long lr_slot_offset; + rtx and_mask; + rtx or_mask; + unsigned unknown_mode_uses; + unsigned unknown_mode_sets; +} machine_function_t; + +#define MACHINE_FUNCTION(fun) (fun)->machine + +#define INIT_EXPANDERS epiphany_init_expanders () + +/* Stack layout and stack pointer usage. */ + +/* Define this macro if pushing a word onto the stack moves the stack + pointer to a smaller address. 
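Spelling out the immediate-range macros defined above (IMM16, SIMM11, IMM5) as plain predicates may help when reading the constraint letters they back; this is only a sketch, not the port's code:

   #include <stdio.h>

   static int imm16_p  (long x) { return x >= 0 && x <= 0xFFFF; }    // 'I', 16-bit unsigned
   static int simm11_p (long x) { return x >= -1024 && x <= 1023; }  // 'L', 11-bit signed
   static int imm5_p   (long x) { return x >= 0 && x <= 0x1F; }      // 'K', 5-bit unsigned

   int main (void)
   {
     printf ("%d %d %d\n", imm16_p (65535), simm11_p (-1024), imm5_p (32));  // 1 1 0
     return 0;
   }

So, for example, 1024 is not a valid 'L' operand, while -1024 still is.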
*/ +#define STACK_GROWS_DOWNWARD + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset within stack frame to start allocating local variables at. + If FRAME_GROWS_DOWNWARD, this is the offset to the END of the + first local allocated. Otherwise, it is the offset to the BEGINNING + of the first local allocated. */ +#define STARTING_FRAME_OFFSET epiphany_stack_offset + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET epiphany_stack_offset + +/* Offset of first parameter from the argument pointer register value. */ +/* 4 bytes for each of previous fp, return address, and previous gp. + 4 byte reserved area for future considerations. */ +#define FIRST_PARM_OFFSET(FNDECL) \ + (epiphany_stack_offset \ + + (MACHINE_FUNCTION (DECL_STRUCT_FUNCTION (FNDECL))->pretend_args_odd \ + ? 4 : 0)) + +#define INCOMING_FRAME_SP_OFFSET epiphany_stack_offset + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM GPR_SP + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM GPR_FP + +/* Register in which static-chain is passed to a function. This must + not be a register used by the prologue. */ +#define STATIC_CHAIN_REGNUM GPR_IP + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define ELIMINABLE_REGS \ +{{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ +} + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ + +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = epiphany_initial_elimination_offset ((FROM), (TO))) + +/* Function argument passing. */ + +/* If defined, the maximum amount of space required for outgoing + arguments will be computed and placed into the variable + `current_function_outgoing_args_size'. No space will be pushed + onto the stack for each call; instead, the function prologue should + increase the stack frame size by this amount. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. */ +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ +((CUM) = 0) + +/* The number of registers used for parameter passing. Local to this file. */ +#define MAX_EPIPHANY_PARM_REGS 4 + +/* 1 if N is a possible register number for function argument passing. */ +#define FUNCTION_ARG_REGNO_P(N) \ +((unsigned) (N) < MAX_EPIPHANY_PARM_REGS) + +/* Return boolean indicating arg of type TYPE and mode MODE will be passed in + a reg. 
This includes arguments that have to be passed by reference as the + pointer to them is passed in a reg if one is available (and that is what + we're given). + This macro is only used in this file. */ +/* We must use partial argument passing because of the chosen mode + of varargs handling. */ +#define PASS_IN_REG_P(CUM, MODE, TYPE) \ + (ROUND_ADVANCE_CUM ((CUM), (MODE), (TYPE)) < MAX_EPIPHANY_PARM_REGS) + +/* Tell GCC to use TARGET_RETURN_IN_MEMORY. */ +#define DEFAULT_PCC_STRUCT_RETURN 0 + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ +#define EXIT_IGNORE_STACK 1 + +#define EPILOGUE_USES(REGNO) epiphany_epilogue_uses (REGNO) + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. */ +#define FUNCTION_PROFILER(FILE, LABELNO) + +/* Given an rtx for the frame pointer, + return an rtx for the address of the frame. */ +#define FRAME_ADDR_RTX(frame) \ + ((frame) == hard_frame_pointer_rtx ? arg_pointer_rtx : NULL) + +#define EPIPHANY_RETURN_REGNO \ + ((current_function_decl != NULL \ + && epiphany_is_interrupt_p (current_function_decl)) \ + ? IRET_REGNUM : GPR_LR) +/* This is not only for dwarf unwind info, but also for the benefit of + df-scan.c to tell it that LR is live at the function start. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, EPIPHANY_RETURN_REGNO) + +/* However, we haven't implemented the rest needed for dwarf2 unwind info. */ +#define DWARF2_UNWIND_INFO 0 + +#define RETURN_ADDR_RTX(count, frame) \ + (count ? NULL_RTX \ + : gen_rtx_UNSPEC (SImode, gen_rtvec (1, const0_rtx), UNSPEC_RETURN_ADDR)) + +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (EPIPHANY_RETURN_REGNO) + +/* Trampolines. + An epiphany trampoline looks like this: + mov r16,%low(fnaddr) + movt r16,%high(fnaddr) + mov ip,%low(cxt) + movt ip,%high(cxt) + jr r16 */ + +/* Length in units of the trampoline for entering a nested function. */ +#define TRAMPOLINE_SIZE 20 + +/* Addressing modes, and classification of registers for them. */ + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 2 + +/* We have post_modify (load/store with update). */ +#define HAVE_POST_INCREMENT TARGET_POST_INC +#define HAVE_POST_DECREMENT TARGET_POST_INC +#define HAVE_POST_MODIFY_DISP TARGET_POST_MODIFY +#define HAVE_POST_MODIFY_REG TARGET_POST_MODIFY + +/* Currently, the only users of the USE_*CREMENT macros are + move_by_pieces / store_by_pieces_1 . We don't want them to use + POST_MODIFY modes, because we got ample addressing range for the + reg+offset addressing mode; besides, there are short index+offset loads, + but the only short post-modify load uses POST_MODIFY_REG. + Moreover, using auto-increment in move_by_pieces from structure copying + in the prologue causes confused debug output. + If another pass starts using these macros where the use of these + addressing modes would make more sense, we can try checking the + current pass. */ +#define USE_LOAD_POST_INCREMENT(MODE) 0 +#define USE_LOAD_POST_DECREMENT(MODE) 0 +#define USE_STORE_POST_INCREMENT(MODE) 0 +#define USE_STORE_POST_DECREMENT(MODE) 0 + +/* Recognize any constant value that is a valid address. 
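The offset test defined a few lines below (RTX_OK_FOR_OFFSET_1) accepts a displacement only if it is a multiple of the access size and lies within +/- 2047 times that size; a plain-C sketch of the same check (illustrative only):

   #include <stdio.h>

   static int offset_ok_p (long off, long size)   // size is a power of two
   {
     return (off & (size - 1)) == 0
            && off >= -2047 * size
            && off <= 2047 * size;
   }

   int main (void)
   {
     printf ("%d\n", offset_ok_p (8188, 4));  // 2047 * 4: prints 1
     printf ("%d\n", offset_ok_p (8192, 4));  // out of range: prints 0
     printf ("%d\n", offset_ok_p (2, 4));     // misaligned: prints 0
     return 0;
   }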
*/ +#define CONSTANT_ADDRESS_P(X) \ +(GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \ + || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST) + +#define RTX_OK_FOR_OFFSET_P(MODE, X) \ + RTX_OK_FOR_OFFSET_1 (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \ + && epiphany_vect_align == 4 ? SImode : (MODE), X) +#define RTX_OK_FOR_OFFSET_1(MODE, X) \ + (GET_CODE (X) == CONST_INT \ + && !(INTVAL (X) & (GET_MODE_SIZE (MODE) - 1)) \ + && INTVAL (X) >= -2047 * (int) GET_MODE_SIZE (MODE) \ + && INTVAL (X) <= 2047 * (int) GET_MODE_SIZE (MODE)) + +/* Frame offsets cannot be evaluated till the frame pointer is eliminated. */ +#define RTX_FRAME_OFFSET_P(X) \ + ((X) == frame_pointer_rtx \ + || (GET_CODE (X) == PLUS && XEXP ((X), 0) == frame_pointer_rtx \ + && CONST_INT_P (XEXP ((X), 1)))) + +/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, + return the mode to be used for the comparison. */ +#define SELECT_CC_MODE(OP, X, Y) \ + epiphany_select_cc_mode (OP, X, Y) + +/* Return nonzero if SELECT_CC_MODE will never return MODE for a + floating point inequality comparison. */ + +#define REVERSE_CONDITION(CODE, MODE) \ + ((MODE) == CC_FPmode || (MODE) == CC_FP_EQmode || (MODE) == CC_FP_GTEmode \ + || (MODE) == CC_FP_ORDmode || (MODE) == CC_FP_UNEQmode \ + ? reverse_condition_maybe_unordered (CODE) \ + : (MODE) == CCmode ? reverse_condition (CODE) \ + : UNKNOWN) + +/* We can reverse all CCmodes with REVERSE_CONDITION. */ +#define REVERSIBLE_CC_MODE(MODE) \ + ((MODE) == CCmode || (MODE) == CC_FPmode || (MODE) == CC_FP_EQmode \ + || (MODE) == CC_FP_GTEmode || (MODE) == CC_FP_ORDmode \ + || (MODE) == CC_FP_UNEQmode) + +/* Costs. */ + +/* The cost of a branch insn. */ +/* ??? What's the right value here? Branches are certainly more + expensive than reg->reg moves. */ +#define BRANCH_COST(speed_p, predictable_p) \ + (speed_p ? epiphany_branch_cost : 1) + +/* Nonzero if access to memory by bytes is slow and undesirable. + For RISC chips, it means that access to memory by bytes is no + better than access by words when possible, so grab a whole word + and maybe make use of that. */ +#define SLOW_BYTE_ACCESS 1 + +/* Define this macro if it is as good or better to call a constant + function address than to call an address kept in a register. */ +/* On the EPIPHANY, calling through registers is slow. */ +#define NO_FUNCTION_CSE + +/* Section selection. */ +/* WARNING: These section names also appear in dwarf2out.c. */ + +#define TEXT_SECTION_ASM_OP "\t.section .text" +#define DATA_SECTION_ASM_OP "\t.section .data" + +#undef READONLY_DATA_SECTION_ASM_OP +#define READONLY_DATA_SECTION_ASM_OP "\t.section .rodata" + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/* Define this macro if jump tables (for tablejump insns) should be + output in the text section, along with the assembler instructions. + Otherwise, the readonly data section is used. + This macro is irrelevant if there is no separate readonly data section. */ +#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic) + +/* PIC */ + +/* The register number of the register used to address a table of static + data addresses in memory. In some cases this register is defined by a + processor's ``application binary interface'' (ABI). When this macro + is defined, RTL is generated for this register once, as with the stack + pointer and frame pointer registers. If this macro is not defined, it + is up to the machine-dependent files to allocate such a register (if + necessary). */ +#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? 
PIC_REGNO : INVALID_REGNUM) + +/* Control the assembler format that we output. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will + end at the end of the line. */ +#define ASM_COMMENT_START ";" + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON "" + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF "" + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.global\t" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ + +#define REGISTER_NAMES \ +{ \ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "fp", "ip", "sp", "lr", "r15", \ + "r16", "r17","r18", "r19", "r20", "r21", "r22", "r23", \ + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \ + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \ + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \ + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \ + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", \ + "ap", "sfp", "cc1", "cc2", \ + "config", "status", "lc", "ls", "le", "iret", \ + "fp_near", "fp_trunc", "fp_anyfp", "unknown" \ +} + +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + epiphany_final_prescan_insn (INSN, OPVEC, NOPERANDS) + +#define LOCAL_LABEL_PREFIX "." + +/* A C expression which evaluates to true if CODE is a valid + punctuation character for use in the `PRINT_OPERAND' macro. */ +extern char epiphany_punct_chars[256]; +#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \ + epiphany_punct_chars[(unsigned char) (CHAR)] + +/* This is how to output an element of a case-vector that is absolute. */ +#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ +do { \ + if (CASE_VECTOR_MODE == Pmode) \ + asm_fprintf ((FILE), "\t.word %LL%d\n", (VALUE)); \ + else \ + asm_fprintf ((FILE), "\t.short %LL%d\n", (VALUE)); \ +} while (0) + +/* This is how to output an element of a case-vector that is relative. */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ +do { \ + if (CASE_VECTOR_MODE == Pmode) \ + asm_fprintf ((FILE), "\t.word"); \ + else \ + asm_fprintf ((FILE), "\t.short"); \ + asm_fprintf ((FILE), " %LL%d-%LL%d\n", (VALUE), (REL)); \ +} while (0) + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ +#define ASM_OUTPUT_ALIGN(FILE, LOG) \ +do { if ((LOG) != 0) fprintf (FILE, "\t.balign %d\n", 1 << (LOG)); } while (0) + +/* Inside the text section, align with nops rather than zeros. */ +#define ASM_OUTPUT_ALIGN_WITH_NOP(FILE, LOG) \ +do \ +{ \ + if ((LOG) != 0) fprintf (FILE, "\t.balignw %d,0x01a2\n", 1 << (LOG)); \ +} while (0) + +/* This is how to declare the size of a function. 
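The two alignment macros just above simply print assembler directives; a minimal sketch of the text they produce (illustrative only), with LOG standing for the requested power of two:

   #include <stdio.h>

   int main (void)
   {
     int log = 3;                                  // align to 2**3 = 8 bytes
     printf ("\t.balign %d\n", 1 << log);          // ASM_OUTPUT_ALIGN
     printf ("\t.balignw %d,0x01a2\n", 1 << log);  // ASM_OUTPUT_ALIGN_WITH_NOP: pad .text with nops
     return 0;
   }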
*/ +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \ + do \ + { \ + const char *__name = (FNAME); \ + tree attrs = DECL_ATTRIBUTES ((DECL)); \ + \ + if (!flag_inhibit_size_directive) \ + { \ + if (lookup_attribute ("forwarder_section", attrs)) \ + { \ + const char *prefix = "__forwarder_dst_"; \ + char *dst_name \ + = (char *) alloca (strlen (prefix) + strlen (__name) + 1); \ + \ + strcpy (dst_name, prefix); \ + strcat (dst_name, __name); \ + __name = dst_name; \ + } \ + ASM_OUTPUT_MEASURED_SIZE ((FILE), __name); \ + } \ + } \ + while (0) + +/* Debugging information. */ + +/* Generate DBX and DWARF debugging information. */ +#define DBX_DEBUGGING_INFO 1 + +#undef PREFERRED_DEBUGGING_TYPE +#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG + +/* Turn off splitting of long stabs. */ +#define DBX_CONTIN_LENGTH 0 + +/* Miscellaneous. */ + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE (TARGET_SMALL16 && optimize_size ? HImode : Pmode) + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND + +/* Max number of bytes we can move from memory to memory + in one reasonably fast instruction. */ +#define MOVE_MAX 8 + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits + is done just by pretending it is already truncated. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ + +#define Pmode SImode + +/* A function address in a call instruction. */ +#define FUNCTION_MODE SImode + +/* EPIPHANY function types. */ +enum epiphany_function_type +{ + EPIPHANY_FUNCTION_UNKNOWN, EPIPHANY_FUNCTION_NORMAL, + EPIPHANY_FUNCTION_INTERRUPT +}; + +#define EPIPHANY_INTERRUPT_P(TYPE) ((TYPE) == EPIPHANY_FUNCTION_INTERRUPT) + +/* Compute the type of a function from its DECL. */ + +#define IMMEDIATE_PREFIX "#" + +#define OPTIMIZE_MODE_SWITCHING(ENTITY) \ + (epiphany_optimize_mode_switching (ENTITY)) + +/* We have two fake entities for lazy code motion of the mask constants, + one entity each for round-to-nearest / truncating + with a different idea what FP_MODE_ROUND_UNKNOWN will be, and + finally an entity that runs in a second mode switching pass to + resolve FP_MODE_ROUND_UNKNOWN. 
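A very rough sketch of the lazy-switching idea described here, reduced to plain C (conceptual only; the real pass works on the entities enumerated below):

   #include <stdio.h>

   enum fp_round { ROUND_UNKNOWN, ROUND_NEAREST, ROUND_TRUNC };
   static enum fp_round current_mode = ROUND_UNKNOWN;
   static int mode_sets = 0;

   static void need_round_mode (enum fp_round wanted)
   {
     if (wanted != current_mode)
       {
         mode_sets++;             // a real pass would emit the mode-set insn here
         current_mode = wanted;
       }
   }

   int main (void)
   {
     need_round_mode (ROUND_NEAREST);
     need_round_mode (ROUND_NEAREST);   // already in effect: nothing emitted
     need_round_mode (ROUND_TRUNC);
     printf ("%d\n", mode_sets);        // prints 2
     return 0;
   }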
*/ +#define NUM_MODES_FOR_MODE_SWITCHING \ + { 2, 2, 2, \ + FP_MODE_NONE, FP_MODE_NONE, FP_MODE_NONE, FP_MODE_NONE, FP_MODE_NONE } + +#define MODE_NEEDED(ENTITY, INSN) epiphany_mode_needed((ENTITY), (INSN)) + +#define MODE_PRIORITY_TO_MODE(ENTITY, N) \ + (epiphany_mode_priority_to_mode ((ENTITY), (N))) + +#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ + emit_set_fp_mode ((ENTITY), (MODE), (HARD_REGS_LIVE)) + +#define MODE_ENTRY(ENTITY) (epiphany_mode_entry_exit ((ENTITY), false)) +#define MODE_EXIT(ENTITY) (epiphany_mode_entry_exit ((ENTITY), true)) +#define MODE_AFTER(ENTITY, LAST_MODE, INSN) \ + (epiphany_mode_after ((ENTITY), (LAST_MODE), (INSN))) + +#define TARGET_INSERT_MODE_SWITCH_USE epiphany_insert_mode_switch_use + +/* Mode switching entities. */ +enum +{ + EPIPHANY_MSW_ENTITY_AND, + EPIPHANY_MSW_ENTITY_OR, + EPIPHANY_MSW_ENTITY_CONFIG, /* 1 means config is known or saved. */ + EPIPHANY_MSW_ENTITY_NEAREST, + EPIPHANY_MSW_ENTITY_TRUNC, + EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN, + EPIPHANY_MSW_ENTITY_ROUND_KNOWN, + EPIPHANY_MSW_ENTITY_FPU_OMNIBUS, + EPIPHANY_MSW_ENTITY_NUM +}; + +extern int epiphany_normal_fp_rounding; +#ifndef IN_LIBGCC2 +extern rtl_opt_pass *make_pass_mode_switch_use (gcc::context *ctxt); +extern rtl_opt_pass *make_pass_resolve_sw_modes (gcc::context *ctxt); +#endif + +/* This will need to be adjusted when FP_CONTRACT_ON is properly + implemented. */ +#define TARGET_FUSED_MADD (flag_fp_contract_mode == FP_CONTRACT_FAST) + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + epiphany_start_function ((FILE), (NAME), (DECL)) + +#endif /* !GCC_EPIPHANY_H */ diff --git a/gcc-4.9/gcc/config/epiphany/epiphany.md b/gcc-4.9/gcc/config/epiphany/epiphany.md new file mode 100644 index 000000000..2844eeea7 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany.md @@ -0,0 +1,2812 @@ +;; Machine description of the Adaptiva epiphany cpu for GNU C compiler +;; Copyright (C) 1994-2014 Free Software Foundation, Inc. +;; Contributed by Embecosm on behalf of Adapteva, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; See file "rtl.def" for documentation on define_insn, match_*, et. al. + +(define_constants + [(GPR_0 0) + (GPR_1 1) + (GPR_FP 11) + (GPR_IP 12) + (GPR_SP 13) + (GPR_LR 14) + (GPR_16 16) + (GPR_18 18) + (GPR_20 20) + (ARG_POINTER_REGNUM 64) + (FRAME_POINTER_REGNUM 65) + (CC_REGNUM 66) ;; 66 or 17 + (CCFP_REGNUM 67) ;; 67 or 18 + (CONFIG_REGNUM 68) + (STATUS_REGNUM 69) + (LC_REGNUM 70) + (LS_REGNUM 71) + (LE_REGNUM 72) + (IRET_REGNUM 73) + (FP_NEAREST_REGNUM 74) + (FP_TRUNCATE_REGNUM 75) + (FP_ANYFP_REGNUM 76) + (UNKNOWN_REGNUM 77) ; used for addsi3_r and friends + ; We represent the return address as an unspec rather than a reg. + ; If we used a reg, we could use register elimination, but eliminating + ; to GPR_LR would make the latter visible to dataflow, thus making it + ; harder to determine when it must be saved. 
+ (UNSPEC_RETURN_ADDR 0) + (UNSPEC_FP_MODE 1) + + (UNSPECV_GID 0) + (UNSPECV_GIE 1)]) + +;; Insn type. Used to default other attribute values. + +(define_attr "type" + "move,load,store,cmove,unary,compare,shift,mul,uncond_branch,branch,call,fp,fp_int,v2fp,misc,sfunc,fp_sfunc,flow" + (const_string "misc")) + +;; Length (in # bytes) + +(define_attr "length" "" (const_int 4)) + +;; The length here is the length of a single asm. + +(define_asm_attributes + [(set_attr "length" "4") + (set_attr "type" "misc")]) + +;; pipeline model; so far we have only one. +(define_attr "pipe_model" "epiphany" (const_string "epiphany")) + +(define_attr "rounding" "trunc,nearest" + (cond [(ne (symbol_ref "TARGET_ROUND_NEAREST") (const_int 0)) + (const_string "nearest")] + (const_string "trunc"))) + +(define_attr "fp_mode" "round_unknown,round_nearest,round_trunc,int,caller,none" + (cond [(eq_attr "type" "fp,v2fp,fp_sfunc") + (symbol_ref "(enum attr_fp_mode) epiphany_normal_fp_rounding") + (eq_attr "type" "call") + (symbol_ref "(enum attr_fp_mode) epiphany_normal_fp_mode") + (eq_attr "type" "fp_int") + (const_string "int")] + (const_string "none"))) + +(include "epiphany-sched.md") + +(include "predicates.md") +(include "constraints.md") + +;; modes that are held in a single register, and hence, a word. +(define_mode_iterator WMODE [SI SF HI QI V2HI V4QI]) +(define_mode_iterator WMODE2 [SI SF HI QI V2HI V4QI]) + +;; modes that are held in a two single registers +(define_mode_iterator DWMODE [DI DF V2SI V2SF V4HI V8QI]) + +;; Double-word mode made up of two single-word mode values. +(define_mode_iterator DWV2MODE [V2SI V2SF]) +(define_mode_attr vmode_part [(V2SI "si") (V2SF "sf")]) +(define_mode_attr vmode_PART [(V2SI "SI") (V2SF "SF")]) +(define_mode_attr vmode_fp_type [(V2SI "fp_int") (V2SF "fp")]) +(define_mode_attr vmode_ccmode [(V2SI "CC") (V2SF "CC_FP")]) +(define_mode_attr vmode_cc [(V2SI "CC_REGNUM") (V2SF "CCFP_REGNUM")]) + +;; Move instructions. + +(define_expand "mov" + [(set (match_operand:WMODE 0 "general_operand" "") + (match_operand:WMODE 1 "general_operand" ""))] + "" +{ + if (mode == V4QImode || mode == V2HImode) + { + operands[0] = simplify_gen_subreg (SImode, operands[0], mode, 0); + operands[1] = simplify_gen_subreg (SImode, operands[1], mode, 0); + emit_insn (gen_movsi (operands[0], operands[1])); + DONE; + } + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); + if (mode == SImode + && (operands[1] == frame_pointer_rtx || operands[1] == arg_pointer_rtx)) + { + rtx reg = operands[0]; + + if (!REG_P (reg)) + reg = gen_reg_rtx (SImode); + emit_insn (gen_move_frame (reg, operands[1])); + operands[1] = reg; + if (operands[0] == reg) + DONE; + } +}) + +(define_insn "*movqi_insn" + [(set (match_operand:QI 0 "move_dest_operand" "=Rcs, r, r,r,m") + (match_operand:QI 1 "move_src_operand" "Rcs,rU16,Cal,m,r"))] +;; ??? Needed? 
+ "gpr_operand (operands[0], QImode) + || gpr_operand (operands[1], QImode)" + "@ + mov %0,%1 + mov %0,%1 + mov %0,%1 + ldrb %0,%1 + strb %1,%0" + [(set_attr "type" "move,move,move,load,store")]) + +(define_insn_and_split "*movhi_insn" + [(set (match_operand:HI 0 "move_dest_operand" "=r, r,r,m") + (match_operand:HI 1 "move_src_operand""rU16,Cal,m,r"))] + "gpr_operand (operands[0], HImode) + || gpr_operand (operands[1], HImode)" + "@ + mov %0,%1 + mov %0,%%low(%1); %1 + ldrh %0,%c1 + strh %1,%c0" + "reload_completed && CONSTANT_P (operands[1]) + && !satisfies_constraint_U16 (operands[1]) && TARGET_SPLIT_LOHI" + [(set (match_dup 2) (match_dup 3))] + "operands[2] = simplify_gen_subreg (SImode, operands[0], HImode, 0); + operands[3] = simplify_gen_subreg (SImode, operands[1], HImode, 0);" + [(set_attr "type" "move,move,load,store")]) + +;; We use a special pattern for a move from the frame pointer to +;; show the flag clobber that is needed when this move is changed +;; to an add by register elimination. +;; ??? A pseudo register might be equivalent to a function invariant, +;; and thus placed by reload into reg_equiv_invariant; if the pseudo +;; does not get a hard register, we then end up with the function +;; invariant in its place, i.e. an unexpected clobber of the flags +;; register. +;; +;; N.B. operand 1 is an operand so that reload will perform elimination. +;; +;; The post-reload pattern recognition and splitting is done in frame_move_1. +(define_insn "move_frame" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (match_operand:SI 1 "register_operand" "r")) + (clobber (reg:CC CC_REGNUM))] + "operands[1] == frame_pointer_rtx || operands[1] == arg_pointer_rtx" + "#") + +(define_insn "movsi_high" + [(set (match_operand:SI 0 "gpr_operand" "+r") + (ior:SI (and:SI (match_dup 0) (const_int 65535)) + (high:SI (match_operand:SI 1 "move_src_operand" "i"))))] + "" + "movt %0, %%high(%1)" + [(set_attr "type" "move") + (set_attr "length" "4")]) + +(define_insn "movsi_lo_sum" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (lo_sum:SI (const_int 0) + (match_operand:SI 1 "move_src_operand" "i")))] + "" + "mov %0, %%low(%1)" + [(set_attr "type" "move") + (set_attr "length" "4")]) + +(define_insn_and_split "*movsi_insn" + [(set (match_operand:SI 0 "move_dest_operand" + "= r, r, r, r, r, r, m, r, Rct") + (match_operand:SI 1 "move_src_operand" + "rU16Rra,Cm1,Cl1,Cr1,Cal,mSra,rRra,Rct,r"))] + "gpr_operand (operands[0], SImode) + || gpr_operand (operands[1], SImode) + || satisfies_constraint_Sra (operands[1])" +{ + switch (which_alternative) + { + case 0: return "mov %0,%1"; + case 1: return "add %0,%-,(1+%1)"; + case 2: operands[1] = GEN_INT (exact_log2 (-INTVAL (operands[1]))); + return "lsl %0,%-,%1"; + case 3: operands[1] = GEN_INT (32 - exact_log2 (INTVAL (operands[1]) + 1)); + return "lsr %0,%-,%1"; + case 4: return "mov %0,%%low(%1)\;movt %0,%%high(%1) ; %1"; + case 5: return "ldr %0,%C1"; + case 6: return "str %1,%C0"; + case 7: return "movfs %0,%1"; + case 8: return "movts %0,%1"; + default: gcc_unreachable (); + } +} + "reload_completed && CONSTANT_P (operands[1]) + && !satisfies_constraint_U16 (operands[1]) + && !satisfies_constraint_Cm1 (operands[1]) + && !satisfies_constraint_Cl1 (operands[1]) + && !satisfies_constraint_Cr1 (operands[1]) + && TARGET_SPLIT_LOHI" + [(match_dup 2) (match_dup 3)] + "operands[2] = gen_movsi_lo_sum (operands[0], operands[1]); + operands[3] = gen_movsi_high (operands[0], operands[1]);" + [(set_attr "type" "move,misc,misc,misc,move,load,store,flow,flow") + (set_attr 
"length" "4,4,4,4,8,4,4,4,4")]) + +(define_split + [(set (match_operand:SI 0 "nonimmediate_operand") + (unspec:SI [(const_int 0)] UNSPEC_RETURN_ADDR))] + "reload_completed && !MACHINE_FUNCTION (cfun)->lr_clobbered" + [(set (match_dup 0) (reg:SI GPR_LR))]) + +(define_split + [(set (match_operand:SI 0 "gpr_operand") + (unspec:SI [(const_int 0)] UNSPEC_RETURN_ADDR))] + "reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + emit_insn (gen_reload_insi_ra (operands[0], operands[1])); + DONE; +}) + +(define_expand "reload_insi_ra" + [(set (match_operand:SI 0 "gpr_operand" "r") (match_operand:SI 1 "" "Sra"))] + "" +{ + rtx addr + = (frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx); + + if (!MACHINE_FUNCTION (cfun)->lr_slot_known) + { + start_sequence (); + epiphany_expand_prologue (); + if (!MACHINE_FUNCTION (cfun)->lr_slot_known) + epiphany_expand_epilogue (0); + end_sequence (); + gcc_assert (MACHINE_FUNCTION (cfun)->lr_slot_known); + } + addr = plus_constant (Pmode, addr, MACHINE_FUNCTION (cfun)->lr_slot_offset); + operands[1] = gen_frame_mem (SImode, addr); +}) + +;; If the frame pointer elimination offset is zero, we'll use this pattern. +;; Note that the splitter can accept any gpr in operands[1]; this is +;; necessary, (e.g. for compile/20021015-1.c -O0,) +;; because when register elimination cannot be done with the constant +;; as an immediate operand of the add instruction, reload will resort to +;; loading the constant into a reload register, using gen_add2_insn to add +;; the stack pointer, and then use the reload register as new source in +;; the move_frame pattern. +(define_insn_and_split "*move_frame_1" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (match_operand:SI 1 "gpr_operand" "r")) + (clobber (reg:CC CC_REGNUM))] + "(reload_in_progress || reload_completed) + && (operands[1] == stack_pointer_rtx + || operands[1] == hard_frame_pointer_rtx)" + "#" + "reload_in_progress || reload_completed" + [(set (match_dup 0) (match_dup 1))]) + +(define_expand "mov" + [(set (match_operand:DWMODE 0 "general_operand" "") + (match_operand:DWMODE 1 "general_operand" ""))] + "" + " +{ + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + if (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY) + { + rtx o0l, o0h, o1l, o1h; + + o0l = simplify_gen_subreg (SImode, operands[0], mode, 0); + o0h = simplify_gen_subreg (SImode, operands[0], mode, + UNITS_PER_WORD); + o1l = simplify_gen_subreg (SImode, operands[1], mode, 0); + o1h = simplify_gen_subreg (SImode, operands[1], mode, + UNITS_PER_WORD); + if (reg_overlap_mentioned_p (o0l, o1h)) + { + emit_move_insn (o0h, o1h); + emit_move_insn (o0l, o1l); + } + else + { + emit_move_insn (o0l, o1l); + emit_move_insn (o0h, o1h); + } + DONE; + } + /* lower_subreg has a tendency to muck up vectorized code. + To protect the wide memory accesses, we must use same-size + subregs. 
*/ + if (epiphany_vect_align != 4 /* == 8 */ + && !reload_in_progress + && (GET_CODE (operands[0]) == MEM || GET_CODE (operands[1]) == MEM) + && !misaligned_operand (operands[1], mode) + && (GET_CODE (operands[0]) != SUBREG + || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))) + != GET_MODE_SIZE (mode) + && GET_CODE (operands[1]) != SUBREG))) + { + operands[0] + = simplify_gen_subreg (DImode, operands[0], mode, 0); + operands[1] + = simplify_gen_subreg (DImode, operands[1], mode, 0); + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + } + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); +}") + +(define_insn_and_split "*mov_insn" + [(set (match_operand:DWMODE 0 "move_dest_operand" "=r, r,r,m") + (match_operand:DWMODE 1 "move_double_src_operand" "r,CalE,m,r"))] + "(gpr_operand (operands[0], mode) + || gpr_operand (operands[1], mode))" + "@ + # + # + ldrd %0,%X1 + strd %1,%X0" + "reload_completed + && (((!MEM_P (operands[0]) || misaligned_operand (operands[0], mode)) + && (!MEM_P (operands[1]) + || misaligned_operand (operands[1], mode))) + || epiphany_vect_align == 4)" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int word0 = 0, word1 = UNITS_PER_WORD; + + if (post_modify_operand (operands[0], mode) + || post_modify_operand (operands[1], mode)) + word0 = UNITS_PER_WORD, word1 = 0; + + operands[2] = simplify_gen_subreg (SImode, operands[0], mode, word0); + operands[3] = simplify_gen_subreg (SImode, operands[1], mode, word0); + operands[4] = simplify_gen_subreg (SImode, operands[0], mode, word1); + operands[5] = simplify_gen_subreg (SImode, operands[1], mode, word1); + if (post_modify_operand (operands[0], mode)) + operands[2] + = change_address (operands[2], VOIDmode, + plus_constant (Pmode, XEXP (XEXP (operands[0], 0), 0), + UNITS_PER_WORD)); + if (post_modify_operand (operands[1], mode)) + operands[3] + = change_address (operands[3], VOIDmode, + plus_constant (Pmode, XEXP (XEXP (operands[1], 0), 0), + UNITS_PER_WORD)); +} + [(set_attr "type" "move,move,load,store") + (set_attr "length" "8,16,4,4")]) + + +(define_insn_and_split "*movsf_insn" + [(set (match_operand:SF 0 "move_dest_operand" "=r,r,r,m") + (match_operand:SF 1 "move_src_operand" "r,E,m,r"))] + "gpr_operand (operands[0], SFmode) + || gpr_operand (operands[1], SFmode)" + "@ + mov %0,%1 + mov %0,%%low(%1)\;movt %0,%%high(%1) ; %1 + ldr %0,%C1 + str %1,%C0" + "reload_completed && CONSTANT_P (operands[1]) && TARGET_SPLIT_LOHI" + [(set (match_dup 2) (match_dup 3))] + "operands[2] = simplify_gen_subreg (SImode, operands[0], SFmode, 0); + operands[3] = simplify_gen_subreg (SImode, operands[1], SFmode, 0);" + [(set_attr "type" "move,move,load,store") + (set_attr "length" "4,8,4,4")]) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "add_reg_operand" "") + (plus:SI (match_operand:SI 1 "add_reg_operand" "") + (match_operand:SI 2 "add_operand" "")))] + "" + " +{ + if (reload_in_progress || reload_completed) + emit_insn (gen_addsi3_r (operands[0], operands[1], operands[2])); + else if (TARGET_FP_IARITH && add_reg_operand (operands[2], SImode)) + emit_insn (gen_iadd (operands[0], operands[1], operands[2])); + else + emit_insn (gen_addsi3_i (operands[0], operands[1], operands[2])); + DONE; +}") + +; The default case of epiphany_print_operand emits IMMEDIATE_PREFIX +; where appropriate; however, 'n' is processed by output_asm_insn +; which doesn't, so we have to explicitly emit the '# 
in the +; r/r/CnL output template alternative. +(define_insn "addsi3_i" + [(set (match_operand:SI 0 "add_reg_operand" "=r,r") + (plus:SI (match_operand:SI 1 "add_reg_operand" "%r,r") + (match_operand:SI 2 "add_operand" "rL,CnL"))) + (clobber (reg:CC CC_REGNUM))] + "" + "@ + add %0,%1,%2 + sub %0,%1,#%n2" +[(set_attr "type" "misc")]) + +; We use a clobber of UNKNOWN_REGNUM here so that the peephole optimizers +; can identify the unresolved flags clobber problem, and also to +; avoid unwanted matches. +; +; At -O0 / -O1 we don't peephole all instances away. We could get better +; debug unwinding through the emitted code if we added a splitter. +(define_insn "addsi3_r" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (match_operand:SI 1 "gpr_operand" "%r") + (match_operand:SI 2 "nonmemory_operand" "rCar"))) + (clobber (reg:CC UNKNOWN_REGNUM))] + "reload_in_progress || reload_completed" +{ + int scratch = (0x17 + ^ (true_regnum (operands[0]) & 1) + ^ (true_regnum (operands[1]) & 2) + ^ (true_regnum (operands[2]) & 4)); + asm_fprintf (asm_out_file, "\tstr r%d,[sp,#0]\n", scratch); + asm_fprintf (asm_out_file, "\tmovfs r%d,status\n", scratch); + output_asm_insn ("add %0,%1,%2", operands); + asm_fprintf (asm_out_file, "\tmovts status,r%d\n", scratch); + asm_fprintf (asm_out_file, "\tldr r%d,[sp,#0]\n", scratch); + return ""; +} + [(set_attr "length" "20") + (set_attr "type" "misc")]) + +;; reload uses gen_addsi2 because it doesn't understand the need for +;; the clobber. +(define_peephole2 + [(set (match_operand:SI 0 "gpr_operand" "") + (match_operand:SI 1 "const_int_operand" "")) + (parallel [(set (match_dup 0) + (plus:SI (match_dup 0) + (match_operand:SI 2 "gpr_operand"))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + "satisfies_constraint_L (operands[1]) + || ((operands[2] == stack_pointer_rtx + || (operands[2] == hard_frame_pointer_rtx && frame_pointer_needed)) + && !peep2_regno_dead_p (2, CC_REGNUM) + && satisfies_constraint_Car (operands[1]))" + [(parallel [(set (match_dup 0) + (plus:SI (match_dup 2) (match_dup 1))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + ;; FIXME: + ;; need this patch: http://gcc.gnu.org/ml/gcc-patches/2011-10/msg02819.html + ;; "peep2_rescan = true;" +) + +(define_peephole2 + [(match_parallel 5 "" + [(set (match_operand 3 "cc_operand" "") (match_operand 4 "" ""))]) + (parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + "REGNO (operands[3]) == CC_REGNUM + && (gpr_operand (operands[2], SImode) + || satisfies_constraint_L (operands[2])) + && !reg_overlap_mentioned_p (operands[0], operands[5]) + && !reg_set_p (operands[1], operands[5]) + && !reg_set_p (operands[2], operands[5])" + [(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REGNUM))]) + (match_dup 5)] + "") + +(define_peephole2 + [(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + "peep2_regno_dead_p (1, CC_REGNUM) + && (gpr_operand (operands[2], SImode) + || satisfies_constraint_L (operands[2]))" + [(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:CC CC_REGNUM))])] + "") + +(define_peephole2 + 
[(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (reg:SI GPR_SP) + (match_operand:SI 1 "nonmemory_operand" ""))) + (clobber (reg:CC UNKNOWN_REGNUM))])] + "(REG_P (operands[1]) && !reg_overlap_mentioned_p (operands[0], operands[1])) + || RTX_OK_FOR_OFFSET_P (mode, operands[1])" + [(set (match_dup 0) (reg:SI GPR_SP)) + (set (mem:WMODE (post_modify (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))) + (reg:WMODE GPR_SP))] + "") + + + +(define_peephole2 + [(parallel [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (reg:SI GPR_FP) + (match_operand:SI 1 "nonmemory_operand" ""))) + (clobber (reg:CC UNKNOWN_REGNUM))]) + (match_scratch:WMODE 2 "r")] + "frame_pointer_needed + && ((REG_P (operands[1]) + && !reg_overlap_mentioned_p (operands[0], operands[1])) + || RTX_OK_FOR_OFFSET_P (mode, operands[1]))" + [(set (match_dup 0) (reg:SI GPR_FP)) + (set (match_dup 2) + (mem:WMODE (post_modify (match_dup 0) + (plus:SI (match_dup 0) (match_dup 1)))))] + "") + +(define_expand "subsi3" + [(set (match_operand:SI 0 "gpr_operand" "") + (plus:SI (match_operand:SI 1 "add_reg_operand" "") + (match_operand:SI 2 "arith_operand" "")))] + "" + " +{ + gcc_assert (!reload_in_progress && !reload_completed); + + if (TARGET_FP_IARITH) + emit_insn (gen_isub (operands[0], operands[1], operands[2])); + else + emit_insn (gen_subsi3_i (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "subsi3_i" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (minus:SI (match_operand:SI 1 "add_reg_operand" "r") + (match_operand:SI 2 "arith_operand" "rL"))) + (clobber (reg:CC CC_REGNUM))] + "" + "sub %0,%1,%2" + [(set_attr "type" "misc")]) + +; After mode-switching, floating point operations, fp_sfuncs and calls +; must exhibit the use of the control register, lest the setting of the +; control register could be deleted or moved. OTOH a use of a hard register +; greatly counfounds optimizers like the rtl loop optimizers or combine. +; Therefore, we put an extra pass immediately after the mode switching pass +; that inserts the USEs of the control registers, and sets a flag in struct +; machine_function that float_operation can henceforth only match with that +; USE. 
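The 32-bit constant alternative of *movsi_insn above is split into a mov of the low half followed by a movt of the high half; a small sketch of that split (illustrative only):

   #include <stdio.h>

   int main (void)
   {
     unsigned int value = 0x12345678;
     unsigned int lo = value & 0xffff;           // mov  rd,%low(value)
     unsigned int hi = (value >> 16) & 0xffff;   // movt rd,%high(value)
     unsigned int reassembled = (hi << 16) | lo;
     printf ("%#x\n", reassembled);              // prints 0x12345678
     return 0;
   }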
+ +;; Addition +(define_expand "addsf3" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (plus:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*addsf3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (plus:SF (match_operand:SF 1 "gpr_operand" "%r") + (match_operand:SF 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fadd %0,%1,%2" + [(set_attr "type" "fp")]) + +;; Subtraction +(define_expand "subsf3" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (minus:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*subsf3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (minus:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fsub %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_expand "subsf3_f" + [(parallel + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r"))) + (set (match_operand:SF 0 "gpr_operand" "=r") + (minus:SF (match_dup 1) (match_dup 2)))])] + "!TARGET_SOFT_CMPSF") + +(define_insn "*subsf3_f_i" + [(match_parallel 3 "float_operation" + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r"))) + (set (match_operand:SF 0 "gpr_operand" "=r") + (minus:SF (match_dup 1) (match_dup 2)))])] + "!TARGET_SOFT_CMPSF" + "fsub %0,%1,%2" + [(set_attr "type" "fp")]) + +; There is an fabs instruction, but it has longer latency. +(define_expand "abssf2" + [(set (match_operand:SF 0 "gpr_operand" "") + (abs:SF (match_operand:SF 1 "gpr_operand" "")))] + "" + " +{ + rtx op1 = copy_to_mode_reg (SImode, simplify_gen_subreg (SImode, operands[1], + SFmode, 0)); + rtx op0 = simplify_gen_subreg (SImode, operands[0], SFmode, 0); + + emit_insn (gen_ashlsi3 (op1, op1, const1_rtx)); + emit_insn (gen_lshrsi3 (op0, op1, const1_rtx)); + DONE; +}") + +;; Multiplication +(define_expand "mulsf3" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (mult:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*mulsf3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (mult:SF (match_operand:SF 1 "gpr_operand" "%r") + (match_operand:SF 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fmul %0,%1,%2" + [(set_attr "type" "fp")]) + +;; Division +(define_expand "divsf3" + [(set (match_operand:SF 0 "gpr_operand" "") + (div:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" "")))] + "flag_reciprocal_math" +{ + rtx one = CONST1_RTX (SFmode); + rtx dst = operands[0]; + + if (rtx_equal_p (dst, operands[1])) + { + emit_move_insn (dst, one); + DONE; + } + else if (!register_operand (dst, SFmode) && can_create_pseudo_p ()) + dst = gen_reg_rtx (SFmode); + emit_insn (gen_recipsf2 (dst, one, operands[2], + sfunc_symbol (\"__fast_recipsf2\"))); + emit_insn (gen_mulsf3 (operands[0], operands[1], dst)); + DONE; +}) + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. 
+;; It would be nicer to use a constraint for a GPR_0 - only register class, +;; but sched1 can still cause trouble then, and there is no guarantee of +;; better register allocations. +;; Neither is there when using the opposite strategy - putting explicit +;; hard register references into pre-reload rtl. +(define_expand "recipsf2" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (div:SF (match_operand:SF 1 "const_float_1_operand" "") + (match_operand:SF 2 "move_src_operand" ""))) + (use (match_operand:SI 3 "move_src_operand" "")) + (clobber (reg:SF 0)) + (clobber (reg:SI 1)) + (clobber (reg:SF GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn_and_split "*recipsf2_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r,r") + (div:SF (match_operand:SF 1 "const_float_1_operand" "") + (match_operand:SF 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "move_src_operand" "rU16m,rU16mCal")) + (clobber (reg:SF 0)) + (clobber (reg:SI 1)) + (clobber (reg:SF GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "flag_reciprocal_math" + "#" + "&& reload_completed" + [(set (reg:SI 1) (match_dup 3)) + (set (reg:SF 0) (match_dup 2)) + (parallel + [(set (reg:SF 0) + (div:SF (match_dup 1) + (reg:SF 0))) + (use (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SF 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*recipsf2_2" + [(match_parallel 1 "float_operation" + [(set (reg:SF 0) + (div:SF (match_operand:SF 0 "const_float_1_operand" "") + (reg:SF 0))) + (use (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "flag_reciprocal_math" + "jalr r1" + [(set_attr "type" "fp_sfunc")]) + + +;; Fused multiply-add +(define_expand "fmasf4" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (fma:SF (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" "") + (match_operand:SF 3 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "") + +; The multiply operands are commutative, but since they have the +; same constraints, there is no point in telling reload about this. +(define_insn "*fmadd" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (fma:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r") + (match_operand:SF 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fmadd %0,%1,%2" + [(set_attr "type" "fp")]) + +; Once vetorization consistently works for this port, should check +; if the fmadd / fmsub patterns still serve a purpose. 
With the +; introduction of fma / fnma handling by the SSA optimizers, +; at least scalars should be handled by these optimizers, would +; have to see how well they do on vectors from auto-vectorization. +; +; combiner pattern, also used by vector combiner pattern +(define_expand "maddsf" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "=r") + (plus:SF (mult:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r")) + (match_operand:SF 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "TARGET_FUSED_MADD") + +(define_insn "*maddsf_combine" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (plus:SF (mult:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r")) + (match_operand:SF 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "TARGET_FUSED_MADD" + "fmadd %0,%1,%2" + [(set_attr "type" "fp")]) + +;; Fused multiply-sub +(define_expand "fnmasf4" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (fma:SF (neg:SF (match_operand:SF 1 "gpr_operand" "")) + (match_operand:SF 2 "gpr_operand" "") + (match_operand:SF 3 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "") + +(define_insn "*fmsub" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (fma:SF (neg:SF (match_operand:SF 1 "gpr_operand" "r")) + (match_operand:SF 2 "gpr_operand" "r") + (match_operand:SF 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fmsub %0,%1,%2" + [(set_attr "type" "fp")]) + +(define_insn "*fmsub_combine" + [(match_parallel 4 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (minus:SF (match_operand:SF 3 "gpr_operand" "0") + (mult:SF (match_operand:SF 1 "gpr_operand" "r") + (match_operand:SF 2 "gpr_operand" "r")))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "TARGET_FUSED_MADD" + "fmsub %0,%1,%2" + [(set_attr "type" "fp")]) + +;; float / integer conversions + +(define_expand "floatsisf2" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (float:SF (match_operand:SI 1 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*floatsisf2_i" + [(match_parallel 2 "float_operation" + [(set (match_operand:SF 0 "gpr_operand" "=r") + (float:SF (match_operand:SI 1 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "float %0, %1" + [(set_attr "type" "fp")]) + +(define_expand "floatsisf2_cmp" + [(parallel + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (float:SF (match_operand:SF 1 "gpr_operand" "r")) + (match_dup 2))) + (set (match_operand:SF 0 "gpr_operand" "=r") + (float:SF (match_dup 1)))])] + "" + "operands[2] = CONST0_RTX (SFmode);") + +(define_insn "*floatsisf2_cmp_i" + [(match_parallel 3 "float_operation" + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (float:SF (match_operand:SF 1 "gpr_operand" "r")) + (match_operand:SF 2 "const0_operand" ""))) + (set (match_operand:SF 0 "gpr_operand" "=r") + (float:SF (match_dup 1)))])] + "" + "float %0, %1" + [(set_attr "type" "fp")]) + +(define_expand "floatunssisf2" + [(set (match_operand:SF 0 "gpr_operand" "") + (float:SF (match_operand:SI 1 "gpr_operand" "")))] + "epiphany_normal_fp_rounding == /*FP_MODE_ROUND_TRUNC*/ 2" +{ + rtx cst = force_reg (SImode, gen_int_mode (0xb0800000, SImode)); + rtx tmp = gen_reg_rtx (SImode); + rtx cmp = gen_rtx_GTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + + if (reg_overlap_mentioned_p (operands[0], operands[1])) + operands[1] = copy_to_mode_reg (SImode, operands[1]); + 
emit_insn (gen_floatsisf2 (operands[0], operands[1])); + emit_insn (gen_ashrsi3 (tmp, operands[1], GEN_INT (8))); + emit_insn (gen_sub_f (tmp, tmp, cst)); + emit_insn (gen_movsfcc (operands[0], cmp, + simplify_gen_subreg (SFmode, tmp, SImode, 0), + operands[0])); + DONE; +}) + +(define_expand "fix_truncsfsi2" + [(parallel + [(set (match_operand:SI 0 "gpr_operand" "") + (fix:SI (match_operand:SF 1 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*fix_truncsfsi2_i" + [(match_parallel 2 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (fix:SI (match_operand:SF 1 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "fix %0, %1" + [(set_attr "type" "fp") + (set (attr "fp_mode") + (cond [(match_test "TARGET_MAY_ROUND_FOR_TRUNC") + (const_string "round_unknown")] + (const_string "round_trunc")))]) + +(define_expand "fixuns_truncsfsi2" + [(set (match_operand:SI 0 "gpr_operand" "") + (unsigned_fix:SI (match_operand:SF 1 "gpr_operand" "")))] + "" +{ + if (reg_overlap_mentioned_p (operands[0], operands[1])) + operands[1] = copy_to_mode_reg (SImode, operands[1]); + if (TARGET_SOFT_CMPSF || optimize_function_for_speed_p (cfun)) + { + rtx op1si; + /* By toggling what it to be bit31 before the shift, we get a chance to + use a short movt insn. */ + rtx bit31 = force_reg (SImode, GEN_INT (0x800000)); + rtx tmp = gen_reg_rtx (SImode); + rtx limit = force_reg (SImode, gen_int_mode (0x4f000000, SImode)); + rtx cmp + = gen_rtx_GE (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + + op1si = simplify_gen_subreg (SImode, operands[1], SFmode, 0); + emit_insn (gen_fix_truncsfsi2 (operands[0], operands[1])); + emit_insn (gen_subsi3_i (tmp, op1si, bit31)); + emit_insn (gen_ashlsi3 (tmp, tmp, GEN_INT (8))); + emit_insn (gen_cmpsi_cc_insn (op1si, limit)); + emit_insn (gen_movsicc (operands[0], cmp, tmp, operands[0])); + } + else + { + REAL_VALUE_TYPE offset; + rtx limit; + rtx tmp = gen_reg_rtx (SFmode); + rtx label = gen_label_rtx (); + rtx bit31; + rtx cc1 = gen_rtx_REG (CC_FPmode, CCFP_REGNUM); + rtx cmp = gen_rtx_LT (VOIDmode, cc1, CONST0_RTX (SFmode)); + + real_2expN (&offset, 31, SFmode); + limit = CONST_DOUBLE_FROM_REAL_VALUE (offset, SFmode); + limit = force_reg (SFmode, limit); + emit_insn (gen_fix_truncsfsi2 (operands[0], operands[1])); + emit_insn (gen_subsf3_f (tmp, operands[1], limit)); + emit_jump_insn (gen_branch_insn (label, cmp, cc1)); + bit31 = force_reg (SImode, gen_int_mode (0x80000000, SImode)); + emit_insn (gen_fix_truncsfsi2 (operands[0], tmp)); + emit_insn (gen_xorsi3 (operands[0], operands[0], bit31)); + emit_label (label); + } + DONE; +}) + +(define_expand "iadd" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (plus:SI (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*iadd_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (match_operand:SI 1 "gpr_operand" "%r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "iadd %0, %1, %2" + [(set_attr "type" "fp_int")]) + +(define_expand "isub" + [(parallel + [(set (match_operand:SF 0 "gpr_operand" "") + (minus:SI (match_operand:SF 1 "gpr_operand" "") + (match_operand:SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*isub_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (minus:SI (match_operand:SI 1 "gpr_operand" "r") + 
(match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "isub %0, %1, %2" + [(set_attr "type" "fp_int")]) + +; Try to figure out if we over-committed the FPU, and if so, move +; some insns back over to the integer pipe. + +; The peephole optimizer 'consumes' the insns that are explicitly +; mentioned. We do not want the preceding insn reconsidered, but +; we do want that for the following one, so that if we have a run +; of five fpu users, two of them get changed. Therefore, we +; use next_active_insn to look at the 'following' insn. That should +; exist, because peephole2 runs after reload, and there has to be +; a return after an fp_int insn. +; ??? However, we can not even ordinarily match the preceding insn; +; there is some bug in the generators such that then it leaves out +; the check for PARALLEL before the length check for the then-second +; main insn. Observed when compiling compatibility-atomic-c++0x.cc +; from libstdc++-v3. +(define_peephole2 + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "") + (match_operator:SI 4 "addsub_operator" + [(match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "gpr_operand" "")])) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "get_attr_sched_use_fpu (prev_active_insn (peep2_next_insn (0))) + && peep2_regno_dead_p (1, CC_REGNUM) + && get_attr_sched_use_fpu (next_active_insn (peep2_next_insn (0)))" + [(parallel [(set (match_dup 0) (match_dup 4)) + (clobber (reg:CC CC_REGNUM))])] +) + +(define_peephole2 + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "") + (mult:SI + (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "prev_active_insn (peep2_next_insn (0)) + && get_attr_sched_use_fpu (prev_active_insn (peep2_next_insn (0))) + && peep2_regno_dead_p (1, CC_REGNUM) + && get_attr_sched_use_fpu (next_active_insn (peep2_next_insn (0))) + && find_reg_note (insn, REG_EQUAL, NULL_RTX) != NULL_RTX + && GET_CODE (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0)) == MULT + && CONST_INT_P (XEXP (XEXP (find_reg_note (insn, REG_EQUAL, NULL_RTX), 0), + 1))" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 4))) + (clobber (reg:CC CC_REGNUM))])] +{ + operands[4] + = XEXP (XEXP (find_reg_note (curr_insn, REG_EQUAL, NULL_RTX), 0), 1); +}) + +(define_expand "mulsi3" + [(parallel + [(set (match_operand:SI 0 "gpr_operand" "") + (mult:SI (match_operand:SI 1 "gpr_operand" "") + (match_operand:SI 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn "*imul" + [(match_parallel 3 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (mult:SI (match_operand:SI 1 "gpr_operand" "%r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "imul %0, %1, %2" + [(set_attr "type" "fp_int")]) + +; combiner pattern, also used by vector combiner pattern +(define_expand "maddsi" + [(parallel + [(set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (mult:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r")) + (match_operand:SI 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "") + +(define_insn "*maddsi_combine" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (mult:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r")) + (match_operand:SI 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + 
"imadd %0, %1, %2" + [(set_attr "type" "fp_int")]) + +(define_insn "*imsub" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (minus:SI (match_operand:SI 3 "gpr_operand" "0") + (mult:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r")))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "imsub %0, %1, %2" + [(set_attr "type" "fp_int")]) + +(define_expand "divsi3" + [(parallel + [(set (match_operand:SI 0 "move_dest_operand" "") + (div:SI (match_operand:SI 1 "move_src_operand" "") + (match_operand:SI 2 "move_src_operand" ""))) + (use (match_dup 3)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "operands[3] = sfunc_symbol (\"__divsi3\");") + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. +(define_insn_and_split "*divsi3_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r") + (div:SI (match_operand:SI 1 "move_src_operand" "rU16m,rU16mCal") + (match_operand:SI 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "call_address_operand" "Csy,r")) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "&& reload_completed" + [(set (reg:SI 0) (match_dup 1)) + (set (reg:SI 1) (match_dup 2)) + (parallel + [(set (reg:SI 0) (div:SI (reg:SI 0) (reg:SI 1))) + (use (match_dup 3)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SI 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*divsi3_2" + [(match_parallel 1 "float_operation" + [(set (reg:SI 0) (div:SI (reg:SI 0) (reg:SI 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_20)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "%f0" + [(set_attr "type" "fp_sfunc")]) + +(define_expand "udivsi3" + [(parallel + [(set (match_operand:SI 0 "move_dest_operand" "") + (udiv:SI (match_operand:SI 1 "move_src_operand" "") + (match_operand:SI 2 "move_src_operand" ""))) + (use (match_dup 3)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "operands[3] = sfunc_symbol (\"__udivsi3\");") + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. 
+(define_insn_and_split "*udivsi3_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r") + (udiv:SI (match_operand:SI 1 "move_src_operand" "rU16m,rU16mCal") + (match_operand:SI 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "call_address_operand" "Csy,r")) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "&& reload_completed" + [(set (reg:SI 0) (match_dup 1)) + (set (reg:SI 1) (match_dup 2)) + (parallel + [(set (reg:SI 0) (udiv:SI (reg:SI 0) (reg:SI 1))) + (use (match_dup 3)) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SI 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*udivsi3_2" + [(match_parallel 1 "float_operation" + [(set (reg:SI 0) (udiv:SI (reg:SI 0) (reg:SI 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI 1)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "%f0" + [(set_attr "type" "fp_sfunc")]) + +(define_expand "modsi3" + [(parallel + [(set (match_operand:SI 0 "move_dest_operand" "") + (mod:SI (match_operand:SI 1 "move_src_operand" "") + (match_operand:SI 2 "move_src_operand" ""))) + (use (match_dup 3)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "operands[3] = sfunc_symbol (\"__modsi3\");") + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. 
+(define_insn_and_split "*modsi3_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r") + (mod:SI (match_operand:SI 1 "move_src_operand" "rU16m,rU16mCal") + (match_operand:SI 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "call_address_operand" "Csy,r")) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "&& reload_completed" + [(set (reg:SI 0) (match_dup 1)) + (set (reg:SI 1) (match_dup 2)) + (parallel + [(set (reg:SI 0) (mod:SI (reg:SI 0) (reg:SI 1))) + (use (match_dup 3)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SI 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*modsi3_2" + [(match_parallel 1 "float_operation" + [(set (reg:SI 0) (mod:SI (reg:SI 0) (reg:SI 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:DI GPR_18)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "%f0" + [(set_attr "type" "fp_sfunc")]) + +(define_expand "umodsi3" + [(parallel + [(set (match_operand:SI 0 "move_dest_operand" "") + (umod:SI (match_operand:SI 1 "move_src_operand" "") + (match_operand:SI 2 "move_src_operand" ""))) + (use (match_dup 3)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "operands[3] = sfunc_symbol (\"__umodsi3\");") + +;; Before reload, keep the hard reg usage to clobbers so that the loop +;; optimizers can more easily move this insn. 
+(define_insn_and_split "*umodsi3_1" + [(match_parallel 4 "float_operation" + [(set (match_operand:SI 0 "move_dest_operand" "=r,r") + (umod:SI (match_operand:SI 1 "move_src_operand" "rU16m,rU16mCal") + (match_operand:SI 2 "move_src_operand" "rU16m,rU16mCal"))) + (use (match_operand:SI 3 "call_address_operand" "Csy,r")) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "&& reload_completed" + [(set (reg:SI 0) (match_dup 1)) + (set (reg:SI 1) (match_dup 2)) + (parallel + [(set (reg:SI 0) (umod:SI (reg:SI 0) (reg:SI 1))) + (use (match_dup 3)) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 5) + (match_dup 6)]) + (set (match_dup 0) (reg:SI 0))] + "operands[5] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[6] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 1);" + [(set_attr "type" "fp_sfunc") + (set_attr "length" "16,24")]) + +(define_insn "*umodsi3_2" + [(match_parallel 1 "float_operation" + [(set (reg:SI 0) (umod:SI (reg:SI 0) (reg:SI 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI 2)) + (clobber (reg:SI GPR_IP)) + (clobber (reg:DI GPR_16)) + (clobber (reg:SI GPR_LR)) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "%f0" + [(set_attr "type" "fp_sfunc")]) + +; Disable interrupts. +; Any earlier values read from CONFIG_REGNUM are out of date, since interrupts +; might have changed settings that we do not want to mess with. +(define_insn "gid" + [(set (reg:SI CONFIG_REGNUM) + (unspec_volatile:SI [(const_int 0)] UNSPECV_GID))] + "" + "gid" + [(set_attr "type" "flow")]) + +; Enable interrupts. +; Present CONTROL_REGNUM here to make sure it is live before the +; actual uses in floating point insns / calls are inserted. +; FWIW, interrupts also do mind what is in the control register. +(define_insn "gie" + [(unspec_volatile [(reg:SI CONFIG_REGNUM)] UNSPECV_GIE)] + "" + "gie" + [(set_attr "type" "flow")]) + +; Floating point instructions require manipulating the control register. +; Manipulating the control register needs arithmetic. +; Arithmetic clobbers flags. +; The flags are in the status register, which also contains the alternate +; flag and the interrupt enable/disable bits. +; Saving/restoring status and mixing up the order with gid/gie could +; lead to disaster. +; Usually, saving/restoring the status is unnecessary, and will be optimized +; away. But when we really need it, we must make sure that we don't change +; anything but the flags. +; N.B.: We could make the constant easier to load by inverting it, but +; then we'd need to clobber the saved value - and that would make optimizing +; away unneeded saves/restores harder / less likely.
+(define_expand "movcc" + [(parallel [(set (match_operand:CC 0 "cc_move_operand" "") + (match_operand:CC 1 "cc_move_operand" "")) + (use (match_dup 2)) + (clobber (match_scratch:SI 3 "=X, &r"))])] + "" + "operands[2] = gen_int_mode (~0x10f0, SImode);") + +(define_insn "*movcc_i" + [(set (match_operand:CC 0 "cc_move_operand" "=r,Rcc") + (match_operand:CC 1 "cc_move_operand" "Rcc, r")) + (use (match_operand:SI 2 "nonmemory_operand" "X, r")) + (clobber (match_scratch:SI 3 "=X, &r"))] + "" + "@ + movfs %0,status + movfs %3,status\;eor %3,%3,%1\;and %3,%3,%2\;eor %3,%3,%1\;movts status,%3" + [(set_attr "type" "flow") + (set_attr "length" "20,4")]) + +(define_insn_and_split "save_config" + [(set (match_operand:SI 0 "gpr_operand" "=r") (reg:SI CONFIG_REGNUM)) + (use (reg:SI FP_NEAREST_REGNUM)) + (use (reg:SI FP_TRUNCATE_REGNUM)) + (use (reg:SI FP_ANYFP_REGNUM))] + "" + "#" + "reload_completed" + [(set (match_dup 0) (reg:SI CONFIG_REGNUM))]) + +(define_insn_and_split "set_fp_mode" + [(set (reg:SI FP_NEAREST_REGNUM) + (match_operand:SI 0 "set_fp_mode_operand" "rCfm")) + (set (reg:SI FP_TRUNCATE_REGNUM) (match_dup 0)) + (set (reg:SI FP_ANYFP_REGNUM) + (match_operand:SI 1 "set_fp_mode_operand" "rCfm")) + (use (match_operand:SI 2 "gpr_operand" "r")) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 3 "=&r"))] + "" + "#" + "reload_completed || !rtx_equal_p (operands[0], operands[1])" + [(const_int 0)] +{ + if (!reload_completed) + emit_note (NOTE_INSN_DELETED); + else + epiphany_expand_set_fp_mode (operands); + DONE; +}) + + +;; Boolean instructions. +;; +;; We don't define the DImode versions as expand_binop does a good enough job. + +(define_insn "andsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (and:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "and %0,%1,%2") + +(define_insn "iorsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (ior:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "orr %0,%1,%2") + +(define_insn "xorsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (xor:SI (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "eor %0,%1,%2") + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "gpr_operand" "") + (xor:SI (match_operand:SI 1 "gpr_operand" "") + (match_dup 2)))] + "" +{ + if (epiphany_m1reg >= 0) + emit_insn (gen_one_cmplsi2_i (operands[0], operands[1])); + else + emit_insn (gen_xorsi3 (operands[0], operands[1], + force_reg (SImode, GEN_INT (-1)))); + DONE; +}) + +; Note that folding this pattern into the xorsi3 pattern would make combine +; less effective. +(define_insn "one_cmplsi2_i" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (not:SI (match_operand:SI 1 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "epiphany_m1reg >= 0" + "eor %0,%1,%-") + +;; Shift instructions. +;; In principle we could support arbitrary symbolic values as shift constant +;; (truncating the value appropriately), but that would require a suitable +;; relocation and assembler & linker support. 
+(define_insn "ashrsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r,r") + (ashiftrt:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K"))) + (clobber (reg:CC CC_REGNUM))] + "" + "asr %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +(define_insn "ashrsi3_tst" + [(set (reg:CC CC_REGNUM) + (compare:CC + (ashiftrt:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K")) + (const_int 0))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (ashiftrt:SI (match_dup 1) (match_dup 2)))] + "" + "asr %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +;; Logical Shift Right +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r,r") + (lshiftrt:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K"))) + (clobber (reg:CC CC_REGNUM))] + "" + "lsr %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +(define_insn "lshrsi3_tst" + [(set (reg:CC CC_REGNUM) + (compare:CC + (lshiftrt:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K")) + (const_int 0))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (lshiftrt:SI (match_dup 1) (match_dup 2)))] + "" + "lsr %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +;; Logical/Arithmetic Shift Left +(define_insn "ashlsi3" + [(set (match_operand:SI 0 "gpr_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,K"))) + (clobber (reg:CC CC_REGNUM))] + "" + "lsl %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "shift")]) + +(define_insn "*ashlsi_btst" + [(set (reg:CC_N_NE CC_REGNUM) + (compare:CC_N_NE + (zero_extract:SI (match_operand:SI 1 "gpr_operand" "r") + (const_int 1) + (match_operand 2 "const_int_operand" "K")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "" +{ + rtx xop[3]; + + xop[0] = operands[0]; + xop[1] = operands[1]; + xop[2] = GEN_INT (31-INTVAL (operands[2])); + output_asm_insn ("lsl %0,%1,%2", xop); + return ""; +}) + +;; zero extensions +(define_insn_and_split "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "r,m"))) + (clobber (reg:CC CC_REGNUM))] + "" + "@ + # + ldrb %0,%1" + "reload_completed + ? true_regnum (operands[1]) >= 0 + : REG_P (operands[1]) && REGNO (operands[1]) < FIRST_PSEUDO_REGISTER" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (clobber (reg:CC CC_REGNUM))]) + (parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 24))) + (clobber (reg:CC CC_REGNUM))])] + "operands[2] = simplify_gen_subreg (SImode, operands[1], QImode, 0);") + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0,m")))] + "" + "@ + movt %0, 0 + ldrh %0,%c1") + + +;; Compare instructions. 
+ +(define_insn "cmpsi_cc_insn" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 0 "add_reg_operand" "r,r") + (match_operand:SI 1 "arith_operand" "r,L"))) + (clobber (match_scratch:SI 2 "=r,r"))] + "" + "sub %2,%0,%1" + [(set_attr "type" "compare")]) + +(define_insn "sub_f" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,L"))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "" + "sub %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn "*sub_f_add_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "gpr_operand" "r") + (match_operand:SI 2 "arith_int_operand" "L"))) + (set (match_operand:SI 0 "gpr_operand" "=r") + (plus:SI (match_dup 1) (match_operand:SI 3 "const_int_operand" "L")))] + "INTVAL (operands[2]) == -INTVAL (operands[3])" + "sub %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_expand "abssi2" + [(set (match_dup 2) (const_int 0)) + (parallel [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 2) + (match_operand:SI 1 "nonmemory_operand" ""))) + (set (match_dup 3) + (minus:SI (match_dup 2) (match_dup 1)))]) + (set (match_operand:SI 0 "gpr_operand" "=r") + (if_then_else:SI (gt:SI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 3) + (match_dup 1)))] + "TARGET_CMOVE" + "operands[2] = gen_reg_rtx (SImode); operands[3] = gen_reg_rtx (SImode);") + +(define_insn "*add_c" + [(set (reg:CC_C_LTU CC_REGNUM) + (compare:CC_C_LTU + (plus:SI (match_operand:SI 1 "gpr_operand" "%r,r") + (match_operand:SI 2 "arith_operand" "r,L")) + (match_dup 1))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "" + "add %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn "*add_c_rev" + [(set (reg:CC_C_LTU CC_REGNUM) + (compare:CC_C_LTU + (plus:SI (match_operand:SI 1 "gpr_operand" "%r,r") + (match_operand:SI 2 "arith_operand" "r,L")) + (match_dup 1))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (plus:SI (match_dup 2) (match_dup 1)))] + "" + "add %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn "*sub_c" + [(set (reg:CC_C_GTU CC_REGNUM) + (compare:CC_C_GTU + (minus:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,L")) + (match_dup 1))) + (set (match_operand:SI 0 "gpr_operand" "=r,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "" + "sub %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn "*sub_c_void" + [(set (reg:CC_C_GTU CC_REGNUM) + (compare:CC_C_GTU + (minus:SI (match_operand:SI 1 "gpr_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,L")) + (match_dup 1))) + (clobber (match_scratch:SI 0 "=r,r"))] + "" + "sub %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_code_iterator logical_op + [and ior xor]) + +(define_code_attr op_mnc + [(plus "add") (minus "sub") (and "and") (ior "orr") (xor "eor")]) + +(define_insn "*_f" + [(set (reg:CC CC_REGNUM) + (compare:CC (logical_op:SI (match_operand:SI 1 "gpr_operand" "%r") + (match_operand:SI 2 "gpr_operand" "r")) + (const_int 0))) + (set (match_operand:SI 0 "gpr_operand" "=r") + (logical_op:SI (match_dup 1) (match_dup 2)))] + "" + " %0,%1,%2" + [(set_attr "type" "compare")]) + +(define_insn_and_split "*mov_f" + [(set (reg:CC CC_REGNUM) + (compare:CC (match_operand:SI 1 "gpr_operand" "r") (const_int 0))) + (set (match_operand:SI 0 "gpr_operand" "=r") (match_dup 1))] + "" + "#" + "reload_completed" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (and:SI (match_dup 1) (match_dup 1)) (const_int 0))) + 
(set (match_operand:SI 0 "gpr_operand" "=r") + (and:SI (match_dup 1) (match_dup 1)))])] + "" + [(set_attr "type" "compare")]) + +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "gpr_operand") + (logical_op:SI (match_operand:SI 1 "gpr_operand") + (match_operand:SI 2 "gpr_operand"))) + (clobber (reg:CC CC_REGNUM))]) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (and:SI (match_dup 0) (match_dup 0)) (const_int 0))) + (set (match_operand:SI 3 "gpr_operand") + (and:SI (match_dup 0) (match_dup 0)))])] + "peep2_reg_dead_p (2, operands[0])" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (logical_op:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 3) (logical_op:SI (match_dup 1) (match_dup 2)))])]) + +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "gpr_operand") + (logical_op:SI (match_operand:SI 1 "gpr_operand") + (match_operand:SI 2 "gpr_operand"))) + (clobber (reg:CC CC_REGNUM))]) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (and:SI (match_dup 0) (match_dup 0)) (const_int 0))) + (set (match_operand:SI 3 "gpr_operand") + (and:SI (match_dup 0) (match_dup 0)))])] + "peep2_reg_dead_p (2, operands[3])" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (logical_op:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (logical_op:SI (match_dup 1) (match_dup 2)))])]) + +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "gpr_operand") + (logical_op:SI (match_operand:SI 1 "gpr_operand") + (match_operand:SI 2 "gpr_operand"))) + (clobber (reg:CC CC_REGNUM))]) + (parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 0) (const_int 0))) + (clobber (match_operand:SI 3 "gpr_operand"))])] + "" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (logical_op:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) (logical_op:SI (match_dup 1) (match_dup 2)))])]) + +(define_expand "cstoresi4" + [(parallel + [(set (reg:CC CC_REGNUM) + (match_operand:SI 1 "comparison_operator")) + (match_operand:SI 2 "" "")]) + (set (match_dup 0) (match_operand:SI 3 "arith_operand" "")) + (set (match_operand:SI 0 "gpr_operand" "=r") + (if_then_else:SI (match_dup 4) (match_dup 5) (match_dup 0)))] + "" +{ + enum rtx_code o2_code = GET_CODE (operands[2]); + enum rtx_code cmp_code = GET_CODE (operands[1]); + + if ((o2_code == AND || o2_code == IOR || o2_code == XOR) + && operands[3] == const0_rtx) + { + operands[2] = copy_rtx(operands[2]); + XEXP (operands[2], 0) = force_reg (SImode, XEXP (operands[2], 0)); + XEXP (operands[2], 1) = force_reg (SImode, XEXP (operands[2], 1)); + } + else + operands[2] = force_reg (SImode, operands[2]); + operands[1] = gen_rtx_COMPARE (CCmode, operands[2], operands[3]); + if (cmp_code != NE) + { + operands[2] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)); + operands[3] = const0_rtx; + } + else + { + if (operands[3] != const0_rtx) + operands[2] = gen_rtx_MINUS (SImode, operands[2], operands[3]); + operands[2] = gen_rtx_SET (VOIDmode, operands[0], operands[2]); + operands[3] = operands[0]; + } + operands[4] = gen_rtx_fmt_ee (cmp_code, SImode, + gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + operands[5] = force_reg (SImode, GEN_INT (STORE_FLAG_VALUE)); +}) + + +; floating point comparisons + +(define_insn "*cmpsf_cc_insn" + [(match_parallel 3 "float_operation" + [(set (reg:CC_FP CCFP_REGNUM) + (compare:CC_FP (match_operand:SF 0 "gpr_operand" "r") + (match_operand:SF 1 "gpr_operand" "r"))) + (clobber (match_scratch:SF 2 "=r"))])] + "!TARGET_SOFT_CMPSF" + "fsub %2,%0,%1" + [(set_attr 
"type" "fp") + (set_attr "fp_mode" "round_unknown")]) + +;; ??? do we have to relax the operand0 predicate to immediate_operand +;; to allow the rtl loop optimizer to generate comparisons? OTOH +;; we want call_address_operand to enforce valid operands so that +;; combine won't do silly things, allowing instruction scheduling to do +;; a proper job. +(define_insn "*cmpsf_eq" + [(set (reg:CC_FP_EQ CC_REGNUM) (compare:CC_FP_EQ (reg:SF 0) (reg:SF 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI GPR_IP)) + (clobber (reg:SI GPR_LR))] + "TARGET_SOFT_CMPSF" + "%f0" + [(set_attr "type" "sfunc")]) + +(define_insn "*cmpsf_gte" + [(set (reg:CC_FP_GTE CC_REGNUM) (compare:CC_FP_GTE (reg:SF 0) (reg:SF 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI GPR_IP)) + (clobber (reg:SI GPR_LR))] + "TARGET_SOFT_CMPSF" + "%f0" + [(set_attr "type" "sfunc")]) + +(define_insn "*cmpsf_ord" + [(set (reg:CC_FP_ORD CC_REGNUM) (compare:CC_FP_ORD (reg:SF 0) (reg:SF 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI GPR_IP)) + (clobber (reg:SI GPR_16)) + (clobber (reg:SI GPR_LR))] + "" + "%f0" + [(set_attr "type" "sfunc")]) + +(define_insn "*cmpsf_uneq" + [(set (reg:CC_FP_UNEQ CC_REGNUM) (compare:CC_FP_UNEQ (reg:SF 0) (reg:SF 1))) + (use (match_operand:SI 0 "call_address_operand" "Csy,r")) + (clobber (reg:SI GPR_IP)) + (clobber (reg:SI GPR_16)) + (clobber (reg:SI GPR_LR))] + "TARGET_SOFT_CMPSF" + "%f0" + [(set_attr "type" "sfunc")]) + +;; conditional moves + +(define_expand "movcc" + [(set (match_operand:WMODE 0 "gpr_operand" "") + (if_then_else:WMODE (match_operand 1 "comparison_operator" "") + (match_operand:WMODE 2 "gpr_operand" "") + (match_operand:WMODE 3 "gpr_operand" "")))] + "TARGET_CMOVE" +{ + rtx cmp_op0 = XEXP (operands[1], 0); + rtx cmp_op1 = XEXP (operands[1], 1); + enum machine_mode cmp_in_mode; + enum rtx_code code = GET_CODE (operands[1]); + + cmp_in_mode = GET_MODE (cmp_op0); + if (cmp_in_mode == VOIDmode) + cmp_in_mode = GET_MODE (cmp_op1); + if (cmp_in_mode == VOIDmode) + cmp_in_mode = SImode; + /* If the operands are a better match when reversed, swap them now. + This allows combine to see the proper comparison codes. */ + if (rtx_equal_p (operands[0], operands[2]) + && !rtx_equal_p (operands[0], operands[3])) + { + rtx tmp = operands[2]; operands[2] = operands[3]; operands[3] = tmp; + code = (FLOAT_MODE_P (GET_MODE (cmp_op0)) + ? reverse_condition_maybe_unordered (code) + : reverse_condition (code)); + } + + if (proper_comparison_operator (operands[1], VOIDmode)) + operands[1] = gen_rtx_fmt_ee (code, cmp_in_mode, cmp_op0, cmp_op1); + else + { + if (!currently_expanding_to_rtl) + { + /* ??? It would seem safest to FAIL here, but that would defeat + the purpose of having an if-conversion pass; its logic currently + assumes that the backend should be safe to insert condition code + setting instructions, as the same condition codes were presumably + set by the if-conversion input code. */ + } + /* What mode to give as first operand to gen_compare_reg here is + debatable. VOIDmode would be minimalist; telling gen_compare_reg + to use the mode of CC_REGNUM (or putting it on the comparison + operator afterwards) is also a logical choice. OTOH, by using + mode, we have mode combine opportunities with flag setting + operations - if we get some. 
*/ + operands[1] + = gen_compare_reg (mode, code, cmp_in_mode, cmp_op0, cmp_op1); + if (!operands[1]) + FAIL; + } +}) + +(define_insn "*movcc_insn" + [(set (match_operand:WMODE 0 "gpr_operand" "=r") + (if_then_else:WMODE (match_operator 3 "proper_comparison_operator" + [(match_operand 4 "cc_operand") (const_int 0)]) + (match_operand:WMODE 1 "gpr_operand" "r") + (match_operand:WMODE 2 "gpr_operand" "0")))] + "TARGET_CMOVE" + "mov%d3 %0,%1" + [(set_attr "type" "cmove")]) + +(define_peephole2 + [(parallel [(set (match_operand:WMODE 0 "gpr_operand" "") + (match_operand:WMODE 1 "" "")) + (clobber (match_operand 8 "cc_operand"))]) + (match_operand 2 "" "") + (set (match_operand:WMODE2 3 "gpr_operand" "") + (match_operand:WMODE2 9 "gpr_operand" "")) + (set (match_dup 3) + (if_then_else:WMODE2 (match_operator 5 "proper_comparison_operator" + [(match_operand 6 "cc_operand") + (match_operand 7 "const0_operand")]) + (match_operand:WMODE2 4 "nonmemory_operand" "") + (match_dup 3)))] + "REGNO (operands[0]) == REGNO (operands[9]) + && peep2_reg_dead_p (3, operands[0]) + && !reg_set_p (operands[0], operands[2]) + && !reg_set_p (operands[3], operands[2]) + && !reg_overlap_mentioned_p (operands[3], operands[2])" + [(parallel [(set (match_dup 10) (match_dup 1)) + (clobber (match_dup 8))]) + (match_dup 2) + (set (match_dup 3) + (if_then_else:WMODE2 (match_dup 5) (match_dup 4) (match_dup 3)))] +{ + operands[10] = simplify_gen_subreg (mode, operands[3], + mode, 0); + replace_rtx (operands[2], operands[9], operands[3]); + replace_rtx (operands[2], operands[0], operands[10]); + gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[2])); +}) + +(define_peephole2 + [(parallel [(set (match_operand 6 "cc_operand") (match_operand 2 "" "")) + (set (match_operand:WMODE 0 "gpr_operand" "") + (match_operand:WMODE 1 "" ""))]) + (set (match_operand:WMODE2 3 "gpr_operand" "") + (match_operand:WMODE2 4 "gpr_operand")) + (set (match_dup 3) + (if_then_else:WMODE2 (match_operator 5 "proper_comparison_operator" + [(match_dup 6) + (match_operand:WMODE 7 "const0_operand")]) + (match_operand:WMODE2 8 "gpr_operand") + (match_dup 3)))] + "REGNO (operands[0]) == REGNO (operands[8]) + && REVERSIBLE_CC_MODE (GET_MODE (operands[6])) + && peep2_reg_dead_p (3, operands[6]) + && peep2_reg_dead_p (3, operands[0]) + && !reg_overlap_mentioned_p (operands[4], operands[3])" + [(parallel [(set (match_dup 6) (match_dup 2)) + (set (match_dup 9) (match_dup 1))]) + (set (match_dup 3) + (if_then_else:WMODE2 (match_dup 5) (match_dup 4) (match_dup 3)))] + " +{ + operands[5] + = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (operands[5]), + GET_MODE (operands[6])), + GET_MODE (operands[5]), operands[6], operands[7]); + operands[9] = simplify_gen_subreg (mode, operands[3], + mode, 0); +}") + +;; These control RTL generation for conditional jump insns + +;; To signal to can_compare_p that the cbranchs?4 patterns work, +;; they must allow const0_rtx for both comparison operands +(define_expand "cbranchsi4" + [(set (reg CC_REGNUM) + (compare (match_operand:SI 1 "add_operand" "") + (match_operand:SI 2 "arith_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" [(reg CC_REGNUM) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx cmp = gen_compare_reg (VOIDmode, GET_CODE (operands[0]), SImode, + operands[1], operands[2]); + emit_jump_insn (gen_branch_insn (operands[3], cmp, XEXP (cmp, 0))); + DONE; +}) + +(define_expand "cbranchsf4" + [(set (reg CC_REGNUM) + (compare (match_operand:SF 1 
"arith_operand" "") + (match_operand:SF 2 "arith_operand" ""))) + (set (pc) + (if_then_else + (match_operator 0 "comparison_operator" [(reg CC_REGNUM) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +{ + rtx cmp = gen_compare_reg (VOIDmode, GET_CODE (operands[0]), SFmode, + operands[1], operands[2]); + emit_jump_insn (gen_branch_insn (operands[3], cmp, XEXP (cmp, 0))); + DONE; +}) + +;; Now match both normal and inverted jump. + +(define_insn "branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(match_operand 2 "cc_operand") + (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "b%d1 %l0" + [(set_attr "type" "branch")]) + +(define_insn "*rev_branch_insn" + [(set (pc) + (if_then_else (match_operator 1 "proper_comparison_operator" + [(reg CC_REGNUM) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "b%D1 %l0" + [(set_attr "type" "branch")]) + +;; Unconditional and other jump instructions. + +(define_insn "jump" + [(set (pc) (label_ref (match_operand 0 "" "")))] + "" + "b %l0" + [(set_attr "type" "uncond_branch")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:SI 0 "gpr_operand" "r"))] + "" + "jr %0" + [(set_attr "type" "uncond_branch")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand:SI 0 "gpr_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "" +{ + /* In PIC mode, the table entries are stored PC relative. + Convert the relative address to an absolute address. */ + if (flag_pic) + { + rtx op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + + operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], + op1, NULL_RTX, 0, OPTAB_DIRECT); + } +}) + +(define_insn "*tablejump_internal" + [(set (pc) (match_operand:SI 0 "gpr_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jr %0;" + [(set_attr "type" "uncond_branch")]) + +(define_insn "*tablejump_hi_internal" + [(set (pc) (match_operand:HI 0 "gpr_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "optimize_size && TARGET_SMALL16" + "jr %0;" + [(set_attr "type" "uncond_branch")]) + + +(define_expand "call" + ;; operands[1] is stack_size_rtx + ;; operands[2] is next_arg_register + [(parallel [(call (match_operand:SI 0 "call_operand" "") + (match_operand 1 "" "")) + (clobber (reg:SI GPR_LR))])] + "" +{ + bool target_uninterruptible = epiphany_call_uninterruptible_p (operands[0]); + + if (!call_operand (operands[1], VOIDmode)) + operands[0] + = change_address (operands[0], VOIDmode, + copy_to_mode_reg (Pmode, XEXP (operands[0], 0))); + if (epiphany_uninterruptible_p (current_function_decl) + != target_uninterruptible) + { + emit_insn (target_uninterruptible ? gen_gid (): gen_gie ()); + emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_CALL (VOIDmode, operands[0], operands[1]), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (SImode, GPR_LR))))); + emit_insn (target_uninterruptible ? 
gen_gie (): gen_gid ()); + DONE; + } +}) + +(define_insn "*call_i" + [(match_parallel 2 "float_operation" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Csy,r")) + (match_operand 1 "" "")) + (clobber (reg:SI GPR_LR))])] + "" + "%f0" + [(set_attr "type" "call")]) + +(define_expand "sibcall" + ;; operands[1] is stack_size_rtx + ;; operands[2] is next_arg_register + [(parallel [(call (match_operand:SI 0 "call_operand" "") + (match_operand 1 "" "")) + (return)])] + "" +{ + bool target_uninterruptible = epiphany_call_uninterruptible_p (operands[0]); + + if (!call_operand (operands[1], VOIDmode)) + operands[0] + = change_address (operands[0], VOIDmode, + copy_to_mode_reg (Pmode, XEXP (operands[0], 0))); + if (epiphany_uninterruptible_p (current_function_decl) + != target_uninterruptible) + { + emit_insn (target_uninterruptible ? gen_gid (): gen_gie ()); + emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_CALL (VOIDmode, operands[0], operands[1]), + ret_rtx))); + emit_insn (target_uninterruptible ? gen_gie (): gen_gid ()); + DONE; + } +}) + +(define_insn "*sibcall_i" + [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Csy,Rsc")) + (match_operand 1 "" "")) + (return)] + "" + "@ + b %0 + jr %0" + [(set_attr "type" "call")]) + +(define_expand "call_value" + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + [(parallel [(set (match_operand 0 "gpr_operand" "=r") + (call (match_operand:SI 1 "call_operand" "") + (match_operand 2 "" ""))) + (clobber (reg:SI GPR_LR))])] + "" +{ + bool target_uninterruptible = epiphany_call_uninterruptible_p (operands[1]); + + if (!call_operand (operands[1], VOIDmode)) + operands[1] + = change_address (operands[1], VOIDmode, + copy_to_mode_reg (Pmode, XEXP (operands[1], 0))); + if (epiphany_uninterruptible_p (current_function_decl) + != target_uninterruptible) + { + emit_insn (target_uninterruptible ? gen_gid (): gen_gie ()); + emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_SET + (VOIDmode, operands[0], + gen_rtx_CALL (VOIDmode, operands[1], operands[2])), + gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (SImode, GPR_LR))))); + emit_insn (target_uninterruptible ? gen_gie (): gen_gid ()); + DONE; + } +}) + +(define_insn "*call_value_i" + [(match_parallel 3 "float_operation" + [(set (match_operand 0 "gpr_operand" "=r,r") + (call (mem:SI (match_operand:SI 1 "call_address_operand" "Csy,r")) + (match_operand 2 "" ""))) + (clobber (reg:SI GPR_LR))])] + "" + "%f1" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_expand "sibcall_value" + ;; operand 2 is stack_size_rtx + ;; operand 3 is next_arg_register + [(parallel [(set (match_operand 0 "gpr_operand" "=r") + (call (match_operand:SI 1 "call_operand" "") + (match_operand 2 "" ""))) + (return)])] + "" +{ + bool target_uninterruptible = epiphany_call_uninterruptible_p (operands[1]); + + if (!call_operand (operands[1], VOIDmode)) + operands[1] + = change_address (operands[1], VOIDmode, + copy_to_mode_reg (Pmode, XEXP (operands[1], 0))); + if (epiphany_uninterruptible_p (current_function_decl) + != target_uninterruptible) + { + emit_insn (target_uninterruptible ? gen_gid (): gen_gie ()); + emit_call_insn + (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec (2, gen_rtx_SET + (VOIDmode, operands[0], + gen_rtx_CALL (VOIDmode, operands[1], operands[2])), + ret_rtx))); + emit_insn (target_uninterruptible ? 
gen_gie (): gen_gid ()); + DONE; + } +}) + +(define_insn "*sibcall_value_i" + [(set (match_operand 0 "gpr_operand" "=r,r") + (call (mem:SI (match_operand:SI 1 "call_address_operand" "Csy,Rsc")) + (match_operand 2 "" ""))) + (return)] + "" + "@ + b %1 + jr %1" + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_expand "prologue" + [(pc)] + "" +{ + epiphany_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" + [(pc)] + "" +{ + epiphany_expand_epilogue (0); + DONE; +}) + +(define_expand "sibcall_epilogue" + [(pc)] + "" +{ + epiphany_expand_epilogue (1); + DONE; +}) + +; Since the demise of REG_N_SETS, it is no longer possible to find out +; in the prologue / epilogue expanders how many times lr is set. +; Using df_regs_ever_live_p to decide if lr needs saving means that +; any explicit use of lr will cause it to be saved; hence we cannot +; represent the blink use in return / sibcall instructions themselves, and +; instead have to show it in EPILOGUE_USES. +(define_insn "return_i" + [(return)] + "reload_completed" + "rts" + [(set_attr "type" "uncond_branch")]) + +(define_insn "return_internal_interrupt" + [(return) + (unspec_volatile [(const_int 0)] 1)] + "" + "rti" + [(set_attr "type" "uncond_branch")]) + +(define_insn "stack_adjust_add" + [(set (reg:SI GPR_SP) + (plus:SI (reg:SI GPR_SP) (match_operand:SI 0 "arith_operand" "rL"))) + (clobber (reg:CC CC_REGNUM)) + (clobber (reg:SI STATUS_REGNUM)) + (clobber (match_operand:BLK 1 "memory_operand" "=m"))] + "reload_completed" + "add sp,sp,%0") + +(define_insn "stack_adjust_mov" + [(set (reg:SI GPR_SP) (reg:SI GPR_FP)) + (clobber (match_operand:BLK 0 "memory_operand" "=m"))] + "reload_completed" + "mov sp,fp" + [(set_attr "type" "move")]) + +(define_insn "stack_adjust_str" + [(set (match_operand 0 "stacktop_operand" "=m") + (match_operand 1 "any_gpr_operand" "r")) + (set (reg:SI GPR_SP) + (plus:SI (reg:SI GPR_SP) (match_operand:SI 2 "nonmemory_operand" "rn"))) + (clobber (match_operand:BLK 3 "memory_operand" "=m"))] + "reload_completed" +{ + return (GET_MODE_SIZE (GET_MODE (operands[0])) <= 4 + ? \"str %1,%0,%C2\" : \"strd %1,%0,%X2\"); +} + [(set_attr "type" "store")]) + +(define_insn "stack_adjust_ldr" + [(set (match_operand:SI 0 "gpr_operand" "=r") + (match_operand:SI 1 "stacktop_operand" "m")) + (set (reg:SI GPR_SP) + (plus:SI (reg:SI GPR_SP) (match_operand:SI 2 "nonmemory_operand" "rn"))) + (clobber (match_operand:BLK 3 "memory_operand" "=m"))] + "reload_completed" + "ldr %0,%1,%C2" + [(set_attr "type" "load")]) + +;; Define some fake vector operations so that the vectorizer is happy to use +;; 64 bit loads/stores. 
+(define_expand "vec_unpacks_lo_v4hi" + [(match_operand:V2SI 0 "gpr_operand") + (match_operand:V4HI 1 "gpr_operand")] + "" +{ + rtx in = simplify_gen_subreg (SImode, operands[1], V4HImode, 0); + rtx outl = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + rtx outh + = simplify_gen_subreg (SImode, operands[0], V2SImode, UNITS_PER_WORD); + + if (reg_overlap_mentioned_p (outl, in)) + in = copy_to_mode_reg (SImode, in); + emit_insn (gen_ashlsi3 (outl, in, GEN_INT (16))); + emit_insn (gen_ashrsi3 (outl, outl, GEN_INT (16))); + emit_insn (gen_ashrsi3 (outh, in, GEN_INT (16))); + DONE; +}) + +(define_expand "vec_unpacks_hi_v4hi" + [(match_operand:V2SI 0 "gpr_operand") + (match_operand:V4HI 1 "gpr_operand")] + "" +{ + rtx in = simplify_gen_subreg (SImode, operands[1], V4HImode, UNITS_PER_WORD); + rtx outl = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + rtx outh + = simplify_gen_subreg (SImode, operands[0], V2SImode, UNITS_PER_WORD); + + if (reg_overlap_mentioned_p (outl, in)) + in = copy_to_mode_reg (SImode, in); + emit_insn (gen_ashlsi3 (outl, in, GEN_INT (16))); + emit_insn (gen_ashrsi3 (outl, outl, GEN_INT (16))); + emit_insn (gen_ashrsi3 (outh, in, GEN_INT (16))); + DONE; +}) + +(define_code_iterator addsub [plus minus]) + +(define_code_iterator alu_binop + [plus minus and ior xor]) + +(define_code_attr insn_opname + [(plus "add") (minus "sub") (mult "mul") (div "div") + (and "and") (ior "ior") (xor "xor")]) + +; The addsi3 / subsi3 do checks that we don't want when splitting V2SImode +; operations into two SImode operations. +(define_code_attr si_pattern_suffix + [(plus "_i") (minus "_i") (and "") (ior "") (xor "")]) + +; You might think that this would work better as a define_expand, but +; again lower_subreg pessimizes the code if it sees individual operations. +; We need to keep inputs and outputs as register pairs if we want to +; get sensible register allocation for double-word load and store operations.
+(define_insn_and_split "v2si3" + [(set (match_operand:V2SI 0 "gpr_operand" "=r") + (alu_binop:V2SI (match_operand:V2SI 1 "gpr_operand" "r") + (match_operand:V2SI 2 "gpr_operand" "r"))) + (clobber (reg:CC CC_REGNUM))] + "" + "#" + "reload_completed || (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY)" + [(const_int 0)] +{ + rtx o0l, o0h, o1l, o1h, o2l, o2h; + + o0l = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + o0h = simplify_gen_subreg (SImode, operands[0], V2SImode, UNITS_PER_WORD); + o1l = simplify_gen_subreg (SImode, operands[1], V2SImode, 0); + o1h = simplify_gen_subreg (SImode, operands[1], V2SImode, UNITS_PER_WORD); + o2l = simplify_gen_subreg (SImode, operands[2], V2SImode, 0); + o2h = simplify_gen_subreg (SImode, operands[2], V2SImode, UNITS_PER_WORD); + if (reg_overlap_mentioned_p (o0l, o1h)) + o1h = copy_to_mode_reg (SImode, o1h); + if (reg_overlap_mentioned_p (o0l, o2h)) + o2h = copy_to_mode_reg (SImode, o2h); + emit_insn (gen_si3 (o0l, o1l, o2l)); + emit_insn (gen_si3 (o0h, o1h, o2h)); + DONE; +} + [(set_attr "length" "8")]) + +(define_expand "v2sf3" + [(parallel + [(set (match_operand:V2SF 0 "gpr_operand" "") + (addsub:V2SF (match_operand:V2SF 1 "gpr_operand" "") + (match_operand:V2SF 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn_and_split "v2sf3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:V2SF 0 "gpr_operand" "=r") + (addsub:V2SF (match_operand:V2SF 1 "gpr_operand" "r") + (match_operand:V2SF 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "reload_completed || (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY)" + [(parallel + [(set (match_dup 4) (addsub:SF (match_dup 5) (match_dup 6))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 10) + (match_dup 11)]) + (parallel + [(set (match_dup 7) (addsub:SF (match_dup 8) (match_dup 9))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 10) + (match_dup 11)])] +{ + operands[4] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 0); + operands[5] = simplify_gen_subreg (SFmode, operands[1], V2SFmode, 0); + operands[6] = simplify_gen_subreg (SFmode, operands[2], V2SFmode, 0); + operands[7] + = simplify_gen_subreg (SFmode, operands[0], V2SFmode, UNITS_PER_WORD); + operands[8] + = simplify_gen_subreg (SFmode, operands[1], V2SFmode, UNITS_PER_WORD); + operands[9] + = simplify_gen_subreg (SFmode, operands[2], V2SFmode, UNITS_PER_WORD); + if (!reload_completed) + { + if (reg_overlap_mentioned_p (operands[4], operands[8])) + operands[8] = copy_to_mode_reg (SFmode, operands[8]); + if (reg_overlap_mentioned_p (operands[4], operands[9])) + operands[9] = copy_to_mode_reg (SFmode, operands[9]); + emit_insn (gen_sf3 (operands[4], operands[5], operands[6])); + emit_insn (gen_sf3 (operands[7], operands[8], operands[9])); + DONE; + } + gcc_assert (!reg_overlap_mentioned_p (operands[4], operands[8])); + gcc_assert (!reg_overlap_mentioned_p (operands[4], operands[9])); + operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2); + operands[11] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1); +} + [(set_attr "length" "8") + (set_attr "type" "v2fp")]) + +(define_expand "ashlv2si3" + [(parallel + [(set (match_operand:V2SI 0 "gpr_operand" "") + (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "") + (match_operand:SI 2 "general_operand"))) + (use (match_dup 3)) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" +{ + if (const_int_operand (operands[2], VOIDmode)) + operands[3] + = copy_to_mode_reg (SImode, GEN_INT (1 << INTVAL 
(operands[2]))); + else + { + int o, i; + rtx xop[2], last_out = pc_rtx; + + for (o = 0; o <= UNITS_PER_WORD; o += UNITS_PER_WORD) + { + for (i = 0; i < 2; i++) + { + xop[i] + = (i == 2 ? operands[2] + : simplify_gen_subreg (SImode, operands[i], V2SImode, o)); + gcc_assert (!reg_overlap_mentioned_p (last_out, xop[i]) + /* ??? reg_overlap_mentioned_p doesn't understand + about multi-word SUBREGs. */ + || (GET_CODE (last_out) == SUBREG + && GET_CODE (xop[i]) == SUBREG + && SUBREG_REG (last_out) == SUBREG_REG (xop[i]) + && ((SUBREG_BYTE (last_out) & -UNITS_PER_WORD) + != (SUBREG_BYTE (xop[i]) & -UNITS_PER_WORD)))); + } + emit_insn (gen_ashlsi3 (xop[0], xop[1], operands[2])); + last_out = xop[0]; + } + DONE; + } +}) + +(define_insn_and_split "*ashlv2si3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:V2SI 0 "gpr_operand" "=&r,*1*2") + (ashift:V2SI (match_operand:V2SI 1 "gpr_operand" "r,r") + (match_operand 2 "const_int_operand" "n,n"))) + (use (match_operand:SI 4 "gpr_operand" "r,r")) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "reload_completed" + [(parallel + [(set (match_dup 5) (mult:SI (match_dup 6) (match_dup 4))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 9) + (match_dup 10)]) + (parallel + [(set (match_dup 7) (mult:SI (match_dup 8) (match_dup 4))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 9) + (match_dup 10)])] +{ + operands[5] = simplify_gen_subreg (SImode, operands[0], V2SImode, 0); + operands[6] = simplify_gen_subreg (SImode, operands[1], V2SImode, 0); + operands[7] = simplify_gen_subreg (SImode, operands[0], + V2SImode, UNITS_PER_WORD); + operands[8] = simplify_gen_subreg (SImode, operands[1], + V2SImode, UNITS_PER_WORD); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[8])); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[4])); + operands[9] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2); + operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1); + rtx insn + = (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec + (4, + gen_rtx_SET (VOIDmode, operands[5], + gen_rtx_MULT (SImode, operands[6], operands[4])), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)), + operands[9], operands[10]))); + insn = emit_insn (insn); + add_reg_note (insn, REG_EQUAL, + gen_rtx_ASHIFT (SImode, operands[6], operands[2])); + insn + = (gen_rtx_PARALLEL + (VOIDmode, + gen_rtvec + (4, + gen_rtx_SET (VOIDmode, operands[7], + gen_rtx_MULT (SImode, operands[8], operands[4])), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CC_FPmode, CCFP_REGNUM)), + operands[9], operands[10]))); + insn = emit_insn (insn); + add_reg_note (insn, REG_EQUAL, + gen_rtx_ASHIFT (SImode, operands[7], operands[2])); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "fp_int")]) + +(define_expand "mul3" + [(parallel + [(set (match_operand:DWV2MODE 0 "gpr_operand" "") + (mult:DWV2MODE (match_operand:DWV2MODE 1 "gpr_operand" "") + (match_operand:DWV2MODE 2 "gpr_operand" ""))) + (clobber (reg:CC_FP CCFP_REGNUM))])]) + +(define_insn_and_split "mul3_i" + [(match_parallel 3 "float_operation" + [(set (match_operand:DWV2MODE 0 "gpr_operand" "=r") + (mult:DWV2MODE (match_operand:DWV2MODE 1 "gpr_operand" "r") + (match_operand:DWV2MODE 2 "gpr_operand" "r"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "" + "#" + "reload_completed || (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY)" + [(parallel + [(set (match_dup 4) (mult: (match_dup 5) (match_dup 6))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 10) + (match_dup 11)]) + (parallel + [(set 
(match_dup 7) (mult: (match_dup 8) (match_dup 9))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 10) + (match_dup 11)])] +{ + operands[4] + = simplify_gen_subreg (mode, operands[0], mode, 0); + operands[5] + = simplify_gen_subreg (mode, operands[1], mode, 0); + operands[6] + = simplify_gen_subreg (mode, operands[2], mode, 0); + operands[7] = simplify_gen_subreg (mode, operands[0], + mode, UNITS_PER_WORD); + operands[8] = simplify_gen_subreg (mode, operands[1], + mode, UNITS_PER_WORD); + operands[9] = simplify_gen_subreg (mode, operands[2], + mode, UNITS_PER_WORD); + if (!reload_completed) + { + if (reg_overlap_mentioned_p (operands[4], operands[8])) + operands[8] = copy_to_mode_reg (mode, operands[8]); + if (reg_overlap_mentioned_p (operands[4], operands[9])) + operands[9] = copy_to_mode_reg (mode, operands[9]); + emit_insn (gen_mul3 (operands[4], operands[5], operands[6])); + emit_insn (gen_mul3 (operands[7], operands[8], operands[9])); + DONE; + } + gcc_assert (!reg_overlap_mentioned_p (operands[4], operands[8])); + gcc_assert (!reg_overlap_mentioned_p (operands[4], operands[9])); + operands[10] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 2); + operands[11] = XVECEXP (operands[3], 0, XVECLEN (operands[3], 0) - 1); +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +(define_insn_and_split "*fmadd_combine" + [(match_parallel 4 "float_operation" + [(set (match_operand:DWV2MODE 0 "gpr_operand" "=r") + (plus:DWV2MODE (mult: + (match_operand: 1 "gpr_operand" "r") + (match_operand: 2 "gpr_operand" "r")) + (match_operand: 3 "gpr_operand" "0"))) + (clobber (reg:CC_FP CCFP_REGNUM))])] + "TARGET_FUSED_MADD || mode == V2SImode" + "#" + "reload_completed || (epiphany_vect_align == 4 && TARGET_SPLIT_VECMOVE_EARLY)" + [(parallel + [(set (match_dup 5) + (plus: (mult: (match_dup 6) (match_dup 7)) + (match_dup 8))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 13) + (match_dup 14)]) + (parallel + [(set (match_dup 9) + (plus: (mult: (match_dup 10) (match_dup 11)) + (match_dup 12))) + (clobber (reg:CC_FP CCFP_REGNUM)) + (match_dup 13) + (match_dup 14)])] +{ + operands[5] + = simplify_gen_subreg (mode, operands[0], mode, 0); + operands[6] + = simplify_gen_subreg (mode, operands[1], mode, 0); + operands[7] + = simplify_gen_subreg (mode, operands[2], mode, 0); + operands[8] + = simplify_gen_subreg (mode, operands[3], mode, 0); + operands[9] = simplify_gen_subreg (mode, operands[0], + mode, UNITS_PER_WORD); + operands[10] = simplify_gen_subreg (mode, operands[1], + mode, UNITS_PER_WORD); + operands[11] = simplify_gen_subreg (mode, operands[2], + mode, UNITS_PER_WORD); + operands[12] = simplify_gen_subreg (mode, operands[3], + mode, UNITS_PER_WORD); + if (!reload_completed) + { + if (reg_overlap_mentioned_p (operands[5], operands[10])) + operands[10] = copy_to_mode_reg (mode, operands[10]); + if (reg_overlap_mentioned_p (operands[5], operands[11])) + operands[11] = copy_to_mode_reg (mode, operands[11]); + if (reg_overlap_mentioned_p (operands[5], operands[12])) + operands[12] = copy_to_mode_reg (mode, operands[12]); + emit_insn (gen_madd (operands[5], operands[6], operands[7], + operands[8])); + emit_insn (gen_madd (operands[9], operands[10], operands[11], + operands[12])); + DONE; + } + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[10])); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[11])); + gcc_assert (!reg_overlap_mentioned_p (operands[5], operands[12])); + operands[13] = XVECEXP (operands[4], 0, XVECLEN (operands[4], 0) - 2); + operands[14] = XVECEXP 
(operands[4], 0, XVECLEN (operands[4], 0) - 1); +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +(define_expand "vec_set" + [(match_operand:DWV2MODE 0 "register_operand") + (match_operand: 1 "register_operand") + (match_operand 2 "const_int_operand" "")] + "" +{ + operands[0] + = simplify_gen_subreg (mode, operands[0], mode, + UNITS_PER_WORD * INTVAL (operands[2])); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_expand "movmisalign" + [(set (match_operand:DWV2MODE 0 "nonimmediate_operand" "") + (match_operand:DWV2MODE 1 "general_operand" ""))] + "" +{ + rtx op00, op01, op10, op11; + + op00 = simplify_gen_subreg (mode, operands[0], mode, 0); + op01 = simplify_gen_subreg (mode, operands[0], mode, + UNITS_PER_WORD); + op10 = simplify_gen_subreg (mode, operands[1], mode, 0); + op11 = simplify_gen_subreg (mode, operands[1], mode, + UNITS_PER_WORD); + emit_move_insn (op00, op10); + emit_move_insn (op01, op11); + DONE; +}) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "type" "flow")]) diff --git a/gcc-4.9/gcc/config/epiphany/epiphany.opt b/gcc-4.9/gcc/config/epiphany/epiphany.opt new file mode 100644 index 000000000..2acff323c --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany.opt @@ -0,0 +1,148 @@ +; Options for the Adapteva EPIPHANY port of the compiler +; +; Copyright (C) 2005-2014 Free Software Foundation, Inc. +; Contributed by Embecosm on behalf of Adapteva, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. + +mhalf-reg-file +Target Mask(HALF_REG_FILE) +Don't use any of r32..r63. + +mprefer-short-insn-regs +Target Mask(PREFER_SHORT_INSN_REGS) +Preferentially allocate registers that allow short instruction generation. + +mbranch-cost= +Target RejectNegative Joined UInteger Var(epiphany_branch_cost) Init(3) +Set branch cost. + +mcmove +Target Mask(CMOVE) +Enable conditional move instruction usage. + +mnops= +Target RejectNegative Joined UInteger Var(epiphany_n_nops) Init(0) +Set number of nops to emit before each insn pattern. + +; Problems with using the flags from fsub for comparison are: +; - Because of underflow (lack of subnormal numbers), different small numbers +; can compare as equal. +; - The set of comparisons is limited, and reversing comparisons doesn't work +; in the presence of NaNs. +; The latter problem might be tolerated with -ffinite-math-only, but nothing +; in -funsafe-math-optimizations says different small numbers may be considered +; equal. +msoft-cmpsf +Target Mask(SOFT_CMPSF) +Use software floating point comparisons. + +msplit-lohi +Target Mask(SPLIT_LOHI) +Enable split of 32 bit immediate loads into low / high part. + +mpost-inc +Target Mask(POST_INC) +Enable use of POST_INC / POST_DEC. + +mpost-modify +Target Mask(POST_MODIFY) +Enable use of POST_MODIFY. + +mstack-offset= +Target RejectNegative Joined UInteger Var(epiphany_stack_offset) Init(EPIPHANY_STACK_OFFSET) +Set number of bytes on the stack preallocated for use by the callee.
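The comment above the msoft-cmpsf option says that deriving comparison results from the flags of an fsub breaks down when subnormal results are flushed to zero: two distinct small numbers can then appear equal. A minimal, host-independent C sketch of that failure mode follows; the constants are arbitrary illustrative values, not taken from the port, and the program only shows that the difference of two unequal normal floats can be subnormal and would therefore flush to zero.

/* Illustration of the -msoft-cmpsf rationale: if subnormal results are
   flushed to zero, an equality test derived from the flags of an fsub
   can report two different small numbers as equal.  */
#include <stdio.h>
#include <float.h>

int main (void)
{
  float x = 3.0e-38f;   /* normal: larger than FLT_MIN (~1.18e-38) */
  float y = 2.5e-38f;   /* a different normal number */
  float diff = x - y;   /* exact result ~5e-39 is subnormal */

  printf ("x == y: %d\n", x == y);            /* 0: the values differ */
  printf ("x - y : %g\n", (double) diff);     /* nonzero with gradual underflow */
  printf ("difference is subnormal: %d\n",
          diff > 0.0f && diff < FLT_MIN);     /* 1: would flush to 0 */
  /* With flush-to-zero, diff would be 0.0, and a flags-based EQ check
     after an fsub would wrongly conclude x == y.  */
  return 0;
}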
+ +mround-nearest +Target Mask(ROUND_NEAREST) +Assume round to nearest is selected for purposes of scheduling. + +mlong-calls +Target Mask(LONG_CALLS) +Generate call insns as indirect calls + +mshort-calls +Target Mask(SHORT_CALLS) +Generate call insns as direct calls + +msmall16 +Target Mask(SMALL16) +Assume labels and symbols can be addressed using 16 bit absolute addresses. + +mfp-mode= +Target RejectNegative Joined Var(epiphany_normal_fp_mode) Enum(attr_fp_mode) Init(FP_MODE_CALLER) + +; The values are from enum attr_fp_mode, but using that enum would bring +; problems with enum forward declarations. +Enum +Name(attr_fp_mode) Type(int) + +EnumValue +Enum(attr_fp_mode) String(caller) Value(FP_MODE_CALLER) + +EnumValue +Enum(attr_fp_mode) String(round-nearest) Value(FP_MODE_ROUND_NEAREST) + +EnumValue +Enum(attr_fp_mode) String(truncate) Value(FP_MODE_ROUND_TRUNC) + +EnumValue +Enum(attr_fp_mode) String(int) Value(FP_MODE_INT) + +may-round-for-trunc +Target Mask(MAY_ROUND_FOR_TRUNC) +A floating point to integer truncation may be replaced with rounding to save mode switching + +mvect-double +Target Mask(VECT_DOUBLE) +Vectorize for double-word operations. + +max-vect-align= +Target RejectNegative Joined Var(epiphany_vect_align) Enum(vect_align) Init(8) + +Enum +Name(vect_align) Type(int) + +EnumValue +Enum(vect_align) String(4) Value(4) + +EnumValue +Enum(vect_align) String(8) Value(8) + +msplit-vecmove-early +Target Mask(SPLIT_VECMOVE_EARLY) +Split unaligned 8 byte vector moves before post-modify address generation. + +mfp-iarith +Target Mask(FP_IARITH) +Use the floating point unit for integer add/subtract. + +m1reg- +Target RejectNegative Joined Var(epiphany_m1reg) Enum(m1reg) Init(-1) +Set register to hold -1. + +Enum +Name(m1reg) Type(int) + +EnumValue +Enum(m1reg) String(none) Value(-1) + +EnumValue +Enum(m1reg) String(r43) Value(43) + +EnumValue +Enum(m1reg) String(r63) Value(63) diff --git a/gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h b/gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h new file mode 100644 index 000000000..3dd89b0cd --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/epiphany_intrinsics.h @@ -0,0 +1,27 @@ +/* Epiphany intrinsic functions + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +.
*/ + +#define __builtin_epiphany_fmadd(a, b, c) __builtin_fmaf (b, c, a) +#define __builtin_epiphany_fmsub(a, b, c) __builtin_fmaf (-(b), c, a) diff --git a/gcc-4.9/gcc/config/epiphany/mode-switch-use.c b/gcc-4.9/gcc/config/epiphany/mode-switch-use.c new file mode 100644 index 000000000..a0aa24925 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/mode-switch-use.c @@ -0,0 +1,109 @@ +/* Insert USEs in instructions that require mode switching. + This should probably be merged into mode-switching.c . + Copyright (C) 2011-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "function.h" +#include "emit-rtl.h" +#include "tree-pass.h" +#include "insn-attr.h" +#include "insn-config.h" +#include "recog.h" +#include "tm_p.h" +#include "df.h" + +#ifndef TARGET_INSERT_MODE_SWITCH_USE +#define TARGET_INSERT_MODE_SWITCH_USE NULL +#endif + +static unsigned int +insert_uses (void) +{ + static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING; +#define N_ENTITIES ARRAY_SIZE (num_modes) + int e; + void (*target_insert_mode_switch_use) (rtx insn, int, int) + = TARGET_INSERT_MODE_SWITCH_USE; + + for (e = N_ENTITIES - 1; e >= 0; e--) + { + int no_mode = num_modes[e]; + rtx insn; + int mode; + + if (!OPTIMIZE_MODE_SWITCHING (e)) + continue; + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + mode = MODE_NEEDED (e, insn); + if (mode == no_mode) + continue; + if (target_insert_mode_switch_use) + { + target_insert_mode_switch_use (insn, e, mode); + df_insn_rescan (insn); + } + } + } + return 0; +} + +namespace { + +const pass_data pass_data_mode_switch_use = +{ + RTL_PASS, /* type */ + "mode_switch_use", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + false, /* has_gate */ + true, /* has_execute */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_mode_switch_use : public rtl_opt_pass +{ +public: + pass_mode_switch_use(gcc::context *ctxt) + : rtl_opt_pass(pass_data_mode_switch_use, ctxt) + {} + + /* opt_pass methods: */ + unsigned int execute () { return insert_uses (); } + +}; // class pass_mode_switch_use + +} // anon namespace + +rtl_opt_pass * +make_pass_mode_switch_use (gcc::context *ctxt) +{ + return new pass_mode_switch_use (ctxt); +} diff --git a/gcc-4.9/gcc/config/epiphany/predicates.md b/gcc-4.9/gcc/config/epiphany/predicates.md new file mode 100644 index 000000000..fb8fd88ba --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/predicates.md @@ -0,0 +1,368 @@ +;; Predicate definitions for code generation on the EPIPHANY cpu. +;; Copyright (C) 1994-2014 Free Software Foundation, Inc. +;; Contributed by Embecosm on behalf of Adapteva, Inc. +;; +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Returns true iff OP is a symbol reference that is a valid operand +;; in a jump or call instruction. + +(define_predicate "symbolic_operand" + (match_code "symbol_ref,label_ref,const") +{ + if (GET_CODE (op) == SYMBOL_REF) + return (!epiphany_is_long_call_p (op) + && (!flag_pic || SYMBOL_REF_LOCAL_P (op))); + if (GET_CODE (op) == LABEL_REF) + return true; + if (GET_CODE (op) == CONST) + { + op = XEXP (op, 0); + if (GET_CODE (op) != PLUS || !symbolic_operand (XEXP (op, 0), mode)) + return false; + /* The idea here is that a 'small' constant offset should be OK. + What exactly is considered 'small' is a bit arbitrary. */ + return satisfies_constraint_L (XEXP (op, 1)); + } + gcc_unreachable (); +}) + +;; Acceptable arguments to the call insn. + +(define_predicate "call_address_operand" + (ior (match_code "reg") + (match_operand 0 "symbolic_operand"))) + +(define_predicate "call_operand" + (match_code "mem") +{ + op = XEXP (op, 0); + return call_address_operand (op, mode); +}) + +;; general purpose register. +(define_predicate "gpr_operand" + (match_code "reg,subreg") +{ + int regno; + + if (!register_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = XEXP (op, 0); + regno = REGNO (op); + return regno >= FIRST_PSEUDO_REGISTER || regno <= 63; +}) + +(define_special_predicate "any_gpr_operand" + (match_code "subreg,reg") +{ + return gpr_operand (op, mode); +}) + +;; register suitable for integer add / sub operations; besides general purpose +;; registers we allow fake hard registers that are eliminated to a real +;; hard register via an offset. +(define_predicate "add_reg_operand" + (match_code "reg,subreg") +{ + int regno; + + if (!register_operand (op, mode)) + return 0; + if (GET_CODE (op) == SUBREG) + op = XEXP (op, 0); + regno = REGNO (op); + return (regno >= FIRST_PSEUDO_REGISTER || regno <= 63 + || regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM); +}) + +;; Also allows suitable constants +(define_predicate "add_operand" + (match_code "reg,subreg,const_int,symbol_ref,label_ref,const") +{ + if (GET_CODE (op) == REG || GET_CODE (op) == SUBREG) + return add_reg_operand (op, mode); + return satisfies_constraint_L (op) || satisfies_constraint_CnL (op); +}) + +;; Ordinary 3rd operand for arithmetic operations +(define_predicate "arith_operand" + (match_code "reg,subreg,const_int,symbol_ref,label_ref,const") +{ + if (GET_CODE (op) == REG || GET_CODE (op) == SUBREG) + return register_operand (op, mode); + return satisfies_constraint_L (op); +}) + +;; Constant integer 3rd operand for arithmetic operations +(define_predicate "arith_int_operand" + (match_code "const_int,symbol_ref,label_ref,const") +{ + return satisfies_constraint_L (op); +}) + +;; Return true if OP is an acceptable argument for a single word move source. 
+ +(define_predicate "move_src_operand" + (match_code + "symbol_ref,label_ref,const,const_int,const_double,reg,subreg,mem,unspec") +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF : + case LABEL_REF : + case CONST : + return 1; + case CONST_INT : + return immediate_operand (op, mode); + case CONST_DOUBLE : + /* SImode constants should always fit into a CONST_INT. Large + unsigned 32-bit constants are represented as negative CONST_INTs. */ + gcc_assert (GET_MODE (op) != SImode); + /* We can handle 32-bit floating point constants. */ + if (mode == SFmode) + return GET_MODE (op) == SFmode; + return 0; + case REG : + return op != frame_pointer_rtx && register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + else + return register_operand (op, mode); + case MEM : + return address_operand (XEXP (op, 0), mode); + case UNSPEC: + return satisfies_constraint_Sra (op); + default : + return 0; + } +}) + +;; Return true if OP is an acceptable argument for a double word move source. + +(define_predicate "move_double_src_operand" + (match_code "reg,subreg,mem,const_int,const_double,const_vector") +{ + if (GET_CODE (op) == MEM && misaligned_operand (op, mode) + && !address_operand (plus_constant (Pmode, XEXP (op, 0), 4), SImode)) + return 0; + return general_operand (op, mode); +}) + +;; Return true if OP is an acceptable argument for a move destination. + +(define_predicate "move_dest_operand" + (match_code "reg,subreg,mem") +{ + switch (GET_CODE (op)) + { + case REG : + return register_operand (op, mode); + case SUBREG : + /* (subreg (mem ...) ...) can occur here if the inner part was once a + pseudo-reg and is now a stack slot. */ + if (GET_CODE (SUBREG_REG (op)) == MEM) + { + return address_operand (XEXP (SUBREG_REG (op), 0), mode); + } + else + { + return register_operand (op, mode); + } + case MEM : + if (GET_MODE_SIZE (mode) == 8 && misaligned_operand (op, mode) + && !address_operand (plus_constant (Pmode, XEXP (op, 0), 4), SImode)) + return 0; + return address_operand (XEXP (op, 0), mode); + default : + return 0; + } +}) + +(define_special_predicate "stacktop_operand" + (match_code "mem") +{ + if (mode != VOIDmode && GET_MODE (op) != mode) + return false; + return rtx_equal_p (XEXP (op, 0), stack_pointer_rtx); +}) + +;; Return 1 if OP is a comparison operator valid for the mode of CC. +;; This allows the use of MATCH_OPERATOR to recognize all the branch insns. +;; +;; Some insns only set a few bits in the condition code. So only allow those +;; comparisons that use the bits that are valid. + +(define_predicate "proper_comparison_operator" + (match_code "eq, ne, le, lt, ge, gt, leu, ltu, geu, gtu, unordered, ordered, uneq, unge, ungt, unle, unlt, ltgt") +{ + enum rtx_code code = GET_CODE (op); + rtx cc = XEXP (op, 0); + + /* combine can try strange things. 
*/ + if (!REG_P (cc)) + return 0; + switch (GET_MODE (cc)) + { + case CC_Zmode: + case CC_N_NEmode: + case CC_FP_EQmode: + return REGNO (cc) == CC_REGNUM && (code == EQ || code == NE); + case CC_C_LTUmode: + return REGNO (cc) == CC_REGNUM && (code == LTU || code == GEU); + case CC_C_GTUmode: + return REGNO (cc) == CC_REGNUM && (code == GTU || code == LEU); + case CC_FPmode: + return (REGNO (cc) == CCFP_REGNUM + && (code == EQ || code == NE || code == LT || code == LE)); + case CC_FP_GTEmode: + return (REGNO (cc) == CC_REGNUM + && (code == EQ || code == NE || code == GT || code == GE + || code == UNLE || code == UNLT)); + case CC_FP_ORDmode: + return REGNO (cc) == CC_REGNUM && (code == ORDERED || code == UNORDERED); + case CC_FP_UNEQmode: + return REGNO (cc) == CC_REGNUM && (code == UNEQ || code == LTGT); + case CCmode: + return REGNO (cc) == CC_REGNUM; + /* From combiner. */ + case QImode: case SImode: case SFmode: case HImode: + /* From cse.c:dead_libcall_p. */ + case DFmode: + return 0; + default: + gcc_unreachable (); + } +}) + +(define_predicate "addsub_operator" + (match_code "plus, minus")) + +(define_predicate "cc_operand" + (and (match_code "reg") + (match_test "REGNO (op) == CC_REGNUM || REGNO (op) == CCFP_REGNUM"))) + +(define_predicate "const0_operand" + (match_code "const_int, const_double") +{ + if (mode == VOIDmode) + mode = GET_MODE (op); + return op == CONST0_RTX (mode); +}) + +(define_predicate "const_float_1_operand" + (match_code "const_double") +{ + return op == CONST1_RTX (mode); +}) + +(define_predicate "cc_move_operand" + (and (match_code "reg") + (ior (match_test "REGNO (op) == CC_REGNUM") + (match_test "gpr_operand (op, mode)")))) + +(define_predicate "float_operation" + (match_code "parallel") +{ + /* Most patterns start out with one SET and one CLOBBER, and gain a USE + or two of FP_NEAREST_REGNUM / FP_TRUNCATE_REGNUM / FP_ANYFP_REGNUM + after mode switching. The longer patterns are + all beyond length 4, and before mode switching, end with a + CLOBBER of CCFP_REGNUM. */ + int count = XVECLEN (op, 0); + bool inserted = MACHINE_FUNCTION (cfun)->control_use_inserted; + int i; + + if (count == 2 + /* Vector ashift has an extra use for the constant factor required to + implement the shift as multiply. */ + || (count == 3 && GET_CODE (XVECEXP (op, 0, 0)) == SET + && GET_CODE (XEXP (XVECEXP (op, 0, 0), 1)) == ASHIFT)) + return !inserted; + + /* combine / recog will pass any old garbage here before checking the + rest of the insn. */ + if (count <= 3) + return false; + + i = 1; + if (count > 4) + for (i = 2; i < count; i++) + { + rtx x = XVECEXP (op, 0, i); + + if (GET_CODE (x) == CLOBBER) + { + if (!REG_P (XEXP (x, 0))) + return false; + if (REGNO (XEXP (x, 0)) == CCFP_REGNUM) + { + if (count == i + 1) + return !inserted; + break; + } + /* Just an ordinary clobber, keep looking. 
*/ + else if (GET_CODE (x) == USE + || (GET_CODE (x) == SET && i == 2)) + continue; + else + return false; + } + if (count != i + 3 || !inserted) + return false; + for (i = i+1; i < count; i++) + { + rtx x = XVECEXP (op, 0, i); + + if (GET_CODE (x) != USE && GET_CODE (x) != CLOBBER) + return false; + x = XEXP (x, 0); + if (!REG_P (x) + || (REGNO (x) != FP_NEAREST_REGNUM + && REGNO (x) != FP_TRUNCATE_REGNUM + && REGNO (x) != FP_ANYFP_REGNUM)) + return false; + } + return true; +}) + +(define_predicate "set_fp_mode_operand" + (ior (match_test "gpr_operand (op, mode)") + (and (match_code "const") + (match_test "satisfies_constraint_Cfm (op)")))) + +(define_predicate "post_modify_address" + (match_code "post_modify,post_inc,post_dec")) + +(define_predicate "post_modify_operand" + (and (match_code "mem") + (match_test "post_modify_address (XEXP (op, 0), Pmode)"))) + +(define_predicate "nonsymbolic_immediate_operand" + (ior (match_test "immediate_operand (op, mode)") + (match_code "const_vector"))) /* Is this specific enough? */ + +;; Return true if OP is misaligned memory operand +(define_predicate "misaligned_operand" + (and (match_code "mem") + (match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)"))) diff --git a/gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c b/gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c new file mode 100644 index 000000000..16849182c --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/resolve-sw-modes.c @@ -0,0 +1,200 @@ +/* Mode switching cleanup pass for the EPIPHANY cpu. + Copyright (C) 2000-2014 Free Software Foundation, Inc. + Contributed by Embecosm on behalf of Adapteva, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "machmode.h" +#include "tm.h" +#include "hard-reg-set.h" +#include "tm_p.h" +#include "vec.h" +#include "sbitmap.h" +#include "basic-block.h" +#include "df.h" +#include "rtl.h" +#include "insn-config.h" +#include "insn-codes.h" +#include "emit-rtl.h" +#include "recog.h" +#include "function.h" +#include "insn-attr-common.h" +#include "tree-pass.h" + +/* Clean-up after mode switching: + Check for mode setting insns that have FP_MODE_ROUND_UNKNOWN. + If only one rounding mode is required, select that one. + Else we have to choose one to use in this mode setting insn and + insert new mode setting insns on the edges where the other mode + becomes unambiguous.
*/ + +static bool +gate_resolve_sw_modes (void) +{ + return optimize; +} + +static unsigned +resolve_sw_modes (void) +{ + basic_block bb; + rtx insn, src; + vec todo; + sbitmap pushed; + bool need_commit = false; + bool finalize_fp_sets = (MACHINE_FUNCTION (cfun)->unknown_mode_sets == 0); + + todo.create (last_basic_block_for_fn (cfun)); + pushed = sbitmap_alloc (last_basic_block_for_fn (cfun)); + bitmap_clear (pushed); + if (!finalize_fp_sets) + { + df_note_add_problem (); + df_analyze (); + } + FOR_EACH_BB_FN (bb, cfun) + FOR_BB_INSNS (bb, insn) + { + enum attr_fp_mode selected_mode; + + if (!NONJUMP_INSN_P (insn) + || recog_memoized (insn) != CODE_FOR_set_fp_mode) + continue; + src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + if (finalize_fp_sets) + { + SET_SRC (XVECEXP (PATTERN (insn), 0, 2)) = copy_rtx (src); + if (REG_P (src)) + df_insn_rescan (insn); + continue; + } + if (REG_P (src) + || XINT (XVECEXP (XEXP (src, 0), 0, 0), 0) != FP_MODE_ROUND_UNKNOWN) + continue; + if (find_regno_note (insn, REG_UNUSED, FP_TRUNCATE_REGNUM)) + selected_mode = FP_MODE_ROUND_NEAREST; + else if (find_regno_note (insn, REG_UNUSED, FP_NEAREST_REGNUM)) + selected_mode = FP_MODE_ROUND_TRUNC; + else + { + /* We could get more fancy in the selection of the mode by + checking the total frequency of the affected edges. */ + selected_mode = (enum attr_fp_mode) epiphany_normal_fp_rounding; + + todo.quick_push (bb); + bitmap_set_bit (pushed, bb->index); + } + XVECEXP (XEXP (src, 0), 0, 0) = GEN_INT (selected_mode); + SET_SRC (XVECEXP (PATTERN (insn), 0, 1)) = copy_rtx (src); + SET_SRC (XVECEXP (PATTERN (insn), 0, 2)) = copy_rtx (src); + df_insn_rescan (insn); + } + while (todo.length ()) + { + basic_block bb = todo.pop (); + int selected_reg, jilted_reg; + enum attr_fp_mode jilted_mode; + edge e; + edge_iterator ei; + + bitmap_set_bit (pushed, bb->index); + bitmap_set_bit (pushed, bb->index); + + if (epiphany_normal_fp_rounding == FP_MODE_ROUND_NEAREST) + { + selected_reg = FP_NEAREST_REGNUM; + jilted_reg = FP_TRUNCATE_REGNUM; + jilted_mode = FP_MODE_ROUND_TRUNC; + } + else + { + selected_reg = FP_TRUNCATE_REGNUM; + jilted_reg = FP_NEAREST_REGNUM; + jilted_mode = FP_MODE_ROUND_NEAREST; + } + + FOR_EACH_EDGE (e, ei, bb->succs) + { + basic_block succ = e->dest; + rtx seq; + + if (!REGNO_REG_SET_P (DF_LIVE_IN (succ), jilted_reg)) + continue; + if (REGNO_REG_SET_P (DF_LIVE_IN (succ), selected_reg)) + { + if (bitmap_bit_p (pushed, succ->index)) + continue; + todo.quick_push (succ); + bitmap_set_bit (pushed, bb->index); + continue; + } + start_sequence (); + emit_set_fp_mode (EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN, + jilted_mode, NULL); + seq = get_insns (); + end_sequence (); + need_commit = true; + insert_insn_on_edge (seq, e); + } + } + todo.release (); + sbitmap_free (pushed); + if (need_commit) + commit_edge_insertions (); + return 0; +} + +namespace { + +const pass_data pass_data_resolve_sw_modes = +{ + RTL_PASS, /* type */ + "resolve_sw_modes", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + true, /* has_gate */ + true, /* has_execute */ + TV_MODE_SWITCH, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */ +}; + +class pass_resolve_sw_modes : public rtl_opt_pass +{ +public: + pass_resolve_sw_modes(gcc::context *ctxt) + : rtl_opt_pass(pass_data_resolve_sw_modes, ctxt) + {} + + /* opt_pass methods: */ + bool gate () { return gate_resolve_sw_modes (); } + 
unsigned int execute () { return resolve_sw_modes (); } + +}; // class pass_resolve_sw_modes + +} // anon namespace + +rtl_opt_pass * +make_pass_resolve_sw_modes (gcc::context *ctxt) +{ + return new pass_resolve_sw_modes (ctxt); +} diff --git a/gcc-4.9/gcc/config/epiphany/t-epiphany b/gcc-4.9/gcc/config/epiphany/t-epiphany new file mode 100644 index 000000000..7a329dad7 --- /dev/null +++ b/gcc-4.9/gcc/config/epiphany/t-epiphany @@ -0,0 +1,38 @@ +# Copyright (C) 1997-2014 Free Software Foundation, Inc. +# Contributed by Embecosm on behalf of Adapteva, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +mode-switch-use.o : $(srcdir)/config/epiphany/mode-switch-use.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TM_P_H) $(RTL_H) \ + $(TREE_PASS_H) $(INSN_ATTR_H) $(EMIT_RTL_H) $(FUNCTION_H) $(RECOG_H) \ + insn-config.h $(DF_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $< + +resolve-sw-modes.o : $(srcdir)/config/epiphany/resolve-sw-modes.c \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(MACHMODE_H) $(TM_H) hard-reg-set.h \ + $(TM_P_H) $(VEC_H) sbitmap.h $(BASIC_BLOCK_H) $(DF_H) $(RTL_H) \ + insn-config.h insn-codes.h $(EMIT_RTL_H) $(RECOG_H) $(FUNCTION_H) \ + insn-attr-common.h $(TREE_PASS_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $< + +SPECS = specs.install + +specs: specs.install + sed -e 's,epiphany_library_extra_spec,epiphany_library_stub_spec,' \ + -e 's,epiphany_library_build_spec,epiphany_library_extra_spec,' \ + < specs.install > $@ ; \ diff --git a/gcc-4.9/gcc/config/flat.h b/gcc-4.9/gcc/config/flat.h new file mode 100644 index 000000000..3af4e57e6 --- /dev/null +++ b/gcc-4.9/gcc/config/flat.h @@ -0,0 +1,22 @@ +/* Defines to be used for targets that support flat executables. + Copyright (C) 2006-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This macro applies on top of OBJECT_FORMAT_ELF and indicates that + we want to support both flat and ELF output. */ +#define OBJECT_FORMAT_FLAT diff --git a/gcc-4.9/gcc/config/fr30/constraints.md b/gcc-4.9/gcc/config/fr30/constraints.md new file mode 100644 index 000000000..dc8fa77d1 --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/constraints.md @@ -0,0 +1,71 @@ +;; Constraint definitions for the FR30. +;; Copyright (C) 2011-2014 Free Software Foundation, Inc. + +;; This file is part of GCC. 
+ +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Register constraints. +(define_register_constraint "d" "MULTIPLY_64_REG" + "The MDH,MDL register pair as used by MUL and MULU.") + +(define_register_constraint "e" "MULTIPLY_32_REG" + "The MDL register as used by MULH and MULUH.") + +(define_register_constraint "h" "HIGH_REGS" + "Registers 8 through 15.") + +(define_register_constraint "l" "LOW_REGS" + "Registers 0 through 7.") + +(define_register_constraint "a" "ALL_REGS" + "@internal") + +;; Integer constraints. +(define_constraint "I" + "An integer in the range 0 to 15." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 15)"))) + +(define_constraint "J" + "An integer in the range -16 to -1." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -16, -1)"))) + +(define_constraint "K" + "An integer in the range 16 to 31." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 16, 31)"))) + +(define_constraint "L" + "An integer in the range 0 to 255." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 255)"))) + +(define_constraint "M" + "An integer in the range 0 to 1048575." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 1048575)"))) + +(define_constraint "P" + "An integer in the range -256 to 255." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -256, 255)"))) + +;; Extra constraints. +(define_constraint "Q" + "@internal" + (and (match_code "mem") + (match_code "symbol_ref" "0"))) diff --git a/gcc-4.9/gcc/config/fr30/fr30-protos.h b/gcc-4.9/gcc/config/fr30/fr30-protos.h new file mode 100644 index 000000000..a2a7d793f --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/fr30-protos.h @@ -0,0 +1,32 @@ +/* Prototypes for fr30.c functions used in the md file & elsewhere. + Copyright (C) 1999-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +extern void fr30_expand_prologue (void); +extern void fr30_expand_epilogue (void); +extern unsigned int fr30_compute_frame_size (int, int); + +#ifdef RTX_CODE +extern int fr30_check_multiple_regs (rtx *, int, int); +extern void fr30_print_operand (FILE *, rtx, int); +extern void fr30_print_operand_address (FILE *, rtx); +extern rtx fr30_move_double (rtx *); +#ifdef HAVE_MACHINE_MODES +extern int fr30_const_double_is_zero (rtx); +#endif /* HAVE_MACHINE_MODES */ +#endif /* RTX_CODE */ diff --git a/gcc-4.9/gcc/config/fr30/fr30.c b/gcc-4.9/gcc/config/fr30/fr30.c new file mode 100644 index 000000000..65084f69c --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/fr30.c @@ -0,0 +1,1062 @@ +/* FR30 specific functions. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Cygnus Solutions. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/*{{{ Includes */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "output.h" +#include "expr.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "df.h" +#include "diagnostic-core.h" +#include "tm_p.h" +#include "target.h" +#include "target-def.h" + +/*}}}*/ +/*{{{ Function Prologues & Epilogues */ + +/* The FR30 stack looks like this: + + Before call After call + FP ->| | | | + +-----------------------+ +-----------------------+ high + | | | | memory + | local variables, | | local variables, | + | reg save area, etc. | | reg save area, etc. | + | | | | + +-----------------------+ +-----------------------+ + | | | | + | args to the func that | | args to this func. | + | is being called that | | | + SP ->| do not fit in regs | | | + +-----------------------+ +-----------------------+ + | args that used to be | \ + | in regs; only created | | pretend_size + AP-> | for vararg funcs | / + +-----------------------+ + | | \ + | register save area | | + | | | + +-----------------------+ | reg_size + | return address | | + +-----------------------+ | + FP ->| previous frame ptr | / + +-----------------------+ + | | \ + | local variables | | var_size + | | / + +-----------------------+ + | | \ + low | room for args to | | + memory | other funcs called | | args_size + | from this one | | + SP ->| | / + +-----------------------+ + + Note, AP is a fake hard register. It will be eliminated in favor of + SP or FP as appropriate. + + Note, Some or all of the stack sections above may be omitted if they + are not needed. */ + +/* Structure to be filled in by fr30_compute_frame_size() with register + save masks, and offsets for the current function. */ +struct fr30_frame_info +{ + unsigned int total_size; /* # Bytes that the entire frame takes up. 
*/ + unsigned int pretend_size; /* # Bytes we push and pretend caller did. */ + unsigned int args_size; /* # Bytes that outgoing arguments take up. */ + unsigned int reg_size; /* # Bytes needed to store regs. */ + unsigned int var_size; /* # Bytes that variables take up. */ + unsigned int frame_size; /* # Bytes in current frame. */ + unsigned int gmask; /* Mask of saved registers. */ + unsigned int save_fp; /* Nonzero if frame pointer must be saved. */ + unsigned int save_rp; /* Nonzero if return pointer must be saved. */ + int initialised; /* Nonzero if frame size already calculated. */ +}; + +/* Current frame information calculated by fr30_compute_frame_size(). */ +static struct fr30_frame_info current_frame_info; + +/* Zero structure to initialize current_frame_info. */ +static struct fr30_frame_info zero_frame_info; + +static void fr30_setup_incoming_varargs (cumulative_args_t, enum machine_mode, + tree, int *, int); +static bool fr30_must_pass_in_stack (enum machine_mode, const_tree); +static int fr30_arg_partial_bytes (cumulative_args_t, enum machine_mode, + tree, bool); +static rtx fr30_function_arg (cumulative_args_t, enum machine_mode, + const_tree, bool); +static void fr30_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); +static bool fr30_frame_pointer_required (void); +static rtx fr30_function_value (const_tree, const_tree, bool); +static rtx fr30_libcall_value (enum machine_mode, const_rtx); +static bool fr30_function_value_regno_p (const unsigned int); +static bool fr30_can_eliminate (const int, const int); +static void fr30_asm_trampoline_template (FILE *); +static void fr30_trampoline_init (rtx, tree, rtx); +static int fr30_num_arg_regs (enum machine_mode, const_tree); + +#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM)) +#define RETURN_POINTER_MASK (1 << (RETURN_POINTER_REGNUM)) + +/* Tell prologue and epilogue if register REGNO should be saved / restored. + The return address and frame pointer are treated separately. + Don't consider them here. */ +#define MUST_SAVE_REGISTER(regno) \ + ( (regno) != RETURN_POINTER_REGNUM \ + && (regno) != FRAME_POINTER_REGNUM \ + && df_regs_ever_live_p (regno) \ + && ! call_used_regs [regno] ) + +#define MUST_SAVE_FRAME_POINTER (df_regs_ever_live_p (FRAME_POINTER_REGNUM) || frame_pointer_needed) +#define MUST_SAVE_RETURN_POINTER (df_regs_ever_live_p (RETURN_POINTER_REGNUM) || crtl->profile) + +#if UNITS_PER_WORD == 4 +#define WORD_ALIGN(SIZE) (((SIZE) + 3) & ~3) +#endif + +/* Initialize the GCC target structure. 
*/ +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" +#undef TARGET_ASM_ALIGNED_SI_OP +#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" + +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES fr30_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG fr30_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE fr30_function_arg_advance + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE fr30_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE fr30_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P fr30_function_value_regno_p + +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS fr30_setup_incoming_varargs +#undef TARGET_MUST_PASS_IN_STACK +#define TARGET_MUST_PASS_IN_STACK fr30_must_pass_in_stack + +#undef TARGET_FRAME_POINTER_REQUIRED +#define TARGET_FRAME_POINTER_REQUIRED fr30_frame_pointer_required + +#undef TARGET_CAN_ELIMINATE +#define TARGET_CAN_ELIMINATE fr30_can_eliminate + +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE +#define TARGET_ASM_TRAMPOLINE_TEMPLATE fr30_asm_trampoline_template +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT fr30_trampoline_init + +struct gcc_target targetm = TARGET_INITIALIZER; + + +/* Worker function for TARGET_CAN_ELIMINATE. */ + +bool +fr30_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +{ + return (to == FRAME_POINTER_REGNUM || ! frame_pointer_needed); +} + +/* Returns the number of bytes offset between FROM_REG and TO_REG + for the current function. As a side effect it fills in the + current_frame_info structure, if the data is available. */ +unsigned int +fr30_compute_frame_size (int from_reg, int to_reg) +{ + int regno; + unsigned int return_value; + unsigned int var_size; + unsigned int args_size; + unsigned int pretend_size; + unsigned int reg_size; + unsigned int gmask; + + var_size = WORD_ALIGN (get_frame_size ()); + args_size = WORD_ALIGN (crtl->outgoing_args_size); + pretend_size = crtl->args.pretend_args_size; + + reg_size = 0; + gmask = 0; + + /* Calculate space needed for registers. */ + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++) + { + if (MUST_SAVE_REGISTER (regno)) + { + reg_size += UNITS_PER_WORD; + gmask |= 1 << regno; + } + } + + current_frame_info.save_fp = MUST_SAVE_FRAME_POINTER; + current_frame_info.save_rp = MUST_SAVE_RETURN_POINTER; + + reg_size += (current_frame_info.save_fp + current_frame_info.save_rp) + * UNITS_PER_WORD; + + /* Save computed information. */ + current_frame_info.pretend_size = pretend_size; + current_frame_info.var_size = var_size; + current_frame_info.args_size = args_size; + current_frame_info.reg_size = reg_size; + current_frame_info.frame_size = args_size + var_size; + current_frame_info.total_size = args_size + var_size + reg_size + pretend_size; + current_frame_info.gmask = gmask; + current_frame_info.initialised = reload_completed; + + /* Calculate the required distance. */ + return_value = 0; + + if (to_reg == STACK_POINTER_REGNUM) + return_value += args_size + var_size; + + if (from_reg == ARG_POINTER_REGNUM) + return_value += reg_size; + + return return_value; +} + +/* Called after register allocation to add any instructions needed for the + prologue. 
Using a prologue insn is favored compared to putting all of the + instructions in output_function_prologue(), since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. */ + +void +fr30_expand_prologue (void) +{ + int regno; + rtx insn; + + if (! current_frame_info.initialised) + fr30_compute_frame_size (0, 0); + + /* This cases shouldn't happen. Catch it now. */ + gcc_assert (current_frame_info.total_size || !current_frame_info.gmask); + + /* Allocate space for register arguments if this is a variadic function. */ + if (current_frame_info.pretend_size) + { + int regs_to_save = current_frame_info.pretend_size / UNITS_PER_WORD; + + /* Push argument registers into the pretend arg area. */ + for (regno = FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS; regno --, regs_to_save --;) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + + if (current_frame_info.gmask) + { + /* Save any needed call-saved regs. */ + for (regno = STACK_POINTER_REGNUM; regno--;) + { + if ((current_frame_info.gmask & (1 << regno)) != 0) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, regno))); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + + /* Save return address if necessary. */ + if (current_frame_info.save_rp) + { + insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode, + RETURN_POINTER_REGNUM))); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Save old frame pointer and create new one, if necessary. */ + if (current_frame_info.save_fp) + { + if (current_frame_info.frame_size < ((1 << 10) - UNITS_PER_WORD)) + { + int enter_size = current_frame_info.frame_size + UNITS_PER_WORD; + rtx pattern; + + insn = emit_insn (gen_enter_func (GEN_INT (enter_size))); + RTX_FRAME_RELATED_P (insn) = 1; + + pattern = PATTERN (insn); + + /* Also mark all 3 subexpressions as RTX_FRAME_RELATED_P. */ + if (GET_CODE (pattern) == PARALLEL) + { + int x; + for (x = XVECLEN (pattern, 0); x--;) + { + rtx part = XVECEXP (pattern, 0, x); + + /* One of the insns in the ENTER pattern updates the + frame pointer. If we do not actually need the frame + pointer in this function then this is a side effect + rather than a desired effect, so we do not mark that + insn as being related to the frame set up. Doing this + allows us to compile the crash66.C test file in the + G++ testsuite. */ + if (! frame_pointer_needed + && GET_CODE (part) == SET + && SET_DEST (part) == hard_frame_pointer_rtx) + RTX_FRAME_RELATED_P (part) = 0; + else + RTX_FRAME_RELATED_P (part) = 1; + } + } + } + else + { + insn = emit_insn (gen_movsi_push (frame_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + if (frame_pointer_needed) + { + insn = emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + } + + /* Allocate the stack frame. */ + if (current_frame_info.frame_size == 0) + ; /* Nothing to do. */ + else if (current_frame_info.save_fp + && current_frame_info.frame_size < ((1 << 10) - UNITS_PER_WORD)) + ; /* Nothing to do. 
*/ + else if (current_frame_info.frame_size <= 512) + { + insn = emit_insn (gen_add_to_stack + (GEN_INT (- (signed) current_frame_info.frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM); + insn = emit_insn (gen_movsi (tmp, GEN_INT (current_frame_info.frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (crtl->profile) + emit_insn (gen_blockage ()); +} + +/* Called after register allocation to add any instructions needed for the + epilogue. Using an epilogue insn is favored compared to putting all of the + instructions in output_function_epilogue(), since it allows the scheduler + to intermix instructions with the restores of the caller saved registers. + In some cases, it might be necessary to emit a barrier instruction as the + first insn to prevent such scheduling. */ +void +fr30_expand_epilogue (void) +{ + int regno; + + /* Perform the inversion operations of the prologue. */ + gcc_assert (current_frame_info.initialised); + + /* Pop local variables and arguments off the stack. + If frame_pointer_needed is TRUE then the frame pointer register + has actually been used as a frame pointer, and we can recover + the stack pointer from it, otherwise we must unwind the stack + manually. */ + if (current_frame_info.frame_size > 0) + { + if (current_frame_info.save_fp && frame_pointer_needed) + { + emit_insn (gen_leave_func ()); + current_frame_info.save_fp = 0; + } + else if (current_frame_info.frame_size <= 508) + emit_insn (gen_add_to_stack + (GEN_INT (current_frame_info.frame_size))); + else + { + rtx tmp = gen_rtx_REG (Pmode, PROLOGUE_TMP_REGNUM); + emit_insn (gen_movsi (tmp, GEN_INT (current_frame_info.frame_size))); + emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp)); + } + } + + if (current_frame_info.save_fp) + emit_insn (gen_movsi_pop (frame_pointer_rtx)); + + /* Pop all the registers that were pushed. */ + if (current_frame_info.save_rp) + emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, RETURN_POINTER_REGNUM))); + + for (regno = 0; regno < STACK_POINTER_REGNUM; regno ++) + if (current_frame_info.gmask & (1 << regno)) + emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, regno))); + + if (current_frame_info.pretend_size) + emit_insn (gen_add_to_stack (GEN_INT (current_frame_info.pretend_size))); + + /* Reset state info for each function. */ + current_frame_info = zero_frame_info; + + emit_jump_insn (gen_return_from_func ()); +} + +/* Do any needed setup for a variadic function. We must create a register + parameter block, and then copy any anonymous arguments, plus the last + named argument, from registers into memory. * copying actually done in + fr30_expand_prologue(). + + ARG_REGS_USED_SO_FAR has *not* been updated for the last named argument + which has type TYPE and mode MODE, and we rely on this fact. */ +void +fr30_setup_incoming_varargs (cumulative_args_t arg_regs_used_so_far_v, + enum machine_mode mode, + tree type ATTRIBUTE_UNUSED, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) +{ + CUMULATIVE_ARGS *arg_regs_used_so_far + = get_cumulative_args (arg_regs_used_so_far_v); + int size; + + /* All BLKmode values are passed by reference. */ + gcc_assert (mode != BLKmode); + + /* ??? This run-time test as well as the code inside the if + statement is probably unnecessary. 
*/ + if (targetm.calls.strict_argument_naming (arg_regs_used_so_far_v)) + /* If TARGET_STRICT_ARGUMENT_NAMING returns true, then the last named + arg must not be treated as an anonymous arg. */ + /* ??? This is a pointer increment, which makes no sense. */ + arg_regs_used_so_far += fr30_num_arg_regs (mode, type); + + size = FR30_NUM_ARG_REGS - (* arg_regs_used_so_far); + + if (size <= 0) + return; + + * pretend_size = (size * UNITS_PER_WORD); +} + +/*}}}*/ +/*{{{ Printing operands */ + +/* Print a memory address as an operand to reference that memory location. */ + +void +fr30_print_operand_address (FILE *stream, rtx address) +{ + switch (GET_CODE (address)) + { + case SYMBOL_REF: + output_addr_const (stream, address); + break; + + default: + fprintf (stderr, "code = %x\n", GET_CODE (address)); + debug_rtx (address); + output_operand_lossage ("fr30_print_operand_address: unhandled address"); + break; + } +} + +/* Print an operand. */ + +void +fr30_print_operand (FILE *file, rtx x, int code) +{ + rtx x0; + + switch (code) + { + case '#': + /* Output a :D if this instruction is delayed. */ + if (dbr_sequence_length () != 0) + fputs (":D", file); + return; + + case 'p': + /* Compute the register name of the second register in a hi/lo + register pair. */ + if (GET_CODE (x) != REG) + output_operand_lossage ("fr30_print_operand: unrecognized %%p code"); + else + fprintf (file, "r%d", REGNO (x) + 1); + return; + + case 'b': + /* Convert GCC's comparison operators into FR30 comparison codes. */ + switch (GET_CODE (x)) + { + case EQ: fprintf (file, "eq"); break; + case NE: fprintf (file, "ne"); break; + case LT: fprintf (file, "lt"); break; + case LE: fprintf (file, "le"); break; + case GT: fprintf (file, "gt"); break; + case GE: fprintf (file, "ge"); break; + case LTU: fprintf (file, "c"); break; + case LEU: fprintf (file, "ls"); break; + case GTU: fprintf (file, "hi"); break; + case GEU: fprintf (file, "nc"); break; + default: + output_operand_lossage ("fr30_print_operand: unrecognized %%b code"); + break; + } + return; + + case 'B': + /* Convert GCC's comparison operators into the complimentary FR30 + comparison codes. */ + switch (GET_CODE (x)) + { + case EQ: fprintf (file, "ne"); break; + case NE: fprintf (file, "eq"); break; + case LT: fprintf (file, "ge"); break; + case LE: fprintf (file, "gt"); break; + case GT: fprintf (file, "le"); break; + case GE: fprintf (file, "lt"); break; + case LTU: fprintf (file, "nc"); break; + case LEU: fprintf (file, "hi"); break; + case GTU: fprintf (file, "ls"); break; + case GEU: fprintf (file, "c"); break; + default: + output_operand_lossage ("fr30_print_operand: unrecognized %%B code"); + break; + } + return; + + case 'A': + /* Print a signed byte value as an unsigned value. */ + if (GET_CODE (x) != CONST_INT) + output_operand_lossage ("fr30_print_operand: invalid operand to %%A code"); + else + { + HOST_WIDE_INT val; + + val = INTVAL (x); + + val &= 0xff; + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); + } + return; + + case 'x': + if (GET_CODE (x) != CONST_INT + || INTVAL (x) < 16 + || INTVAL (x) > 32) + output_operand_lossage ("fr30_print_operand: invalid %%x code"); + else + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) - 16); + return; + + case 'F': + if (GET_CODE (x) != CONST_DOUBLE) + output_operand_lossage ("fr30_print_operand: invalid %%F code"); + else + { + char str[30]; + + real_to_decimal (str, CONST_DOUBLE_REAL_VALUE (x), + sizeof (str), 0, 1); + fputs (str, file); + } + return; + + case 0: + /* Handled below. 
*/ + break; + + default: + fprintf (stderr, "unknown code = %x\n", code); + output_operand_lossage ("fr30_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + x0 = XEXP (x,0); + + switch (GET_CODE (x0)) + { + case REG: + gcc_assert ((unsigned) REGNO (x0) < ARRAY_SIZE (reg_names)); + fprintf (file, "@%s", reg_names [REGNO (x0)]); + break; + + case PLUS: + if (GET_CODE (XEXP (x0, 0)) != REG + || REGNO (XEXP (x0, 0)) < FRAME_POINTER_REGNUM + || REGNO (XEXP (x0, 0)) > STACK_POINTER_REGNUM + || GET_CODE (XEXP (x0, 1)) != CONST_INT) + { + fprintf (stderr, "bad INDEXed address:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + else if (REGNO (XEXP (x0, 0)) == FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT val = INTVAL (XEXP (x0, 1)); + if (val < -(1 << 9) || val > ((1 << 9) - 4)) + { + fprintf (stderr, "frame INDEX out of range:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + fprintf (file, "@(r14, #" HOST_WIDE_INT_PRINT_DEC ")", val); + } + else + { + HOST_WIDE_INT val = INTVAL (XEXP (x0, 1)); + if (val < 0 || val > ((1 << 6) - 4)) + { + fprintf (stderr, "stack INDEX out of range:"); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + } + fprintf (file, "@(r15, #" HOST_WIDE_INT_PRINT_DEC ")", val); + } + break; + + case SYMBOL_REF: + output_address (x0); + break; + + default: + fprintf (stderr, "bad MEM code = %x\n", GET_CODE (x0)); + debug_rtx (x); + output_operand_lossage ("fr30_print_operand: unhandled MEM"); + break; + } + break; + + case CONST_DOUBLE : + /* We handle SFmode constants here as output_addr_const doesn't. */ + if (GET_MODE (x) == SFmode) + { + REAL_VALUE_TYPE d; + long l; + + REAL_VALUE_FROM_CONST_DOUBLE (d, x); + REAL_VALUE_TO_TARGET_SINGLE (d, l); + fprintf (file, "0x%08lx", l); + break; + } + + /* Fall through. Let output_addr_const deal with it. */ + default: + output_addr_const (file, x); + break; + } + + return; +} + +/*}}}*/ + +/* Implements TARGET_FUNCTION_VALUE. */ + +static rtx +fr30_function_value (const_tree valtype, + const_tree fntype_or_decli ATTRIBUTE_UNUSED, + bool outgoing ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (TYPE_MODE (valtype), RETURN_VALUE_REGNUM); +} + +/* Implements TARGET_LIBCALL_VALUE. */ + +static rtx +fr30_libcall_value (enum machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) +{ + return gen_rtx_REG (mode, RETURN_VALUE_REGNUM); +} + +/* Implements TARGET_FUNCTION_VALUE_REGNO_P. */ + +static bool +fr30_function_value_regno_p (const unsigned int regno) +{ + return (regno == RETURN_VALUE_REGNUM); +} + +/*{{{ Function arguments */ + +/* Return true if we should pass an argument on the stack rather than + in registers. */ + +static bool +fr30_must_pass_in_stack (enum machine_mode mode, const_tree type) +{ + if (mode == BLKmode) + return true; + if (type == NULL) + return false; + return AGGREGATE_TYPE_P (type); +} + +/* Compute the number of word sized registers needed to hold a + function argument of mode INT_MODE and tree type TYPE. 
*/ +static int +fr30_num_arg_regs (enum machine_mode mode, const_tree type) +{ + int size; + + if (targetm.calls.must_pass_in_stack (mode, type)) + return 0; + + if (type && mode == BLKmode) + size = int_size_in_bytes (type); + else + size = GET_MODE_SIZE (mode); + + return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; +} + +/* Returns the number of bytes in which *part* of a parameter of machine + mode MODE and tree type TYPE (which may be NULL if the type is not known). + If the argument fits entirely in the argument registers, or entirely on + the stack, then 0 is returned. + CUM is the number of argument registers already used by earlier + parameters to the function. */ + +static int +fr30_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode, + tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + /* Unnamed arguments, i.e. those that are prototyped as ... + are always passed on the stack. + Also check here to see if all the argument registers are full. */ + if (named == 0 || *cum >= FR30_NUM_ARG_REGS) + return 0; + + /* Work out how many argument registers would be needed if this + parameter were to be passed entirely in registers. If there + are sufficient argument registers available (or if no registers + are needed because the parameter must be passed on the stack) + then return zero, as this parameter does not require partial + register, partial stack space. */ + if (*cum + fr30_num_arg_regs (mode, type) <= FR30_NUM_ARG_REGS) + return 0; + + return (FR30_NUM_ARG_REGS - *cum) * UNITS_PER_WORD; +} + +static rtx +fr30_function_arg (cumulative_args_t cum_v, enum machine_mode mode, + const_tree type, bool named) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + if (!named + || fr30_must_pass_in_stack (mode, type) + || *cum >= FR30_NUM_ARG_REGS) + return NULL_RTX; + else + return gen_rtx_REG (mode, *cum + FIRST_ARG_REGNUM); +} + +/* A C statement (sans semicolon) to update the summarizer variable CUM to + advance past an argument in the argument list. The values MODE, TYPE and + NAMED describe that argument. Once this is done, the variable CUM is + suitable for analyzing the *following* argument with `FUNCTION_ARG', etc. + + This macro need not do anything if the argument in question was passed on + the stack. The compiler knows how to track the amount of stack space used + for arguments without any special help. */ +static void +fr30_function_arg_advance (cumulative_args_t cum, enum machine_mode mode, + const_tree type, bool named) +{ + *get_cumulative_args (cum) += named * fr30_num_arg_regs (mode, type); +} + +/*}}}*/ +/*{{{ Operand predicates */ + +#ifndef Mmode +#define Mmode enum machine_mode +#endif + +/* Returns true iff all the registers in the operands array + are in descending or ascending order.
int +fr30_check_multiple_regs (rtx *operands, int num_operands, int descending) +{ + if (descending) + { + unsigned int prev_regno = 0; + + while (num_operands --) + { + if (GET_CODE (operands [num_operands]) != REG) + return 0; + + if (REGNO (operands [num_operands]) < prev_regno) + return 0; + + prev_regno = REGNO (operands [num_operands]); + } + } + else + { + unsigned int prev_regno = CONDITION_CODE_REGNUM; + + while (num_operands --) + { + if (GET_CODE (operands [num_operands]) != REG) + return 0; + + if (REGNO (operands [num_operands]) > prev_regno) + return 0; + + prev_regno = REGNO (operands [num_operands]); + } + } + + return 1; +} + +int +fr30_const_double_is_zero (rtx operand) +{ + REAL_VALUE_TYPE d; + + if (operand == NULL || GET_CODE (operand) != CONST_DOUBLE) + return 0; + + REAL_VALUE_FROM_CONST_DOUBLE (d, operand); + + return REAL_VALUES_EQUAL (d, dconst0); +} + +/*}}}*/ +/*{{{ Instruction Output Routines */ + +/* Output a double word move. + It must be REG<-REG, REG<-MEM, MEM<-REG or REG<-CONST. + On the FR30 we are constrained by the fact that it does not + support offsetable addresses, and so we have to load the + address of the second word into the second destination register + before we can use it. */ + +rtx +fr30_move_double (rtx * operands) +{ + rtx src = operands[1]; + rtx dest = operands[0]; + enum rtx_code src_code = GET_CODE (src); + enum rtx_code dest_code = GET_CODE (dest); + enum machine_mode mode = GET_MODE (dest); + rtx val; + + start_sequence (); + + if (dest_code == REG) + { + if (src_code == REG) + { + int reverse = (REGNO (dest) == REGNO (src) + 1); + + /* We normally copy the low-numbered register first. However, if + the first register of operand 0 is the same as the second register + of operand 1, we must copy in the opposite order. */ + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, reverse, TRUE, mode), + operand_subword (src, reverse, TRUE, mode))); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, !reverse, TRUE, mode), + operand_subword (src, !reverse, TRUE, mode))); + } + else if (src_code == MEM) + { + rtx addr = XEXP (src, 0); + rtx dest0 = operand_subword (dest, 0, TRUE, mode); + rtx dest1 = operand_subword (dest, 1, TRUE, mode); + rtx new_mem; + + gcc_assert (GET_CODE (addr) == REG); + + /* Copy the address before clobbering it. See PR 34174.
*/ + emit_insn (gen_rtx_SET (SImode, dest1, addr)); + emit_insn (gen_rtx_SET (VOIDmode, dest0, + adjust_address (src, SImode, 0))); + emit_insn (gen_rtx_SET (SImode, dest1, + plus_constant (SImode, dest1, + UNITS_PER_WORD))); + + new_mem = gen_rtx_MEM (SImode, dest1); + MEM_COPY_ATTRIBUTES (new_mem, src); + + emit_insn (gen_rtx_SET (VOIDmode, dest1, new_mem)); + } + else if (src_code == CONST_INT || src_code == CONST_DOUBLE) + { + rtx words[2]; + split_double (src, &words[0], &words[1]); + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 0, TRUE, mode), + words[0])); + + emit_insn (gen_rtx_SET (VOIDmode, + operand_subword (dest, 1, TRUE, mode), + words[1])); + } + } + else if (src_code == REG && dest_code == MEM) + { + rtx addr = XEXP (dest, 0); + rtx src0; + rtx src1; + + gcc_assert (GET_CODE (addr) == REG); + + src0 = operand_subword (src, 0, TRUE, mode); + src1 = operand_subword (src, 1, TRUE, mode); + + emit_move_insn (adjust_address (dest, SImode, 0), src0); + + if (REGNO (addr) == STACK_POINTER_REGNUM + || REGNO (addr) == FRAME_POINTER_REGNUM) + emit_insn (gen_rtx_SET (VOIDmode, + adjust_address (dest, SImode, UNITS_PER_WORD), + src1)); + else + { + rtx new_mem; + rtx scratch_reg_r0 = gen_rtx_REG (SImode, 0); + + /* We need a scratch register to hold the value of 'address + 4'. + We use r0 for this purpose. It is used for example for long + jumps and is already marked to not be used by normal register + allocation. */ + emit_insn (gen_movsi_internal (scratch_reg_r0, addr)); + emit_insn (gen_addsi_small_int (scratch_reg_r0, scratch_reg_r0, + GEN_INT (UNITS_PER_WORD))); + new_mem = gen_rtx_MEM (SImode, scratch_reg_r0); + MEM_COPY_ATTRIBUTES (new_mem, dest); + emit_move_insn (new_mem, src1); + emit_insn (gen_blockage ()); + } + } + else + /* This should have been prevented by the constraints on movdi_insn. */ + gcc_unreachable (); + + val = get_insns (); + end_sequence (); + + return val; +} + +/* Implement TARGET_FRAME_POINTER_REQUIRED. */ + +bool +fr30_frame_pointer_required (void) +{ + return (flag_omit_frame_pointer == 0 || crtl->args.pretend_args_size > 0); +} + +/*}}}*/ +/*{{{ Trampoline Output Routines */ + +/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. + On the FR30, the trampoline is: + + nop + ldi:32 STATIC, r12 + nop + ldi:32 FUNCTION, r0 + jmp @r0 + + The no-ops are to guarantee that the static chain and final + target are 32 bit aligned within the trampoline. That allows us to + initialize those locations with simple SImode stores. The alternative + would be to use HImode stores. */ + +static void +fr30_asm_trampoline_template (FILE *f) +{ + fprintf (f, "\tnop\n"); + fprintf (f, "\tldi:32\t#0, %s\n", reg_names [STATIC_CHAIN_REGNUM]); + fprintf (f, "\tnop\n"); + fprintf (f, "\tldi:32\t#0, %s\n", reg_names [COMPILER_SCRATCH_REGISTER]); + fprintf (f, "\tjmp\t@%s\n", reg_names [COMPILER_SCRATCH_REGISTER]); +} + +/* Implement TARGET_TRAMPOLINE_INIT. 
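+   Assuming the 2-byte nop and 6-byte ldi:32 encodings implied by the 18 byte
+   TRAMPOLINE_SIZE, the 32-bit immediates of the two ldi:32 instructions in
+   the template above fall at byte offsets 4 and 12, which is where the
+   static chain and the target function address are stored below.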
*/ + +static void +fr30_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx mem; + + emit_block_move (m_tramp, assemble_trampoline_template (), + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); + + mem = adjust_address (m_tramp, SImode, 4); + emit_move_insn (mem, chain_value); + mem = adjust_address (m_tramp, SImode, 12); + emit_move_insn (mem, fnaddr); +} + +/*}}}*/ +/* Local Variables: */ +/* folded-file: t */ +/* End: */ diff --git a/gcc-4.9/gcc/config/fr30/fr30.h b/gcc-4.9/gcc/config/fr30/fr30.h new file mode 100644 index 000000000..ff3115af6 --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/fr30.h @@ -0,0 +1,845 @@ +/*{{{ Comment. */ + +/* Definitions of FR30 target. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + Contributed by Cygnus Solutions. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/*}}}*/ +/*{{{ Run-time target specifications. */ + +#undef ASM_SPEC +#define ASM_SPEC "" + +/* Define this to be a string constant containing `-D' options to define the + predefined macros that identify this machine and system. These macros will + be predefined unless the `-ansi' option is specified. */ + +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define_std ("fr30"); \ + builtin_assert ("machine=fr30"); \ + } \ + while (0) + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC "crt0.o%s crti.o%s crtbegin.o%s" + +/* Include the OS stub library, so that the code can be simulated. + This is not the right way to do this. Ideally this kind of thing + should be done in the linker script - but I have not worked out how + to specify the location of a linker script in a gcc command line yet... */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%{!mno-lsim:-lsim} crtend.o%s crtn.o%s" + +#undef LIB_SPEC +#define LIB_SPEC "-lc" + +#undef LINK_SPEC +#define LINK_SPEC "%{h*} %{v:-V} \ + %{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}" + +/*}}}*/ +/*{{{ Storage Layout. */ + +#define BITS_BIG_ENDIAN 1 + +#define BYTES_BIG_ENDIAN 1 + +#define WORDS_BIG_ENDIAN 1 + +#define UNITS_PER_WORD 4 + +#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \ + do \ + { \ + if (GET_MODE_CLASS (MODE) == MODE_INT \ + && GET_MODE_SIZE (MODE) < 4) \ + (MODE) = SImode; \ + } \ + while (0) + +#define PARM_BOUNDARY 32 + +#define STACK_BOUNDARY 32 + +#define FUNCTION_BOUNDARY 32 + +#define BIGGEST_ALIGNMENT 32 + +#define DATA_ALIGNMENT(TYPE, ALIGN) \ + (TREE_CODE (TYPE) == ARRAY_TYPE \ + && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define CONSTANT_ALIGNMENT(EXP, ALIGN) \ + (TREE_CODE (EXP) == STRING_CST \ + && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN)) + +#define STRICT_ALIGNMENT 1 + +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/*}}}*/ +/*{{{ Layout of Source Language Data Types. 
*/ + +#define SHORT_TYPE_SIZE 16 +#define INT_TYPE_SIZE 32 +#define LONG_TYPE_SIZE 32 +#define LONG_LONG_TYPE_SIZE 64 +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 64 + +#define DEFAULT_SIGNED_CHAR 1 + +#undef SIZE_TYPE +#define SIZE_TYPE "unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "long int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE BITS_PER_WORD + +/*}}}*/ +/*{{{ REGISTER BASICS. */ + +/* Number of hardware registers known to the compiler. They receive numbers 0 + through `FIRST_PSEUDO_REGISTER-1'; thus, the first pseudo register's number + really is assigned the number `FIRST_PSEUDO_REGISTER'. */ +#define FIRST_PSEUDO_REGISTER 21 + +/* Fixed register assignments: */ + +/* Here we do a BAD THING - reserve a register for use by the machine + description file. There are too many places in compiler where it + assumes that it can issue a branch or jump instruction without + providing a scratch register for it, and reload just cannot cope, so + we keep a register back for these situations. */ +#define COMPILER_SCRATCH_REGISTER 0 + +/* The register that contains the result of a function call. */ +#define RETURN_VALUE_REGNUM 4 + +/* The first register that can contain the arguments to a function. */ +#define FIRST_ARG_REGNUM 4 + +/* A call-used register that can be used during the function prologue. */ +#define PROLOGUE_TMP_REGNUM COMPILER_SCRATCH_REGISTER + +/* Register numbers used for passing a function's static chain pointer. If + register windows are used, the register number as seen by the called + function is `STATIC_CHAIN_INCOMING_REGNUM', while the register number as + seen by the calling function is `STATIC_CHAIN_REGNUM'. If these registers + are the same, `STATIC_CHAIN_INCOMING_REGNUM' need not be defined. + + The static chain register need not be a fixed register. + + If the static chain is passed in memory, these macros should not be defined; + instead, the next two macros should be defined. */ +#define STATIC_CHAIN_REGNUM 12 +/* #define STATIC_CHAIN_INCOMING_REGNUM */ + +/* An FR30 specific hardware register. */ +#define ACCUMULATOR_REGNUM 13 + +/* The register number of the frame pointer register, which is used to access + automatic variables in the stack frame. On some machines, the hardware + determines which register this is. On other machines, you can choose any + register you wish for this purpose. */ +#define FRAME_POINTER_REGNUM 14 + +/* The register number of the stack pointer register, which must also be a + fixed register according to `FIXED_REGISTERS'. On most machines, the + hardware determines which register this is. */ +#define STACK_POINTER_REGNUM 15 + +/* The following a fake hard registers that describe some of the dedicated + registers on the FR30. */ +#define CONDITION_CODE_REGNUM 16 +#define RETURN_POINTER_REGNUM 17 +#define MD_HIGH_REGNUM 18 +#define MD_LOW_REGNUM 19 + +/* An initializer that says which registers are used for fixed purposes all + throughout the compiled code and are therefore not available for general + allocation. These would include the stack pointer, the frame pointer + (except on machines where that can be used as a general register when no + frame pointer is needed), the program counter on machines where that is + considered one of the addressable registers, and any other numbered register + with a standard use. + + This information is expressed as a sequence of numbers, separated by commas + and surrounded by braces. 
The Nth number is 1 if register N is fixed, 0 + otherwise. + + The table initialized from this macro, and the table initialized by the + following one, may be overridden at run time either automatically, by the + actions of the macro `TARGET_CONDITIONAL_REGISTER_USAGE', or by the user + with the command options `-ffixed-REG', `-fcall-used-REG' and + `-fcall-saved-REG'. */ +#define FIXED_REGISTERS \ + { 1, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ \ + 0, 0, 0, 0, 0, 0, 0, 1, /* 8 - 15 */ \ + 1, 1, 1, 1, 1 } /* 16 - 20 */ + +/* XXX - MDL and MDH set as fixed for now - this is until I can get the + mul patterns working. */ + +/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in + general) by function calls as well as for fixed registers. This macro + therefore identifies the registers that are not available for general + allocation of values that must live across function calls. + + If a register has 0 in `CALL_USED_REGISTERS', the compiler automatically + saves it on function entry and restores it on function exit, if the register + is used within the function. */ +#define CALL_USED_REGISTERS \ + { 1, 1, 1, 1, 1, 1, 1, 1, /* 0 - 7 */ \ + 0, 0, 0, 0, 1, 1, 0, 1, /* 8 - 15 */ \ + 1, 1, 1, 1, 1 } /* 16 - 20 */ + +/* A C initializer containing the assembler's names for the machine registers, + each one as a C string constant. This is what translates register numbers + in the compiler into assembler language. */ +#define REGISTER_NAMES \ +{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \ + "r8", "r9", "r10", "r11", "r12", "ac", "fp", "sp", \ + "cc", "rp", "mdh", "mdl", "ap" \ +} + +/* If defined, a C initializer for an array of structures containing a name and + a register number. This macro defines additional names for hard registers, + thus allowing the `asm' option in declarations to refer to registers using + alternate names. */ +#define ADDITIONAL_REGISTER_NAMES \ +{ \ + {"r13", 13}, {"r14", 14}, {"r15", 15}, {"usp", 15}, {"ps", 16}\ +} + +/*}}}*/ +/*{{{ How Values Fit in Registers. */ + +/* A C expression for the number of consecutive hard registers, starting at + register number REGNO, required to hold a value of mode MODE. */ + +#define HARD_REGNO_NREGS(REGNO, MODE) \ + ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) + +/* A C expression that is nonzero if it is permissible to store a value of mode + MODE in hard register number REGNO (or in several registers starting with + that one). */ + +#define HARD_REGNO_MODE_OK(REGNO, MODE) 1 + +/* A C expression that is nonzero if it is desirable to choose register + allocation so as to avoid move instructions between a value of mode MODE1 + and a value of mode MODE2. + + If `HARD_REGNO_MODE_OK (R, MODE1)' and `HARD_REGNO_MODE_OK (R, MODE2)' are + ever different for any R, then `MODES_TIEABLE_P (MODE1, MODE2)' must be + zero. */ +#define MODES_TIEABLE_P(MODE1, MODE2) 1 + +/*}}}*/ +/*{{{ Register Classes. */ + +/* An enumeral type that must be defined with all the register class names as + enumeral values. `NO_REGS' must be first. `ALL_REGS' must be the last + register class, followed by one more enumeral value, `LIM_REG_CLASSES', + which is not a register class but rather tells how many classes there are. + + Each register class has a number, which is the value of casting the class + name to type `int'. The number serves as an index in many of the tables + described below. 
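+   As a worked example, the REG_CLASS_CONTENTS masks below evaluate to
+   0x000ff for LOW_REGS (r0-r7), 0x0ff00 for HIGH_REGS (r8-r15), 0x0ffff for
+   REAL_REGS (r0-r15), 1 << 19 for MULTIPLY_32_REG (mdl),
+   (1 << 18) | (1 << 19) for MULTIPLY_64_REG (mdh/mdl) and 0x1fffff for
+   ALL_REGS (all 21 registers).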
*/ +enum reg_class +{ + NO_REGS, + MULTIPLY_32_REG, /* the MDL register as used by the MULH, MULUH insns */ + MULTIPLY_64_REG, /* the MDH,MDL register pair as used by MUL and MULU */ + LOW_REGS, /* registers 0 through 7 */ + HIGH_REGS, /* registers 8 through 15 */ + REAL_REGS, /* i.e. all the general hardware registers on the FR30 */ + ALL_REGS, + LIM_REG_CLASSES +}; + +#define GENERAL_REGS REAL_REGS +#define N_REG_CLASSES ((int) LIM_REG_CLASSES) + +/* An initializer containing the names of the register classes as C string + constants. These names are used in writing some of the debugging dumps. */ +#define REG_CLASS_NAMES \ +{ \ + "NO_REGS", \ + "MULTIPLY_32_REG", \ + "MULTIPLY_64_REG", \ + "LOW_REGS", \ + "HIGH_REGS", \ + "REAL_REGS", \ + "ALL_REGS" \ + } + +/* An initializer containing the contents of the register classes, as integers + which are bit masks. The Nth integer specifies the contents of class N. + The way the integer MASK is interpreted is that register R is in the class + if `MASK & (1 << R)' is 1. + + When the machine has more than 32 registers, an integer does not suffice. + Then the integers are replaced by sub-initializers, braced groupings + containing several integers. Each sub-initializer must be suitable as an + initializer for the type `HARD_REG_SET' which is defined in + `hard-reg-set.h'. */ +#define REG_CLASS_CONTENTS \ +{ \ + { 0 }, \ + { 1 << MD_LOW_REGNUM }, \ + { (1 << MD_LOW_REGNUM) | (1 << MD_HIGH_REGNUM) }, \ + { (1 << 8) - 1 }, \ + { ((1 << 8) - 1) << 8 }, \ + { (1 << CONDITION_CODE_REGNUM) - 1 }, \ + { (1 << FIRST_PSEUDO_REGISTER) - 1 } \ +} + +/* A C expression whose value is a register class containing hard register + REGNO. In general there is more than one such class; choose a class which + is "minimal", meaning that no smaller class also contains the register. */ +#define REGNO_REG_CLASS(REGNO) \ + ( (REGNO) < 8 ? LOW_REGS \ + : (REGNO) < CONDITION_CODE_REGNUM ? HIGH_REGS \ + : (REGNO) == MD_LOW_REGNUM ? MULTIPLY_32_REG \ + : (REGNO) == MD_HIGH_REGNUM ? MULTIPLY_64_REG \ + : ALL_REGS) + +/* A macro whose definition is the name of the class to which a valid base + register must belong. A base register is one used in an address which is + the register value plus a displacement. */ +#define BASE_REG_CLASS REAL_REGS + +/* A macro whose definition is the name of the class to which a valid index + register must belong. An index register is one used in an address where its + value is either multiplied by a scale factor or added to another register + (as well as added to a displacement). */ +#define INDEX_REG_CLASS REAL_REGS + +/* A C expression which is nonzero if register number NUM is suitable for use + as a base register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard register. */ +#define REGNO_OK_FOR_BASE_P(NUM) 1 + +/* A C expression which is nonzero if register number NUM is suitable for use + as an index register in operand addresses. It may be either a suitable hard + register or a pseudo register that has been allocated such a hard register. + + The difference between an index register and a base register is that the + index register may be scaled. If an address involves the sum of two + registers, neither one of them scaled, then either one may be labeled the + "base" and the other the "index"; but whichever labeling is used must fit + the machine's constraints of which registers may serve in each capacity. 
+ The compiler will try both labelings, looking for one that is valid, and + will reload one or both registers only if neither labeling works. */ +#define REGNO_OK_FOR_INDEX_P(NUM) 1 + +/* A C expression for the maximum number of consecutive registers of + class CLASS needed to hold a value of mode MODE. + + This is closely related to the macro `HARD_REGNO_NREGS'. In fact, the value + of the macro `CLASS_MAX_NREGS (CLASS, MODE)' should be the maximum value of + `HARD_REGNO_NREGS (REGNO, MODE)' for all REGNO values in the class CLASS. + + This macro helps control the handling of multiple-word values in + the reload pass. */ +#define CLASS_MAX_NREGS(CLASS, MODE) HARD_REGNO_NREGS (0, MODE) + +/*}}}*/ +/*{{{ Basic Stack Layout. */ + +/* Define this macro if pushing a word onto the stack moves the stack pointer + to a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this to macro nonzero if the addresses of local variable slots + are at negative offsets from the frame pointer. */ +#define FRAME_GROWS_DOWNWARD 1 + +/* Offset from the frame pointer to the first local variable slot to be + allocated. + + If `FRAME_GROWS_DOWNWARD', find the next slot's offset by subtracting the + first slot's length from `STARTING_FRAME_OFFSET'. Otherwise, it is found by + adding the length of the first slot to the value `STARTING_FRAME_OFFSET'. */ +/* #define STARTING_FRAME_OFFSET -4 */ +#define STARTING_FRAME_OFFSET 0 + +/* Offset from the stack pointer register to the first location at which + outgoing arguments are placed. If not specified, the default value of zero + is used. This is the proper value for most machines. + + If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first + location at which outgoing arguments are placed. */ +#define STACK_POINTER_OFFSET 0 + +/* Offset from the argument pointer register to the first argument's address. + On some machines it may depend on the data type of the function. + + If `ARGS_GROW_DOWNWARD', this is the offset to the location above the first + argument's address. */ +#define FIRST_PARM_OFFSET(FUNDECL) 0 + +/* A C expression whose value is RTL representing the location of the incoming + return address at the beginning of any function, before the prologue. This + RTL is either a `REG', indicating that the return value is saved in `REG', + or a `MEM' representing a location in the stack. + + You only need to define this macro if you want to support call frame + debugging information like that provided by DWARF 2. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, RETURN_POINTER_REGNUM) + +/*}}}*/ +/*{{{ Register That Address the Stack Frame. */ + +/* The register number of the arg pointer register, which is used to access the + function's argument list. On some machines, this is the same as the frame + pointer register. On some machines, the hardware determines which register + this is. On other machines, you can choose any register you wish for this + purpose. If this is not the same register as the frame pointer register, + then you must mark it as a fixed register according to `FIXED_REGISTERS', or + arrange to be able to eliminate it. */ +#define ARG_POINTER_REGNUM 20 + +/*}}}*/ +/*{{{ Eliminating the Frame Pointer and the Arg Pointer. */ + +/* If defined, this macro specifies a table of register pairs used to eliminate + unneeded registers that point into the stack frame. 
If it is not defined, + the only elimination attempted by the compiler is to replace references to + the frame pointer with references to the stack pointer. + + The definition of this macro is a list of structure initializations, each of + which specifies an original and replacement register. + + On some machines, the position of the argument pointer is not known until + the compilation is completed. In such a case, a separate hard register must + be used for the argument pointer. This register can be eliminated by + replacing it with either the frame pointer or the argument pointer, + depending on whether or not the frame pointer has been eliminated. + + In this case, you might specify: + #define ELIMINABLE_REGS \ + {{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}} + + Note that the elimination of the argument pointer with the stack pointer is + specified first since that is the preferred elimination. */ + +#define ELIMINABLE_REGS \ +{ \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM} \ +} + +/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'. It specifies the + initial difference between the specified pair of registers. This macro must + be defined if `ELIMINABLE_REGS' is defined. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + (OFFSET) = fr30_compute_frame_size (FROM, TO) + +/*}}}*/ +/*{{{ Passing Function Arguments on the Stack. */ + +/* If defined, the maximum amount of space required for outgoing arguments will + be computed and placed into the variable + `crtl->outgoing_args_size'. No space will be pushed onto the + stack for each call; instead, the function prologue should increase the + stack frame size by this amount. + + Defining both `PUSH_ROUNDING' and `ACCUMULATE_OUTGOING_ARGS' is not + proper. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/*}}}*/ +/*{{{ Function Arguments in Registers. */ + +/* The number of register assigned to holding function arguments. */ + +#define FR30_NUM_ARG_REGS 4 + +/* A C type for declaring a variable that is used as the first argument of + `FUNCTION_ARG' and other related values. For some target machines, the type + `int' suffices and can hold the number of bytes of argument so far. + + There is no need to record in `CUMULATIVE_ARGS' anything about the arguments + that have been passed on the stack. The compiler has other variables to + keep track of that. For target machines on which all arguments are passed + on the stack, there is no need to store anything in `CUMULATIVE_ARGS'; + however, the data structure must exist and should not be empty, so use + `int'. */ +/* On the FR30 this value is an accumulating count of the number of argument + registers that have been filled with argument values, as opposed to say, + the number of bytes of argument accumulated so far. */ +#define CUMULATIVE_ARGS int + +/* A C statement (sans semicolon) for initializing the variable CUM for the + state at the beginning of the argument list. The variable has type + `CUMULATIVE_ARGS'. The value of FNTYPE is the tree node for the data type + of the function which will receive the args, or 0 if the args are to a + compiler support library function. The value of INDIRECT is nonzero when + processing an indirect call, for example a call through a function pointer. 
+ The value of INDIRECT is zero for a call to an explicitly named function, a + library function call, or when `INIT_CUMULATIVE_ARGS' is used to find + arguments for the function being compiled. + + When processing a call to a compiler support library function, LIBNAME + identifies which one. It is a `symbol_ref' rtx which contains the name of + the function, as a string. LIBNAME is 0 when an ordinary C function call is + being processed. Thus, each time this macro is called, either LIBNAME or + FNTYPE is nonzero, but never both of them at once. */ +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM) = 0 + +/* A C expression that is nonzero if REGNO is the number of a hard register in + which function arguments are sometimes passed. This does *not* include + implicit arguments such as the static chain and the structure-value address. + On many machines, no registers can be used for this purpose since all + function arguments are pushed on the stack. */ +#define FUNCTION_ARG_REGNO_P(REGNO) \ + ((REGNO) >= FIRST_ARG_REGNUM && ((REGNO) < FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS)) + +/*}}}*/ +/*{{{ How Large Values are Returned. */ + +/* Define this macro to be 1 if all structure and union return values must be + in memory. Since this results in slower code, this should be defined only + if needed for compatibility with other compilers or with an ABI. If you + define this macro to be 0, then the conventions used for structure and union + return values are decided by the `TARGET_RETURN_IN_MEMORY' macro. + + If not defined, this defaults to the value 1. */ +#define DEFAULT_PCC_STRUCT_RETURN 1 + +/*}}}*/ +/*{{{ Generating Code for Profiling. */ + +/* A C statement or compound statement to output to FILE some assembler code to + call the profiling subroutine `mcount'. Before calling, the assembler code + must load the address of a counter variable into a register where `mcount' + expects to find the address. The name of this variable is `LP' followed by + the number LABELNO, so you would generate the name using `LP%d' in a + `fprintf'. + + The details of how the address should be passed to `mcount' are determined + by your operating system environment, not by GCC. To figure them out, + compile a small program for profiling using the system's installed C + compiler and look at the assembler code that results. */ +#define FUNCTION_PROFILER(FILE, LABELNO) \ +{ \ + fprintf (FILE, "\t mov rp, r1\n" ); \ + fprintf (FILE, "\t ldi:32 mcount, r0\n" ); \ + fprintf (FILE, "\t call @r0\n" ); \ + fprintf (FILE, ".word\tLP%d\n", LABELNO); \ +} + +/*}}}*/ +/*{{{ Trampolines for Nested Functions. */ + +/* A C expression for the size in bytes of the trampoline, as an integer. */ +#define TRAMPOLINE_SIZE 18 + +/* We want the trampoline to be aligned on a 32bit boundary so that we can + make sure the location of the static chain & target function within + the trampoline is also aligned on a 32bit boundary. */ +#define TRAMPOLINE_ALIGNMENT 32 + +/*}}}*/ +/*{{{ Addressing Modes. */ + +/* A number, the maximum number of registers that can appear in a valid memory + address. Note that it is up to you to specify a value equal to the maximum + number that `GO_IF_LEGITIMATE_ADDRESS' would ever accept. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* A C compound statement with a conditional `goto LABEL;' executed if X (an + RTX) is a legitimate memory address on the target machine for a memory + operand of mode MODE. 
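+   As an illustration of the ranges accepted below: a stack pointer base may
+   take an unsigned offset of 0 to 60 bytes ((1 << 6) - 4), while a frame
+   pointer (or, in the non-strict variant, argument pointer) base may take a
+   signed offset of -512 to +508 bytes; both forms are only accepted for
+   SImode and SFmode accesses.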
*/ + +/* On the FR30 we only have one real addressing mode - an address in a + register. There are three special cases however: + + * indexed addressing using small positive offsets from the stack pointer + + * indexed addressing using small signed offsets from the frame pointer + + * register plus register addressing using R13 as the base register. + + At the moment we only support the first two of these special cases. */ + +#ifdef REG_OK_STRICT +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ + do \ + { \ + if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == STACK_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), 0, (1 << 6) - 4)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == FRAME_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), -(1 << 9), (1 << 9) - 4)) \ + goto LABEL; \ + } \ + while (0) +#else +#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \ + do \ + { \ + if (GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && REGNO (XEXP (X, 0)) == STACK_POINTER_REGNUM \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), 0, (1 << 6) - 4)) \ + goto LABEL; \ + if (GET_CODE (X) == PLUS \ + && ((MODE) == SImode || (MODE) == SFmode) \ + && GET_CODE (XEXP (X, 0)) == REG \ + && (REGNO (XEXP (X, 0)) == FRAME_POINTER_REGNUM \ + || REGNO (XEXP (X, 0)) == ARG_POINTER_REGNUM) \ + && GET_CODE (XEXP (X, 1)) == CONST_INT \ + && IN_RANGE (INTVAL (XEXP (X, 1)), -(1 << 9), (1 << 9) - 4)) \ + goto LABEL; \ + } \ + while (0) +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as a base register. For hard registers, it should always accept those + which the hardware permits and reject the others. Whether the macro accepts + or rejects pseudo registers must be controlled by `REG_OK_STRICT' as + described above. This usually requires two variant definitions, of which + `REG_OK_STRICT' controls the one actually used. */ +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) (((unsigned) REGNO (X)) <= STACK_POINTER_REGNUM) +#else +#define REG_OK_FOR_BASE_P(X) 1 +#endif + +/* A C expression that is nonzero if X (assumed to be a `reg' RTX) is valid for + use as an index register. + + The difference between an index register and a base register is that the + index register may be scaled. If an address involves the sum of two + registers, neither one of them scaled, then either one may be labeled the + "base" and the other the "index"; but whichever labeling is used must fit + the machine's constraints of which registers may serve in each capacity. + The compiler will try both labelings, looking for one that is valid, and + will reload one or both registers only if neither labeling works. */ +#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X) + +/*}}}*/ +/*{{{ Describing Relative Costs of Operations */ + +/* Define this macro as a C expression which is nonzero if accessing less than + a word of memory (i.e. 
a `char' or a `short') is no faster than accessing a + word of memory, i.e., if such access require more than one instruction or if + there is no difference in cost between byte and (aligned) word loads. + + When this macro is not defined, the compiler will access a field by finding + the smallest containing object; when it is defined, a fullword load will be + used if alignment permits. Unless bytes accesses are faster than word + accesses, using word accesses is preferable since it may eliminate + subsequent memory access if subsequent accesses occur to other fields in the + same word of the structure, but to different bytes. */ +#define SLOW_BYTE_ACCESS 1 + +/*}}}*/ +/*{{{ Dividing the output into sections. */ + +/* A C expression whose value is a string containing the assembler operation + that should precede instructions and read-only data. Normally `".text"' is + right. */ +#define TEXT_SECTION_ASM_OP "\t.text" + +/* A C expression whose value is a string containing the assembler operation to + identify the following data as writable initialized data. Normally + `".data"' is right. */ +#define DATA_SECTION_ASM_OP "\t.data" + +#define BSS_SECTION_ASM_OP "\t.section .bss" + +/*}}}*/ +/*{{{ The Overall Framework of an Assembler File. */ + +/* A C string constant describing how to begin a comment in the target + assembler language. The compiler assumes that the comment will end at the + end of the line. */ +#define ASM_COMMENT_START ";" + +/* A C string constant for text to be output before each `asm' statement or + group of consecutive ones. Normally this is `"#APP"', which is a comment + that has no effect on most assemblers but tells the GNU assembler that it + must check the lines that follow for all valid assembler constructs. */ +#define ASM_APP_ON "#APP\n" + +/* A C string constant for text to be output after each `asm' statement or + group of consecutive ones. Normally this is `"#NO_APP"', which tells the + GNU assembler to resume making the time-saving assumptions that are valid + for ordinary compiler output. */ +#define ASM_APP_OFF "#NO_APP\n" + +/*}}}*/ +/*{{{ Output and Generation of Labels. */ + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/*}}}*/ +/*{{{ Output of Assembler Instructions. */ + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand X. X is an RTL expression. + + CODE is a value that can be used to specify one of several ways of printing + the operand. It is used when identical operands must be printed differently + depending on the context. CODE comes from the `%' specification that was + used to request printing of the operand. If the specification was just + `%DIGIT' then CODE is 0; if the specification was `%LTR DIGIT' then CODE is + the ASCII code for LTR. + + If X is a register, this macro should print the register's name. The names + can be found in an array `reg_names' whose type is `char *[]'. `reg_names' + is initialized from `REGISTER_NAMES'. + + When the machine description has a specification `%PUNCT' (a `%' followed by + a punctuation character), this macro is called with a null pointer for X and + the punctuation character for CODE. */ +#define PRINT_OPERAND(STREAM, X, CODE) fr30_print_operand (STREAM, X, CODE) + +/* A C expression which evaluates to true if CODE is a valid punctuation + character for use in the `PRINT_OPERAND' macro. 
If + `PRINT_OPERAND_PUNCT_VALID_P' is not defined, it means that no punctuation + characters (except for the standard one, `%') are used in this way. */ +#define PRINT_OPERAND_PUNCT_VALID_P(CODE) (CODE == '#') + +/* A C compound statement to output to stdio stream STREAM the assembler syntax + for an instruction operand that is a memory reference whose address is X. X + is an RTL expression. */ + +#define PRINT_OPERAND_ADDRESS(STREAM, X) fr30_print_operand_address (STREAM, X) + +#define REGISTER_PREFIX "%" +#define LOCAL_LABEL_PREFIX "." +#define USER_LABEL_PREFIX "" +#define IMMEDIATE_PREFIX "" + +/*}}}*/ +/*{{{ Output of Dispatch Tables. */ + +/* This macro should be provided on machines where the addresses in a dispatch + table are relative to the table's own address. + + The definition should be a C statement to output to the stdio stream STREAM + an assembler pseudo-instruction to generate a difference between two labels. + VALUE and REL are the numbers of two internal labels. The definitions of + these labels are output using `(*targetm.asm_out.internal_label)', and they must be + printed in the same way here. For example, + + fprintf (STREAM, "\t.word L%d-L%d\n", VALUE, REL) */ +#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \ +fprintf (STREAM, "\t.word .L%d-.L%d\n", VALUE, REL) + +/* This macro should be provided on machines where the addresses in a dispatch + table are absolute. + + The definition should be a C statement to output to the stdio stream STREAM + an assembler pseudo-instruction to generate a reference to a label. VALUE + is the number of an internal label whose definition is output using + `(*targetm.asm_out.internal_label)'. For example, + + fprintf (STREAM, "\t.word L%d\n", VALUE) */ +#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \ +fprintf (STREAM, "\t.word .L%d\n", VALUE) + +/*}}}*/ +/*{{{ Assembler Commands for Alignment. */ + +/* A C statement to output to the stdio stream STREAM an assembler command to + advance the location counter to a multiple of 2 to the POWER bytes. POWER + will be a C expression of type `int'. */ +#define ASM_OUTPUT_ALIGN(STREAM, POWER) \ + fprintf ((STREAM), "\t.p2align %d\n", (POWER)) + +/*}}}*/ +/*{{{ Miscellaneous Parameters. */ + +/* An alias for a machine mode name. This is the machine mode that elements of + a jump-table should have. */ +#define CASE_VECTOR_MODE SImode + +/* The maximum number of bytes that a single instruction can move quickly from + memory to memory. */ +#define MOVE_MAX 8 + +/* A C expression which is nonzero if on this machine it is safe to "convert" + an integer of INPREC bits to one of OUTPREC bits (where OUTPREC is smaller + than INPREC) by merely operating on it as if it had only OUTPREC bits. + + On many machines, this expression can be 1. + + When `TRULY_NOOP_TRUNCATION' returns 1 for a pair of sizes for modes for + which `MODES_TIEABLE_P' is 0, suboptimal code can result. If this is the + case, making `TRULY_NOOP_TRUNCATION' return 0 in such cases may improve + things. */ +#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 + +/* An alias for the machine mode for pointers. On most machines, define this + to be the integer mode corresponding to the width of a hardware pointer; + `SImode' on 32-bit machine or `DImode' on 64-bit machines. On some machines + you must define this to be one of the partial integer modes, such as + `PSImode'. + + The width of `Pmode' must be at least as large as the value of + `POINTER_SIZE'. 
If it is not equal, you must define the macro + `POINTERS_EXTEND_UNSIGNED' to specify how pointers are extended to `Pmode'. */ +#define Pmode SImode + +/* An alias for the machine mode used for memory references to functions being + called, in `call' RTL expressions. On most machines this should be + `QImode'. */ +#define FUNCTION_MODE QImode + +/*}}}*/ + +/* Local Variables: */ +/* folded-file: t */ +/* End: */ diff --git a/gcc-4.9/gcc/config/fr30/fr30.md b/gcc-4.9/gcc/config/fr30/fr30.md new file mode 100644 index 000000000..8bbd77c69 --- /dev/null +++ b/gcc-4.9/gcc/config/fr30/fr30.md @@ -0,0 +1,1267 @@ +;; FR30 machine description. +;; Copyright (C) 1998-2014 Free Software Foundation, Inc. +;; Contributed by Cygnus Solutions. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. + +;;{{{ Attributes + +(define_attr "length" "" (const_int 2)) + +;; Used to distinguish between small memory model targets and big mode targets. + +(define_attr "size" "small,big" + (const (if_then_else (symbol_ref "TARGET_SMALL_MODEL") + (const_string "small") + (const_string "big")))) + + +;; Define an attribute to be used by the delay slot code. +;; An instruction by default is considered to be 'delayable' +;; that is, it can be placed into a delay slot, but it is not +;; itself a delayed branch type instruction. An instruction +;; whose type is 'delayed' is one which has a delay slot, and +;; an instruction whose delay_type is 'other' is one which does +;; not have a delay slot, nor can it be placed into a delay slot. + +(define_attr "delay_type" "delayable,delayed,other" (const_string "delayable")) + +;;}}} +;;{{{ Delay Slot Specifications + +(define_delay (eq_attr "delay_type" "delayed") + [(and (eq_attr "delay_type" "delayable") + (eq_attr "length" "2")) + (nil) + (nil)] +) + +(include "predicates.md") +(include "constraints.md") + +;;}}} +;;{{{ Moves + +;;{{{ Comment + +;; Wrap moves in define_expand to prevent memory->memory moves from being +;; generated at the RTL level, which generates better code for most machines +;; which can't do mem->mem moves. + +;; If operand 0 is a `subreg' with mode M of a register whose own mode is wider +;; than M, the effect of this instruction is to store the specified value in +;; the part of the register that corresponds to mode M. The effect on the rest +;; of the register is undefined. + +;; This class of patterns is special in several ways. First of all, each of +;; these names *must* be defined, because there is no other way to copy a datum +;; from one place to another. + +;; Second, these patterns are not used solely in the RTL generation pass. Even +;; the reload pass can generate move insns to copy values from stack slots into +;; temporary registers. When it does so, one of the operands is a hard +;; register and the other is an operand that can need to be reloaded into a +;; register. 
+ +;; Therefore, when given such a pair of operands, the pattern must +;; generate RTL which needs no reloading and needs no temporary +;; registers--no registers other than the operands. For example, if +;; you support the pattern with a `define_expand', then in such a +;; case the `define_expand' mustn't call `force_reg' or any other such +;; function which might generate new pseudo registers. + +;; This requirement exists even for subword modes on a RISC machine +;; where fetching those modes from memory normally requires several +;; insns and some temporary registers. Look in `spur.md' to see how +;; the requirement can be satisfied. + +;; During reload a memory reference with an invalid address may be passed as an +;; operand. Such an address will be replaced with a valid address later in the +;; reload pass. In this case, nothing may be done with the address except to +;; use it as it stands. If it is copied, it will not be replaced with a valid +;; address. No attempt should be made to make such an address into a valid +;; address and no routine (such as `change_address') that will do so may be +;; called. Note that `general_operand' will fail when applied to such an +;; address. +;; +;; The global variable `reload_in_progress' (which must be explicitly declared +;; if required) can be used to determine whether such special handling is +;; required. +;; +;; The variety of operands that have reloads depends on the rest of +;; the machine description, but typically on a RISC machine these can +;; only be pseudo registers that did not get hard registers, while on +;; other machines explicit memory references will get optional +;; reloads. +;; +;; If a scratch register is required to move an object to or from memory, it +;; can be allocated using `gen_reg_rtx' prior to reload. But this is +;; impossible during and after reload. If there are cases needing scratch +;; registers after reload, you must define `SECONDARY_INPUT_RELOAD_CLASS' and +;; perhaps also `SECONDARY_OUTPUT_RELOAD_CLASS' to detect them, and provide +;; patterns `reload_inM' or `reload_outM' to handle them. + +;; The constraints on a `moveM' must permit moving any hard register to any +;; other hard register provided that `HARD_REGNO_MODE_OK' permits mode M in +;; both registers and `REGISTER_MOVE_COST' applied to their classes returns a +;; value of 2. + +;; It is obligatory to support floating point `moveM' instructions +;; into and out of any registers that can hold fixed point values, +;; because unions and structures (which have modes `SImode' or +;; `DImode') can be in those registers and they may have floating +;; point members. + +;; There may also be a need to support fixed point `moveM' instructions in and +;; out of floating point registers. Unfortunately, I have forgotten why this +;; was so, and I don't know whether it is still true. If `HARD_REGNO_MODE_OK' +;; rejects fixed point values in floating point registers, then the constraints +;; of the fixed point `moveM' instructions must be designed to avoid ever +;; trying to reload into a floating point register. 
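+
+;; As a concrete illustration (register numbers are arbitrary): because the
+;; move expanders below force one side of a memory-to-memory move through a
+;; register, a C assignment such as "*dst = *src" in SImode becomes a load
+;; such as "ld @r5, r4" followed by a store such as "st r4, @r6", rather
+;; than a single mem->mem move, which the FR30 does not have.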
+ +;;}}} +;;{{{ Push and Pop + +;; Push a register onto the stack +(define_insn "movsi_push" + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "register_operand" "a"))] + "" + "st %0, @-r15" +) + +;; Pop a register off the stack +(define_insn "movsi_pop" + [(set (match_operand:SI 0 "register_operand" "=a") + (mem:SI (post_inc:SI (reg:SI 15))))] + "" + "ld @r15+, %0" +) + +;;}}} +;;{{{ 1 Byte Moves + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + " +{ + if (!reload_in_progress + && !reload_completed + && GET_CODE (operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], QImode))) + operands[1] = copy_to_mode_reg (QImode, operands[1]); +}") + +(define_insn "movqi_unsigned_register_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "memory_operand" "m")))] + "" + "ldub %1, %0" +) + +(define_expand "movqi_signed_register_load" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "memory_operand" "")))] + "" + " + emit_insn (gen_movqi_unsigned_register_load (operands[0], operands[1])); + emit_insn (gen_extendqisi2 (operands[0], operands[0])); + DONE; + " +) + +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,red,m,r") + (match_operand:QI 1 "general_operand" "i,red,r,rm"))] + "" + "@ + ldi:8\\t#%A1, %0 + mov \\t%1, %0 + stb \\t%1, %0 + ldub \\t%1, %0" +) + +;;}}} +;;{{{ 2 Byte Moves + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + " +{ + if (!reload_in_progress + && !reload_completed + && GET_CODE (operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], HImode))) + operands[1] = copy_to_mode_reg (HImode, operands[1]); +}") + +(define_insn "movhi_unsigned_register_load" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))] + "" + "lduh %1, %0" +) + +(define_expand "movhi_signed_register_load" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "memory_operand" "")))] + "" + " + emit_insn (gen_movhi_unsigned_register_load (operands[0], operands[1])); + emit_insn (gen_extendhisi2 (operands[0], operands[0])); + DONE; + " +) + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,red,m,r") + (match_operand:HI 1 "general_operand" "L,M,n,red,r,rm"))] + "" + "@ + ldi:8 \\t#%1, %0 + ldi:20\\t#%1, %0 + ldi:32\\t#%1, %0 + mov \\t%1, %0 + sth \\t%1, %0 + lduh \\t%1, %0" + [(set_attr "length" "*,4,6,*,*,*")] +) + +;;}}} +;;{{{ 4 Byte Moves + +;; If the destination is a MEM and the source is a +;; MEM or an CONST_INT move the source into a register. +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + "{ + if (!reload_in_progress + && !reload_completed + && GET_CODE(operands[0]) == MEM + && (GET_CODE (operands[1]) == MEM + || immediate_operand (operands[1], SImode))) + operands[1] = copy_to_mode_reg (SImode, operands[1]); + }" +) + +;; We can do some clever tricks when loading certain immediate +;; values. 
We implement these tricks as define_splits, rather +;; than putting the code into the define_expand "movsi" above, +;; because if we put them there, they will be evaluated at RTL +;; generation time and then the combiner pass will come along +;; and replace the multiple insns that have been generated with +;; the original, slower, load insns. (The combiner pass only +;; cares about reducing the number of instructions, it does not +;; care about instruction lengths or speeds). Splits are +;; evaluated after the combine pass and before the scheduling +;; passes, so that they are the perfect place to put this +;; intelligence. +;; +;; XXX we probably ought to implement these for QI and HI mode +;; loads as well. + +;; If we are loading a small negative constant we can save space +;; and time by loading the positive value and then sign extending it. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "INTVAL (operands[1]) <= -1 && INTVAL (operands[1]) >= -128 + && (GET_CODE (operands[0]) != SUBREG + || SCALAR_INT_MODE_P (GET_MODE (XEXP (operands[0], 0))))" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] + "{ + operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); + operands[2] = gen_lowpart (QImode, operands[0]); + }" +) + +;; If we are loading a large negative constant, one which does +;; not have any of its bottom 24 bit set, then we can save time +;; and space by loading the byte value and shifting it into place. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "(INTVAL (operands[1]) < 0) && ((INTVAL (operands[1]) & 0x00ffffff) == 0)" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (const_int 24))) + (clobber (reg:CC 16))])] + "{ + HOST_WIDE_INT val = INTVAL (operands[1]); + operands[2] = GEN_INT (val >> 24); + }" +) + +;; If we are loading a large positive constant, one which has bits +;; in the top byte set, but whose set bits all lie within an 8 bit +;; range, then we can save time and space by loading the byte value +;; and shifting it into place. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "const_int_operand" ""))] + "(INTVAL (operands[1]) > 0x00ffffff) + && ((INTVAL (operands[1]) >> exact_log2 (INTVAL (operands[1]) & (- INTVAL (operands[1])))) < 0x100)" + [(set (match_dup 0) (match_dup 2)) + (parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC 16))])] + "{ + HOST_WIDE_INT val = INTVAL (operands[1]); + int shift = exact_log2 (val & ( - val)); + operands[2] = GEN_INT (val >> shift); + operands[3] = GEN_INT (shift); + }" +) + +;; When TARGET_SMALL_MODEL is defined we assume that all symbolic +;; values are addresses which will fit in 20 bits. 
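+
+;; As an example of the first split above (register numbers are arbitrary):
+;; loading the constant -5 is rewritten as "ldi:8 #251, r4" followed by
+;; "extsb r4" (4 bytes in total) instead of a single 6-byte "ldi:32 #-5, r4".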
+ +(define_insn "movsi_internal" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,red,V,r,m") + (match_operand:SI 1 "general_operand" "L,M,n,i,rde,r,rm,r"))] + "" + "* + { + switch (which_alternative) + { + case 0: return \"ldi:8 \\t#%1, %0\"; + case 1: return \"ldi:20\\t#%1, %0\"; + case 2: return \"ldi:32\\t#%1, %0\"; + case 3: if (TARGET_SMALL_MODEL) + return \"ldi:20\\t%1, %0\"; + else + return \"ldi:32\\t%1, %0\"; + case 4: return \"mov \\t%1, %0\"; + case 5: return \"st \\t%1, %0\"; + case 6: return \"ld \\t%1, %0\"; + case 7: return \"st \\t%1, %0\"; + default: gcc_unreachable (); + } + }" + [(set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 4) + (eq_attr "alternative" "2") (const_int 6) + (eq_attr "alternative" "3") + (if_then_else (eq_attr "size" "small") + (const_int 4) + (const_int 6))] + (const_int 2)))] +) + +;;}}} +;;{{{ 8 Byte Moves + +;; Note - the FR30 does not have an 8 byte load/store instruction +;; but we have to support this pattern because some other patterns +;; (e.g. muldisi2) can produce a DImode result. +;; (This code is stolen from the M32R port.) + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + " + /* Everything except mem = const or mem = mem can be done easily. */ + + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (DImode, operands[1]); + " +) + +;; We use an insn and a split so that we can generate +;; RTL rather than text from fr30_move_double(). + +(define_insn "*movdi_insn" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,m,r") + (match_operand:DI 1 "di_operand" "r,m,r,nF"))] + "register_operand (operands[0], DImode) || register_operand (operands[1], DImode)" + "#" + [(set_attr "length" "4,8,12,12")] +) + +(define_split + [(set (match_operand:DI 0 "nonimmediate_di_operand" "") + (match_operand:DI 1 "di_operand" ""))] + "reload_completed" + [(match_dup 2)] + "operands[2] = fr30_move_double (operands);" +) + +;;}}} +;;{{{ Load & Store Multiple Registers + +;; The load multiple and store multiple patterns are implemented +;; as peepholes because the only time they are expected to occur +;; is during function prologues and epilogues. 
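+
+;; For instance, a prologue that pushes r11, r10, r9 and r8 with four
+;; individual "st rX, @-r15" instructions is matched by the first peephole
+;; below and emitted as a single "stm1 (r11, r10, r9, r8)".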
+ +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 3 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 4, 1)" + "stm1 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 3, 1)" + "stm1 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "high_register_operand" "h")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "high_register_operand" "h"))] + "fr30_check_multiple_regs (operands, 2, 1)" + "stm1 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 2 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 3 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 4, 0)" + "ldm1 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 2 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 3, 0)" + "ldm1 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (match_operand:SI 0 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15)))) + (set (match_operand:SI 1 "high_register_operand" "h") + (mem:SI (post_inc:SI (reg:SI 15))))] + "fr30_check_multiple_regs (operands, 2, 0)" + "ldm1 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 3 "low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 4, 1)" + "stm0 (%0, %1, %2, %3)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 2 "low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 3, 1)" + "stm0 (%0, %1, %2)" + [(set_attr "delay_type" "other")] +) + +(define_peephole + [(set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 0 "low_register_operand" "l")) + (set (mem:SI (pre_dec:SI (reg:SI 15))) + (match_operand:SI 1 
"low_register_operand" "l"))] + "fr30_check_multiple_regs (operands, 2, 1)" + "stm0 (%0, %1)" + [(set_attr "delay_type" "other")] +) + +;;}}} +;;{{{ Floating Point Moves + +;; Note - Patterns for SF mode moves are compulsory, but +;; patterns for DF are optional, as GCC can synthesize them. + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "{ + if (!reload_in_progress && !reload_completed + && memory_operand (operands[0], SFmode) + && memory_operand (operands[1], SFmode)) + operands[1] = copy_to_mode_reg (SFmode, operands[1]); + }" +) + +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,red,m,r") + (match_operand:SF 1 "general_operand" "Fn,i,rde,r,rm"))] + "" + "* + { + switch (which_alternative) + { + case 0: return \"ldi:32\\t%1, %0\"; + case 1: if (TARGET_SMALL_MODEL) + return \"ldi:20\\t%1, %0\"; + else + return \"ldi:32\\t%1, %0\"; + case 2: return \"mov \\t%1, %0\"; + case 3: return \"st \\t%1, %0\"; + case 4: return \"ld \\t%1, %0\"; + default: gcc_unreachable (); + } + }" + [(set (attr "length") (cond [(eq_attr "alternative" "0") (const_int 6) + (eq_attr "alternative" "1") + (if_then_else (eq_attr "size" "small") + (const_int 4) + (const_int 6))] + (const_int 2)))] +) + +(define_insn "*movsf_constant_store" + [(set (match_operand:SF 0 "memory_operand" "=m") + (match_operand:SF 1 "immediate_operand" "F"))] + "" + "* + { + const char * ldi_instr; + const char * tmp_reg; + static char buffer[100]; + + ldi_instr = fr30_const_double_is_zero (operands[1]) ? \"ldi:8\" : \"ldi:32\"; + + tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER]; + + sprintf (buffer, \"%s\\t#%%1, %s\\t;\\n\\tst\\t%s, %%0\\t; Created by movsf_constant_store\", + ldi_instr, tmp_reg, tmp_reg); + + return buffer; + }" + [(set_attr "length" "8")] +) + +;;}}} + +;;}}} +;;{{{ Conversions + +;; Signed conversions from a smaller integer to a larger integer + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:QI 1 "register_operand" "0")))] + "" + "extsb %0" +) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "0")))] + "" + "extsh %0" +) + +;; Unsigned conversions from a smaller integer to a larger integer + +(define_insn "zero_extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "register_operand" "0")))] + "" + "extub %0" +) + +(define_insn "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))] + "" + "extuh %0" +) + +;;}}} +;;{{{ Arithmetic + +;;{{{ Addition + +;; This is a special pattern just for adjusting the stack size. +(define_insn "add_to_stack" + [(set (reg:SI 15) + (plus:SI (reg:SI 15) + (match_operand:SI 0 "stack_add_operand" "i")))] + "" + "addsp %0" +) + +;; We need some trickery to be able to handle the addition of +;; large (i.e. outside +/- 16) constants. We need to be able to +;; handle this because reload assumes that it can generate add +;; instructions with arbitrary sized constants. 
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (plus:SI (match_operand:SI 1 "register_operand" "")
+                 (match_operand:SI 2 "nonmemory_operand" "")))]
+  ""
+  "{
+     if (   GET_CODE (operands[2]) == REG
+         || GET_CODE (operands[2]) == SUBREG)
+       emit_insn (gen_addsi_regs (operands[0], operands[1], operands[2]));
+     else if (GET_CODE (operands[2]) != CONST_INT)
+       emit_insn (gen_addsi_big_int (operands[0], operands[1], operands[2]));
+     else if (INTVAL (operands[2]) >= -16
+              && INTVAL (operands[2]) <= 15
+              && (!REG_P (operands[1])
+                  || !REGNO_PTR_FRAME_P (REGNO (operands[1]))
+                  || REGNO (operands[1]) == STACK_POINTER_REGNUM))
+       emit_insn (gen_addsi_small_int (operands[0], operands[1], operands[2]));
+     else
+       emit_insn (gen_addsi_big_int (operands[0], operands[1], operands[2]));
+     DONE;
+  }"
+)
+
+(define_insn "addsi_regs"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (plus:SI (match_operand:SI 1 "register_operand" "%0")
+                 (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "addn %2, %0"
+)
+
+;; Do not allow an eliminable register as the source register.  It
+;; might be eliminated in favor of the stack pointer, probably
+;; increasing the offset, and so rendering the instruction illegal.
+(define_insn "addsi_small_int"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+        (plus:SI (match_operand:SI 1 "register_operand" "0,0")
+                 (match_operand:SI 2 "add_immediate_operand" "I,J")))]
+  "!REG_P (operands[1])
+   || !REGNO_PTR_FRAME_P (REGNO (operands[1]))
+   || REGNO (operands[1]) == STACK_POINTER_REGNUM"
+  "@
+   addn %2, %0
+   addn2 %2, %0"
+)
+
+(define_expand "addsi_big_int"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (plus:SI (match_operand:SI 1 "register_operand" "")
+                 (match_operand:SI 2 "immediate_operand" "")))]
+  ""
+  "{
+     /* Cope with the possibility that ops 0 and 1 are the same register.  */
+     if (rtx_equal_p (operands[0], operands[1]))
+       {
+         if (reload_in_progress || reload_completed)
+           {
+             rtx reg = gen_rtx_REG (SImode, 0/*COMPILER_SCRATCH_REGISTER*/);
+
+             emit_insn (gen_movsi (reg, operands[2]));
+             emit_insn (gen_addsi_regs (operands[0], operands[0], reg));
+           }
+         else
+           {
+             operands[2] = force_reg (SImode, operands[2]);
+             emit_insn (gen_addsi_regs (operands[0], operands[0], operands[2]));
+           }
+       }
+     else
+       {
+         emit_insn (gen_movsi (operands[0], operands[2]));
+         emit_insn (gen_addsi_regs (operands[0], operands[0], operands[1]));
+       }
+     DONE;
+  }"
+)
+
+(define_insn "*addsi_for_reload"
+  [(set (match_operand:SI 0 "register_operand" "=&r,r,r")
+        (plus:SI (match_operand:SI 1 "register_operand" "r,r,r")
+                 (match_operand:SI 2 "immediate_operand" "L,M,n")))]
+  "reload_in_progress || reload_completed"
+  "@
+   ldi:8\\t#%2, %0 \\n\\taddn\\t%1, %0
+   ldi:20\\t#%2, %0 \\n\\taddn\\t%1, %0
+   ldi:32\\t#%2, %0 \\n\\taddn\\t%1, %0"
+  [(set_attr "length" "4,6,8")]
+)
+
+;;}}}
+;;{{{ Subtraction
+
+(define_insn "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (minus:SI (match_operand:SI 1 "register_operand" "0")
+                  (match_operand:SI 2 "register_operand" "r")))]
+  ""
+  "subn %2, %0"
+)
+
+;;}}}
+;;{{{ Multiplication
+
+;; Signed multiplication producing 64-bit results from 32-bit inputs
+(define_insn "mulsidi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%r"))
+                 (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))
+   (clobber (reg:CC 16))]
+  ""
+  "mul %2, %1\\n\\tmov\\tmdh, %0\\n\\tmov\\tmdl, %p0"
+  [(set_attr "length" "6")]
+)
+
+;; Unsigned multiplication producing 64-bit results from 32-bit inputs
+(define_insn "umulsidi3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%r"))
+                 (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
+   (clobber (reg:CC 16))]
+  ""
+  "mulu %2, %1\\n\\tmov\\tmdh, %0\\n\\tmov\\tmdl, %p0"
+  [(set_attr "length" "6")]
+)
+
+;; Signed multiplication producing 32-bit result from 16-bit inputs
+(define_insn "mulhisi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "%r"))
+                 (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))
+   (clobber (reg:CC 16))]
+  ""
+  "mulh %2, %1\\n\\tmov\\tmdl, %0"
+  [(set_attr "length" "4")]
+)
+
+;; Unsigned multiplication producing 32-bit result from 16-bit inputs
+(define_insn "umulhisi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%r"))
+                 (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))
+   (clobber (reg:CC 16))]
+  ""
+  "muluh %2, %1\\n\\tmov\\tmdl, %0"
+  [(set_attr "length" "4")]
+)
+
+;; Signed multiplication producing 32-bit result from 32-bit inputs
+(define_insn "mulsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mult:SI (match_operand:SI 1 "register_operand" "%r")
+                 (match_operand:SI 2 "register_operand" "r")))
+   (clobber (reg:CC 16))]
+  ""
+  "mul %2, %1\\n\\tmov\\tmdl, %0"
+  [(set_attr "length" "4")]
+)
+
+;;}}}
+;;}}}
+;;{{{ Shifts
+
+;; Arithmetic Shift Left
+(define_insn "ashlsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+        (ashift:SI (match_operand:SI 1 "register_operand" "0,0,0")
+                   (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+   (clobber (reg:CC 16))]
+  ""
+  "@
+   lsl %2, %0
+   lsl %2, %0
+   lsl2 %x2, %0"
+)
+
+;; Arithmetic Shift Right
+(define_insn "ashrsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+        (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0")
+                     (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+   (clobber (reg:CC 16))]
+  ""
+  "@
+   asr %2, %0
+   asr %2, %0
+   asr2 %x2, %0"
+)
+
+;; Logical Shift Right
+(define_insn "lshrsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+        (lshiftrt:SI (match_operand:SI 1 "register_operand" "0,0,0")
+                     (match_operand:SI 2 "nonmemory_operand" "r,I,K")))
+   (clobber (reg:CC 16))]
+  ""
+  "@
+   lsr %2, %0
+   lsr %2, %0
+   lsr2 %x2, %0"
+)
+
+;;}}}
+;;{{{ Logical Operations
+
+;; Logical AND, 32-bit integers
+(define_insn "andsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (and:SI (match_operand:SI 1 "register_operand" "%r")
+                (match_operand:SI 2 "register_operand" "0")))
+   (clobber (reg:CC 16))]
+  ""
+  "and %1, %0"
+)
+
+;; Inclusive OR, 32-bit integers
+(define_insn "iorsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (match_operand:SI 1 "register_operand" "%r")
+                (match_operand:SI 2 "register_operand" "0")))
+   (clobber (reg:CC 16))]
+  ""
+  "or %1, %0"
+)
+
+;; Exclusive OR, 32-bit integers
+(define_insn "xorsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (xor:SI (match_operand:SI 1 "register_operand" "%r")
+                (match_operand:SI 2 "register_operand" "0")))
+   (clobber (reg:CC 16))]
+  ""
+  "eor %1, %0"
+)
+
+;; One's complement, 32-bit integers
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (not:SI (match_operand:SI 1 "register_operand" "")))]
+  ""
+  "{
+     if (rtx_equal_p (operands[0], operands[1]))
+       {
+         if (reload_in_progress || reload_completed)
+           {
+             rtx reg = gen_rtx_REG (SImode, 0/*COMPILER_SCRATCH_REGISTER*/);
+
+             emit_insn (gen_movsi (reg, constm1_rtx));
+             emit_insn (gen_xorsi3 (operands[0], operands[0], reg));
+           }
+         else
+           {
+             rtx reg = gen_reg_rtx (SImode);
+
+             emit_insn (gen_movsi (reg, constm1_rtx));
+             emit_insn (gen_xorsi3 (operands[0], operands[0], reg));
+           }
+       }
+     else
+       {
+         emit_insn (gen_movsi_internal (operands[0], constm1_rtx));
+         emit_insn (gen_xorsi3 (operands[0], operands[1], operands[0]));
+       }
+     DONE;
+  }"
+)
+
+;;}}}
+;;{{{ Comparisons
+
+;; The actual comparisons, generated by the cbranch and/or cstore expanders
+
+(define_insn "*cmpsi_internal"
+  [(set (reg:CC 16)
+        (compare:CC (match_operand:SI 0 "register_operand" "r,r,r")
+                    (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
+  ""
+  "@
+   cmp %1, %0
+   cmp %1, %0
+   cmp2 %1, %0"
+)
+
+;;}}}
+;;{{{ Branches
+
+;; Define_expands called by the machine independent part of the compiler
+;; to allocate a new comparison register
+
+(define_expand "cbranchsi4"
+  [(set (reg:CC 16)
+        (compare:CC (match_operand:SI 1 "register_operand" "")
+                    (match_operand:SI 2 "nonmemory_operand" "")))
+   (set (pc)
+        (if_then_else (match_operator 0 "ordered_comparison_operator"
+                       [(reg:CC 16) (const_int 0)])
+                      (label_ref (match_operand 3 "" ""))
+                      (pc)))]
+  ""
+  ""
+)
+
+
+;; Actual branches.  We must allow for the (label_ref) and the (pc) to be
+;; swapped.  If they are swapped, it reverses the sense of the branch.
+
+;; This pattern matches the (branch-if-true) branches generated above.
+;; It generates two different instruction sequences depending upon how
+;; far away the destination is.
+
+;; The calculation for the instruction length is derived as follows:
+;; The branch instruction has a 9-bit signed displacement so we have
+;; this inequality for the displacement:
+;;
+;;               -256 <= pc < 256
+;; or
+;;         -256 + 256 <= pc + 256 < 256 + 256
+;; i.e.
+;;                 0 <= pc + 256 < 512
+;;
+;; if we consider the displacement as an unsigned value, then negative
+;; displacements become very large positive displacements, and the
+;; inequality becomes:
+;;
+;;            pc + 256 < 512
+;;
+;; In order to allow for the fact that the real branch instruction works
+;; from pc + 2, we increase the offset to 258.
+;;
+;; Note - we do not have to worry about whether the branch is delayed or
+;; not, as branch shortening happens after delay slot reorganization.
+
+(define_insn "*branch_true"
+  [(set (pc)
+        (if_then_else (match_operator 0 "comparison_operator"
+                       [(reg:CC 16)
+                        (const_int 0)])
+                      (label_ref (match_operand 1 "" ""))
+                      (pc)))]
+  ""
+  "*
+  {
+    if (get_attr_length (insn) == 2)
+      return \"b%b0%#\\t%l1\";
+    else
+      {
+        static char buffer [100];
+        const char * tmp_reg;
+        const char * ldi_insn;
+
+        tmp_reg = reg_names [COMPILER_SCRATCH_REGISTER];
+
+        ldi_insn = TARGET_SMALL_MODEL ? \"ldi:20\" : \"ldi:32\";
+
+        /* The code produced here is, for say the EQ case:
+
+                Bne  1f
+                LDI